Changeset View
Changeset View
Standalone View
Standalone View
swh/provenance/mongo/backend.py
Show First 20 Lines • Show All 275 Lines • ▼ Show 20 Lines | def revision_get(self, ids: Iterable[Sha1Git]) -> Dict[Sha1Git, RevisionData]: | ||||
"sha1": {"$in": list(ids)}, | "sha1": {"$in": list(ids)}, | ||||
"$or": [{"preferred": {"$ne": None}}, {"ts": {"$ne": None}}], | "$or": [{"preferred": {"$ne": None}}, {"ts": {"$ne": None}}], | ||||
}, | }, | ||||
{"sha1": 1, "preferred": 1, "ts": 1, "_id": 0}, | {"sha1": 1, "preferred": 1, "ts": 1, "_id": 0}, | ||||
) | ) | ||||
} | } | ||||
def relation_add( | def relation_add( | ||||
self, relation: RelationType, data: Iterable[RelationData] | self, relation: RelationType, data: Dict[Sha1Git, Set[RelationData]] | ||||
) -> bool: | ) -> bool: | ||||
src_relation, *_, dst_relation = relation.value.split("_") | src_relation, *_, dst_relation = relation.value.split("_") | ||||
set_data = set(data) | |||||
dst_objs = { | dst_objs = { | ||||
x["sha1"]: x["_id"] | x["sha1"]: x["_id"] | ||||
for x in self.db.get_collection(dst_relation).find( | for x in self.db.get_collection(dst_relation).find( | ||||
{"sha1": {"$in": [x.dst for x in set_data]}}, {"_id": 1, "sha1": 1} | { | ||||
"sha1": { | |||||
"$in": list({rel.dst for rels in data.values() for rel in rels}) | |||||
} | |||||
}, | |||||
{"_id": 1, "sha1": 1}, | |||||
) | ) | ||||
} | } | ||||
denorm: Dict[Sha1Git, Any] = {} | denorm: Dict[Sha1Git, Any] = {} | ||||
for each in set_data: | for src, rels in data.items(): | ||||
for rel in rels: | |||||
if src_relation != "revision": | if src_relation != "revision": | ||||
denorm.setdefault(each.src, {}).setdefault( | denorm.setdefault(src, {}).setdefault( | ||||
str(dst_objs[each.dst]), [] | str(dst_objs[rel.dst]), [] | ||||
).append(each.path) | ).append(rel.path) | ||||
else: | else: | ||||
denorm.setdefault(each.src, []).append(dst_objs[each.dst]) | denorm.setdefault(src, []).append(dst_objs[rel.dst]) | ||||
src_objs = { | src_objs = { | ||||
x["sha1"]: x | x["sha1"]: x | ||||
for x in self.db.get_collection(src_relation).find( | for x in self.db.get_collection(src_relation).find( | ||||
{"sha1": {"$in": list(denorm)}} | {"sha1": {"$in": list(denorm.keys())}} | ||||
) | ) | ||||
} | } | ||||
for sha1, dsts in denorm.items(): | for sha1, dsts in denorm.items(): | ||||
# update | # update | ||||
if src_relation != "revision": | if src_relation != "revision": | ||||
k = { | k = { | ||||
obj_id: list(set(paths + dsts.get(obj_id, []))) | obj_id: list(set(paths + dsts.get(obj_id, []))) | ||||
Show All 11 Lines | ) -> bool: | ||||
dst_relation: list(set(src_objs[sha1][dst_relation] + dsts)) | dst_relation: list(set(src_objs[sha1][dst_relation] + dsts)) | ||||
} | } | ||||
}, | }, | ||||
) | ) | ||||
return True | return True | ||||
def relation_get( | def relation_get( | ||||
self, relation: RelationType, ids: Iterable[Sha1Git], reverse: bool = False | self, relation: RelationType, ids: Iterable[Sha1Git], reverse: bool = False | ||||
) -> Set[RelationData]: | ) -> Dict[Sha1Git, Set[RelationData]]: | ||||
src, *_, dst = relation.value.split("_") | src, *_, dst = relation.value.split("_") | ||||
sha1s = set(ids) | sha1s = set(ids) | ||||
if not reverse: | if not reverse: | ||||
empty: Union[Dict[str, bytes], List[str]] = {} if src != "revision" else [] | |||||
src_objs = { | src_objs = { | ||||
x["sha1"]: x[dst] | x["sha1"]: x[dst] | ||||
for x in self.db.get_collection(src).find( | for x in self.db.get_collection(src).find( | ||||
{"sha1": {"$in": list(sha1s)}}, {"_id": 0, "sha1": 1, dst: 1} | {"sha1": {"$in": list(sha1s)}, dst: {"$ne": empty}}, | ||||
{"_id": 0, "sha1": 1, dst: 1}, | |||||
) | ) | ||||
} | } | ||||
dst_ids = list( | dst_ids = list( | ||||
{ObjectId(obj_id) for _, value in src_objs.items() for obj_id in value} | {ObjectId(obj_id) for _, value in src_objs.items() for obj_id in value} | ||||
) | ) | ||||
dst_objs = { | dst_objs = { | ||||
x["sha1"]: x["_id"] | x["sha1"]: x["_id"] | ||||
for x in self.db.get_collection(dst).find( | for x in self.db.get_collection(dst).find( | ||||
{"_id": {"$in": dst_ids}}, {"_id": 1, "sha1": 1} | {"_id": {"$in": dst_ids}}, {"_id": 1, "sha1": 1} | ||||
) | ) | ||||
} | } | ||||
if src != "revision": | if src != "revision": | ||||
return { | return { | ||||
RelationData(src=src_sha1, dst=dst_sha1, path=path) | src_sha1: { | ||||
for src_sha1, denorm in src_objs.items() | RelationData(dst=dst_sha1, path=path) | ||||
for dst_sha1, dst_obj_id in dst_objs.items() | for dst_sha1, dst_obj_id in dst_objs.items() | ||||
for dst_obj_str, paths in denorm.items() | for dst_obj_str, paths in denorm.items() | ||||
for path in paths | for path in paths | ||||
if dst_obj_id == ObjectId(dst_obj_str) | if dst_obj_id == ObjectId(dst_obj_str) | ||||
} | } | ||||
for src_sha1, denorm in src_objs.items() | |||||
} | |||||
else: | else: | ||||
return { | return { | ||||
RelationData(src=src_sha1, dst=dst_sha1, path=None) | src_sha1: { | ||||
for src_sha1, denorm in src_objs.items() | RelationData(dst=dst_sha1, path=None) | ||||
for dst_sha1, dst_obj_id in dst_objs.items() | for dst_sha1, dst_obj_id in dst_objs.items() | ||||
for dst_obj_ref in denorm | for dst_obj_ref in denorm | ||||
if dst_obj_id == dst_obj_ref | if dst_obj_id == dst_obj_ref | ||||
} | } | ||||
for src_sha1, denorm in src_objs.items() | |||||
} | |||||
else: | else: | ||||
dst_objs = { | dst_objs = { | ||||
x["sha1"]: x["_id"] | x["sha1"]: x["_id"] | ||||
for x in self.db.get_collection(dst).find( | for x in self.db.get_collection(dst).find( | ||||
{"sha1": {"$in": list(sha1s)}}, {"_id": 1, "sha1": 1} | {"sha1": {"$in": list(sha1s)}}, {"_id": 1, "sha1": 1} | ||||
) | ) | ||||
} | } | ||||
src_objs = { | src_objs = { | ||||
x["sha1"]: x[dst] | x["sha1"]: x[dst] | ||||
for x in self.db.get_collection(src).find( | for x in self.db.get_collection(src).find( | ||||
{}, {"_id": 0, "sha1": 1, dst: 1} | {}, {"_id": 0, "sha1": 1, dst: 1} | ||||
) | ) | ||||
} | } | ||||
result: Dict[Sha1Git, Set[RelationData]] = {} | |||||
if src != "revision": | if src != "revision": | ||||
return { | for dst_sha1, dst_obj_id in dst_objs.items(): | ||||
RelationData(src=src_sha1, dst=dst_sha1, path=path) | for src_sha1, denorm in src_objs.items(): | ||||
for src_sha1, denorm in src_objs.items() | for dst_obj_str, paths in denorm.items(): | ||||
for dst_sha1, dst_obj_id in dst_objs.items() | if dst_obj_id == ObjectId(dst_obj_str): | ||||
for dst_obj_str, paths in denorm.items() | result.setdefault(src_sha1, set()).update( | ||||
RelationData(dst=dst_sha1, path=path) | |||||
for path in paths | for path in paths | ||||
if dst_obj_id == ObjectId(dst_obj_str) | ) | ||||
} | |||||
else: | else: | ||||
return { | for dst_sha1, dst_obj_id in dst_objs.items(): | ||||
RelationData(src=src_sha1, dst=dst_sha1, path=None) | for src_sha1, denorm in src_objs.items(): | ||||
for src_sha1, denorm in src_objs.items() | if dst_obj_id in { | ||||
for dst_sha1, dst_obj_id in dst_objs.items() | ObjectId(dst_obj_str) for dst_obj_str in denorm | ||||
for dst_obj_ref in denorm | }: | ||||
if dst_obj_id == dst_obj_ref | result.setdefault(src_sha1, set()).add( | ||||
} | RelationData(dst=dst_sha1, path=None) | ||||
) | |||||
def relation_get_all(self, relation: RelationType) -> Set[RelationData]: | return result | ||||
def relation_get_all( | |||||
self, relation: RelationType | |||||
) -> Dict[Sha1Git, Set[RelationData]]: | |||||
src, *_, dst = relation.value.split("_") | src, *_, dst = relation.value.split("_") | ||||
empty: Union[Dict[str, bytes], List[str]] = {} if src != "revision" else [] | |||||
src_objs = { | src_objs = { | ||||
x["sha1"]: x[dst] | x["sha1"]: x[dst] | ||||
for x in self.db.get_collection(src).find({}, {"_id": 0, "sha1": 1, dst: 1}) | for x in self.db.get_collection(src).find( | ||||
{dst: {"$ne": empty}}, {"_id": 0, "sha1": 1, dst: 1} | |||||
) | |||||
} | } | ||||
dst_ids = list( | dst_ids = list( | ||||
{ObjectId(obj_id) for _, value in src_objs.items() for obj_id in value} | {ObjectId(obj_id) for _, value in src_objs.items() for obj_id in value} | ||||
) | ) | ||||
if src != "revision": | |||||
dst_objs = { | dst_objs = { | ||||
x["_id"]: x["sha1"] | x["_id"]: x["sha1"] | ||||
for x in self.db.get_collection(dst).find( | for x in self.db.get_collection(dst).find( | ||||
{"_id": {"$in": dst_ids}}, {"_id": 1, "sha1": 1} | {"_id": {"$in": dst_ids}}, {"_id": 1, "sha1": 1} | ||||
) | ) | ||||
} | } | ||||
if src != "revision": | |||||
return { | return { | ||||
RelationData(src=src_sha1, dst=dst_sha1, path=path) | src_sha1: { | ||||
for src_sha1, denorm in src_objs.items() | RelationData(dst=dst_sha1, path=path) | ||||
for dst_obj_id, dst_sha1 in dst_objs.items() | for dst_obj_id, dst_sha1 in dst_objs.items() | ||||
for dst_obj_str, paths in denorm.items() | for dst_obj_str, paths in denorm.items() | ||||
for path in paths | for path in paths | ||||
if dst_obj_id == ObjectId(dst_obj_str) | if dst_obj_id == ObjectId(dst_obj_str) | ||||
} | } | ||||
else: | for src_sha1, denorm in src_objs.items() | ||||
dst_objs = { | |||||
x["_id"]: x["sha1"] | |||||
for x in self.db.get_collection(dst).find( | |||||
{"_id": {"$in": dst_ids}}, {"_id": 1, "sha1": 1} | |||||
) | |||||
} | } | ||||
else: | |||||
return { | return { | ||||
RelationData(src=src_sha1, dst=dst_sha1, path=None) | src_sha1: { | ||||
for src_sha1, denorm in src_objs.items() | RelationData(dst=dst_sha1, path=None) | ||||
for dst_obj_id, dst_sha1 in dst_objs.items() | for dst_obj_id, dst_sha1 in dst_objs.items() | ||||
for dst_obj_ref in denorm | for dst_obj_ref in denorm | ||||
if dst_obj_id == dst_obj_ref | if dst_obj_id == dst_obj_ref | ||||
} | } | ||||
for src_sha1, denorm in src_objs.items() | |||||
} | |||||
def with_path(self) -> bool: | def with_path(self) -> bool: | ||||
return True | return True |