diff --git a/swh/provenance/graph.py b/swh/provenance/graph.py --- a/swh/provenance/graph.py +++ b/swh/provenance/graph.py @@ -23,47 +23,15 @@ UTCMIN = datetime.min.replace(tzinfo=timezone.utc) -class HistoryNode: - def __init__( - self, entry: RevisionEntry, is_head: bool = False, in_history: bool = False - ) -> None: - self.entry = entry - # A revision is `is_head` if it is directly pointed by an origin (ie. a head - # revision for some snapshot) - self.is_head = is_head - # A revision is `in_history` if it appears in the history graph of an already - # processed revision in the provenance database - self.in_history = in_history - # XXX: the current simplified version of the origin-revision layer algorithm - # does not use this previous two flags at all. They are kept for now but might - # be removed in the future (hence, RevisionEntry might be used instead of - # HistoryNode). - - def __str__(self) -> str: - return f"<{self.entry}: is_head={self.is_head}, in_history={self.in_history}>" - - def as_dict(self) -> Dict[str, Any]: - return { - "rev": hash_to_hex(self.entry.id), - "is_head": self.is_head, - "in_history": self.in_history, - } - - class HistoryGraph: @statsd.timed(metric=GRAPH_DURATION_METRIC, tags={"method": "build_history_graph"}) def __init__( self, - provenance: ProvenanceInterface, archive: ArchiveInterface, revision: RevisionEntry, ) -> None: - self._head = HistoryNode( - revision, - is_head=provenance.revision_visited(revision), - in_history=provenance.revision_in_history(revision), - ) - self._graph: Dict[HistoryNode, Set[HistoryNode]] = {} + self._head = revision + self._graph: Dict[RevisionEntry, Set[RevisionEntry]] = {} stack = [self._head] while stack: @@ -71,22 +39,17 @@ if current not in self._graph: self._graph[current] = set() - current.entry.retrieve_parents(archive) - for parent in current.entry.parents: - node = HistoryNode( - parent, - is_head=provenance.revision_visited(parent), - in_history=provenance.revision_in_history(parent), - ) - self._graph[current].add(node) - stack.append(node) + current.retrieve_parents(archive) + for parent in current.parents: + self._graph[current].add(parent) + stack.append(parent) @property - def head(self) -> HistoryNode: + def head(self) -> RevisionEntry: return self._head @property - def parents(self) -> Dict[HistoryNode, Set[HistoryNode]]: + def parents(self) -> Dict[RevisionEntry, Set[RevisionEntry]]: return self._graph def __str__(self) -> str: @@ -94,11 +57,10 @@ def as_dict(self) -> Dict[str, Any]: return { - "head": self.head.as_dict(), + "head": hash_to_hex(self.head.id), "graph": { - hash_to_hex(node.entry.id): sorted( - [parent.as_dict() for parent in parents], - key=lambda d: d["rev"], + hash_to_hex(node.id): sorted( + [hash_to_hex(parent.id) for parent in parents] ) for node, parents in self._graph.items() }, diff --git a/swh/provenance/interface.py b/swh/provenance/interface.py --- a/swh/provenance/interface.py +++ b/swh/provenance/interface.py @@ -377,20 +377,8 @@ """Retrieve the preferred origin associated to `revision`.""" ... - def revision_in_history(self, revision: RevisionEntry) -> bool: - """Check if `revision` is known to be an ancestor of some head revision in the - provenance model. - """ - ... - def revision_set_preferred_origin( self, origin: OriginEntry, revision: RevisionEntry ) -> None: """Associate `origin` as the preferred origin for `revision`.""" ... - - def revision_visited(self, revision: RevisionEntry) -> bool: - """Check if `revision` is known to be a head revision for some origin in the - provenance model. - """ - ... diff --git a/swh/provenance/origin.py b/swh/provenance/origin.py --- a/swh/provenance/origin.py +++ b/swh/provenance/origin.py @@ -55,7 +55,7 @@ provenance.origin_add(origin) origin.retrieve_revisions(archive) for revision in origin.revisions: - graph = HistoryGraph(provenance, archive, revision) + graph = HistoryGraph(archive, revision) origin_add_revision(provenance, origin, graph) provenance.flush() @@ -66,24 +66,19 @@ origin: OriginEntry, graph: HistoryGraph, ) -> None: - # XXX: simplified version of the origin-revision algorithm. This is generating flat - # models for the history of all head revisions. No previous result is reused now! - # The previous implementation was missing some paths from origins to certain - # revisions due to a wrong reuse logic. - # head is treated separately since it should always be added to the given origin - check_preferred_origin(provenance, origin, graph.head.entry) - provenance.revision_add_to_origin(origin, graph.head.entry) + check_preferred_origin(provenance, origin, graph.head) + provenance.revision_add_to_origin(origin, graph.head) visited = {graph.head} # head's history should be recursively iterated starting from its parents stack = list(graph.parents[graph.head]) while stack: current = stack.pop() - check_preferred_origin(provenance, origin, current.entry) + check_preferred_origin(provenance, origin, current) # create a link between it and the head, and recursively walk its history - provenance.revision_add_before_revision(graph.head.entry, current.entry) + provenance.revision_add_before_revision(graph.head, current) visited.add(current) for parent in graph.parents[current]: if parent not in visited: diff --git a/swh/provenance/provenance.py b/swh/provenance/provenance.py --- a/swh/provenance/provenance.py +++ b/swh/provenance/provenance.py @@ -487,18 +487,8 @@ cache[revision.id] = origin return cache.get(revision.id) - def revision_in_history(self, revision: RevisionEntry) -> bool: - return revision.id in self.cache["revision_before_revision"] or bool( - self.storage.relation_get(RelationType.REV_BEFORE_REV, [revision.id]) - ) - def revision_set_preferred_origin( self, origin: OriginEntry, revision: RevisionEntry ) -> None: self.cache["revision_origin"]["data"][revision.id] = origin.id self.cache["revision_origin"]["added"].add(revision.id) - - def revision_visited(self, revision: RevisionEntry) -> bool: - return revision.id in dict(self.cache["revision_in_origin"]) or bool( - self.storage.relation_get(RelationType.REV_IN_ORG, [revision.id]) - ) diff --git a/swh/provenance/tests/data/history_graphs_with-merges_visits-01.yaml b/swh/provenance/tests/data/history_graphs_with-merges_visits-01.yaml --- a/swh/provenance/tests/data/history_graphs_with-merges_visits-01.yaml +++ b/swh/provenance/tests/data/history_graphs_with-merges_visits-01.yaml @@ -2,229 +2,124 @@ - origin: "https://repo_with_merges/1/" snapshot: "e2520f0dbf34c92754f00c5a60241dfa7d612868" graphs: - - head: - rev: "1444db96cbd8cd791abe83527becee73d3c64e86" - is_head: False - in_history: False + - head: "1444db96cbd8cd791abe83527becee73d3c64e86" graph: 1444db96cbd8cd791abe83527becee73d3c64e86: - - rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4" - is_head: False - in_history: False + - "c0d8929936631ecbcf9147be6b8aa13b13b014e4" c0d8929936631ecbcf9147be6b8aa13b13b014e4: [] # History graph for snapshot with branches: R03 and R06 - origin: "https://repo_with_merges/1/" snapshot: "e2520f0dbf34c92754f00c5a60241dfa7d612868" graphs: - - head: - rev: "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb" - is_head: False - in_history: False + - head: "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb" graph: 20f4da0f48609d9f7f908ebbcac3b3741a0f25cb: - - rev: "1c533587277731236616cac0d44f3b46c1da0f8a" - is_head: False - in_history: False + - "1c533587277731236616cac0d44f3b46c1da0f8a" 1c533587277731236616cac0d44f3b46c1da0f8a: - - rev: "1444db96cbd8cd791abe83527becee73d3c64e86" - is_head: True - in_history: False + - "1444db96cbd8cd791abe83527becee73d3c64e86" 1444db96cbd8cd791abe83527becee73d3c64e86: - - rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4" - is_head: False - in_history: True + - "c0d8929936631ecbcf9147be6b8aa13b13b014e4" c0d8929936631ecbcf9147be6b8aa13b13b014e4: [] - - head: - rev: "72d92d41a9095db2dd6b8fb1c62d92c8251753ff" - is_head: False - in_history: False + - head: "72d92d41a9095db2dd6b8fb1c62d92c8251753ff" graph: 72d92d41a9095db2dd6b8fb1c62d92c8251753ff: - - rev: "1444db96cbd8cd791abe83527becee73d3c64e86" - is_head: True - in_history: True + - "1444db96cbd8cd791abe83527becee73d3c64e86" 1444db96cbd8cd791abe83527becee73d3c64e86: - - rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4" - is_head: False - in_history: True + - "c0d8929936631ecbcf9147be6b8aa13b13b014e4" c0d8929936631ecbcf9147be6b8aa13b13b014e4: [] # History graph for snapshot with branches: R05 and R06 - origin: "https://repo_with_merges/2/" snapshot: "e2520f0dbf34c92754f00c5a60241dfa7d612868" graphs: - - head: - rev: "65e58853df939b318c106c4c1f55acaf8b41c74c" - is_head: False - in_history: False + - head: "65e58853df939b318c106c4c1f55acaf8b41c74c" graph: 65e58853df939b318c106c4c1f55acaf8b41c74c: - - rev: "0d66eadcc15e0d7f6cfd4289329a7749a1309982" - is_head: False - in_history: False + - "0d66eadcc15e0d7f6cfd4289329a7749a1309982" 0d66eadcc15e0d7f6cfd4289329a7749a1309982: - - rev: "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb" - is_head: True - in_history: False + - "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb" 20f4da0f48609d9f7f908ebbcac3b3741a0f25cb: - - rev: "1c533587277731236616cac0d44f3b46c1da0f8a" - is_head: False - in_history: True + - "1c533587277731236616cac0d44f3b46c1da0f8a" 1c533587277731236616cac0d44f3b46c1da0f8a: - - rev: "1444db96cbd8cd791abe83527becee73d3c64e86" - is_head: True - in_history: True + - "1444db96cbd8cd791abe83527becee73d3c64e86" 1444db96cbd8cd791abe83527becee73d3c64e86: - - rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4" - is_head: False - in_history: True + - "c0d8929936631ecbcf9147be6b8aa13b13b014e4" c0d8929936631ecbcf9147be6b8aa13b13b014e4: [] - - head: - rev: "72d92d41a9095db2dd6b8fb1c62d92c8251753ff" - is_head: True - in_history: False + - head: "72d92d41a9095db2dd6b8fb1c62d92c8251753ff" graph: 72d92d41a9095db2dd6b8fb1c62d92c8251753ff: - - rev: "1444db96cbd8cd791abe83527becee73d3c64e86" - is_head: True - in_history: True + - "1444db96cbd8cd791abe83527becee73d3c64e86" 1444db96cbd8cd791abe83527becee73d3c64e86: - - rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4" - is_head: False - in_history: True + - "c0d8929936631ecbcf9147be6b8aa13b13b014e4" c0d8929936631ecbcf9147be6b8aa13b13b014e4: [] # History graph for snapshot with branches: R06 and R07 - origin: "https://repo_with_merges/1/" snapshot: "e2520f0dbf34c92754f00c5a60241dfa7d612868" graphs: - - head: - rev: "72d92d41a9095db2dd6b8fb1c62d92c8251753ff" - is_head: True - in_history: False + - head: "72d92d41a9095db2dd6b8fb1c62d92c8251753ff" graph: 72d92d41a9095db2dd6b8fb1c62d92c8251753ff: - - rev: "1444db96cbd8cd791abe83527becee73d3c64e86" - is_head: True - in_history: True + - "1444db96cbd8cd791abe83527becee73d3c64e86" 1444db96cbd8cd791abe83527becee73d3c64e86: - - rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4" - is_head: False - in_history: True + - "c0d8929936631ecbcf9147be6b8aa13b13b014e4" c0d8929936631ecbcf9147be6b8aa13b13b014e4: [] - - head: - rev: "fff0089fad98e8f5b46ec5c9025a20a602851ba6" - is_head: False - in_history: False + - head: "fff0089fad98e8f5b46ec5c9025a20a602851ba6" graph: fff0089fad98e8f5b46ec5c9025a20a602851ba6: - - rev: "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb" - is_head: True - in_history: True + - "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb" 20f4da0f48609d9f7f908ebbcac3b3741a0f25cb: - - rev: "1c533587277731236616cac0d44f3b46c1da0f8a" - is_head: False - in_history: True + - "1c533587277731236616cac0d44f3b46c1da0f8a" 1c533587277731236616cac0d44f3b46c1da0f8a: - - rev: "1444db96cbd8cd791abe83527becee73d3c64e86" - is_head: True - in_history: True + - "1444db96cbd8cd791abe83527becee73d3c64e86" 1444db96cbd8cd791abe83527becee73d3c64e86: - - rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4" - is_head: False - in_history: True + - "c0d8929936631ecbcf9147be6b8aa13b13b014e4" c0d8929936631ecbcf9147be6b8aa13b13b014e4: [] # History graph for snapshot with branches: R08 - origin: "https://repo_with_merges/1/" snapshot: "e2520f0dbf34c92754f00c5a60241dfa7d612868" graphs: - - head: - rev: "7c8f29237dded4f9d265e46ec7066503e7858e87" - is_head: False - in_history: False + - head: "7c8f29237dded4f9d265e46ec7066503e7858e87" graph: 7c8f29237dded4f9d265e46ec7066503e7858e87: - - rev: "65e58853df939b318c106c4c1f55acaf8b41c74c" - is_head: True - in_history: False - - rev: "72d92d41a9095db2dd6b8fb1c62d92c8251753ff" - is_head: True - in_history: False - - rev: "fff0089fad98e8f5b46ec5c9025a20a602851ba6" - is_head: True - in_history: False + - "65e58853df939b318c106c4c1f55acaf8b41c74c" + - "72d92d41a9095db2dd6b8fb1c62d92c8251753ff" + - "fff0089fad98e8f5b46ec5c9025a20a602851ba6" 65e58853df939b318c106c4c1f55acaf8b41c74c: - - rev: "0d66eadcc15e0d7f6cfd4289329a7749a1309982" - is_head: False - in_history: True + - "0d66eadcc15e0d7f6cfd4289329a7749a1309982" 0d66eadcc15e0d7f6cfd4289329a7749a1309982: - - rev: "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb" - is_head: True - in_history: True + - "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb" 20f4da0f48609d9f7f908ebbcac3b3741a0f25cb: - - rev: "1c533587277731236616cac0d44f3b46c1da0f8a" - is_head: False - in_history: True + - "1c533587277731236616cac0d44f3b46c1da0f8a" 1c533587277731236616cac0d44f3b46c1da0f8a: - - rev: "1444db96cbd8cd791abe83527becee73d3c64e86" - is_head: True - in_history: True + - "1444db96cbd8cd791abe83527becee73d3c64e86" 1444db96cbd8cd791abe83527becee73d3c64e86: - - rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4" - is_head: False - in_history: True + - "c0d8929936631ecbcf9147be6b8aa13b13b014e4" c0d8929936631ecbcf9147be6b8aa13b13b014e4: [] 72d92d41a9095db2dd6b8fb1c62d92c8251753ff: - - rev: "1444db96cbd8cd791abe83527becee73d3c64e86" - is_head: True - in_history: True + - "1444db96cbd8cd791abe83527becee73d3c64e86" fff0089fad98e8f5b46ec5c9025a20a602851ba6: - - rev: "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb" - is_head: True - in_history: True + - "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb" # History graph for snapshot with branches: R08 - origin: "https://repo_with_merges/2/" snapshot: "e2520f0dbf34c92754f00c5a60241dfa7d612868" graphs: - - head: - rev: "7c8f29237dded4f9d265e46ec7066503e7858e87" - is_head: True - in_history: False + - head: "7c8f29237dded4f9d265e46ec7066503e7858e87" graph: 7c8f29237dded4f9d265e46ec7066503e7858e87: - - rev: "65e58853df939b318c106c4c1f55acaf8b41c74c" - is_head: True - in_history: True - - rev: "72d92d41a9095db2dd6b8fb1c62d92c8251753ff" - is_head: True - in_history: True - - rev: "fff0089fad98e8f5b46ec5c9025a20a602851ba6" - is_head: True - in_history: True + - "65e58853df939b318c106c4c1f55acaf8b41c74c" + - "72d92d41a9095db2dd6b8fb1c62d92c8251753ff" + - "fff0089fad98e8f5b46ec5c9025a20a602851ba6" 65e58853df939b318c106c4c1f55acaf8b41c74c: - - rev: "0d66eadcc15e0d7f6cfd4289329a7749a1309982" - is_head: False - in_history: True + - "0d66eadcc15e0d7f6cfd4289329a7749a1309982" 0d66eadcc15e0d7f6cfd4289329a7749a1309982: - - rev: "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb" - is_head: True - in_history: True + - "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb" 20f4da0f48609d9f7f908ebbcac3b3741a0f25cb: - - rev: "1c533587277731236616cac0d44f3b46c1da0f8a" - is_head: False - in_history: True + - "1c533587277731236616cac0d44f3b46c1da0f8a" 1c533587277731236616cac0d44f3b46c1da0f8a: - - rev: "1444db96cbd8cd791abe83527becee73d3c64e86" - is_head: True - in_history: True + - "1444db96cbd8cd791abe83527becee73d3c64e86" 1444db96cbd8cd791abe83527becee73d3c64e86: - - rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4" - is_head: False - in_history: True + - "c0d8929936631ecbcf9147be6b8aa13b13b014e4" c0d8929936631ecbcf9147be6b8aa13b13b014e4: [] 72d92d41a9095db2dd6b8fb1c62d92c8251753ff: - - rev: "1444db96cbd8cd791abe83527becee73d3c64e86" - is_head: True - in_history: True + - "1444db96cbd8cd791abe83527becee73d3c64e86" fff0089fad98e8f5b46ec5c9025a20a602851ba6: - - rev: "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb" - is_head: True - in_history: True + - "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb" diff --git a/swh/provenance/tests/test_history_graph.py b/swh/provenance/tests/test_history_graph.py --- a/swh/provenance/tests/test_history_graph.py +++ b/swh/provenance/tests/test_history_graph.py @@ -42,9 +42,8 @@ print("Expected graph:", expected_graph_as_dict) computed_graph = HistoryGraph( - provenance, archive, - RevisionEntry(hash_to_bytes(expected_graph_as_dict["head"]["rev"])), + RevisionEntry(hash_to_bytes(expected_graph_as_dict["head"])), ) print("Computed graph:", computed_graph.as_dict()) assert computed_graph.as_dict() == expected_graph_as_dict