Page Menu | Home | Software Heritage

D6862.diff
No One | Temporary

D6862.diff

diff --git a/swh/provenance/graph.py b/swh/provenance/graph.py
--- a/swh/provenance/graph.py
+++ b/swh/provenance/graph.py
@@ -23,47 +23,15 @@
UTCMIN = datetime.min.replace(tzinfo=timezone.utc)
-class HistoryNode:
- def __init__(
- self, entry: RevisionEntry, is_head: bool = False, in_history: bool = False
- ) -> None:
- self.entry = entry
- # A revision is `is_head` if it is directly pointed by an origin (ie. a head
- # revision for some snapshot)
- self.is_head = is_head
- # A revision is `in_history` if it appears in the history graph of an already
- # processed revision in the provenance database
- self.in_history = in_history
- # XXX: the current simplified version of the origin-revision layer algorithm
- # does not use this previous two flags at all. They are kept for now but might
- # be removed in the future (hence, RevisionEntry might be used instead of
- # HistoryNode).
-
- def __str__(self) -> str:
- return f"<{self.entry}: is_head={self.is_head}, in_history={self.in_history}>"
-
- def as_dict(self) -> Dict[str, Any]:
- return {
- "rev": hash_to_hex(self.entry.id),
- "is_head": self.is_head,
- "in_history": self.in_history,
- }
-
-
class HistoryGraph:
@statsd.timed(metric=GRAPH_DURATION_METRIC, tags={"method": "build_history_graph"})
def __init__(
self,
- provenance: ProvenanceInterface,
archive: ArchiveInterface,
revision: RevisionEntry,
) -> None:
- self._head = HistoryNode(
- revision,
- is_head=provenance.revision_visited(revision),
- in_history=provenance.revision_in_history(revision),
- )
- self._graph: Dict[HistoryNode, Set[HistoryNode]] = {}
+ self._head = revision
+ self._graph: Dict[RevisionEntry, Set[RevisionEntry]] = {}
stack = [self._head]
while stack:
@@ -71,22 +39,17 @@
if current not in self._graph:
self._graph[current] = set()
- current.entry.retrieve_parents(archive)
- for parent in current.entry.parents:
- node = HistoryNode(
- parent,
- is_head=provenance.revision_visited(parent),
- in_history=provenance.revision_in_history(parent),
- )
- self._graph[current].add(node)
- stack.append(node)
+ current.retrieve_parents(archive)
+ for parent in current.parents:
+ self._graph[current].add(parent)
+ stack.append(parent)
@property
- def head(self) -> HistoryNode:
+ def head(self) -> RevisionEntry:
return self._head
@property
- def parents(self) -> Dict[HistoryNode, Set[HistoryNode]]:
+ def parents(self) -> Dict[RevisionEntry, Set[RevisionEntry]]:
return self._graph
def __str__(self) -> str:
@@ -94,11 +57,10 @@
def as_dict(self) -> Dict[str, Any]:
return {
- "head": self.head.as_dict(),
+ "head": hash_to_hex(self.head.id),
"graph": {
- hash_to_hex(node.entry.id): sorted(
- [parent.as_dict() for parent in parents],
- key=lambda d: d["rev"],
+ hash_to_hex(node.id): sorted(
+ [hash_to_hex(parent.id) for parent in parents]
)
for node, parents in self._graph.items()
},
diff --git a/swh/provenance/interface.py b/swh/provenance/interface.py
--- a/swh/provenance/interface.py
+++ b/swh/provenance/interface.py
@@ -377,20 +377,8 @@
"""Retrieve the preferred origin associated to `revision`."""
...
- def revision_in_history(self, revision: RevisionEntry) -> bool:
- """Check if `revision` is known to be an ancestor of some head revision in the
- provenance model.
- """
- ...
-
def revision_set_preferred_origin(
self, origin: OriginEntry, revision: RevisionEntry
) -> None:
"""Associate `origin` as the preferred origin for `revision`."""
...
-
- def revision_visited(self, revision: RevisionEntry) -> bool:
- """Check if `revision` is known to be a head revision for some origin in the
- provenance model.
- """
- ...
diff --git a/swh/provenance/origin.py b/swh/provenance/origin.py
--- a/swh/provenance/origin.py
+++ b/swh/provenance/origin.py
@@ -55,7 +55,7 @@
provenance.origin_add(origin)
origin.retrieve_revisions(archive)
for revision in origin.revisions:
- graph = HistoryGraph(provenance, archive, revision)
+ graph = HistoryGraph(archive, revision)
origin_add_revision(provenance, origin, graph)
provenance.flush()
@@ -66,24 +66,19 @@
origin: OriginEntry,
graph: HistoryGraph,
) -> None:
- # XXX: simplified version of the origin-revision algorithm. This is generating flat
- # models for the history of all head revisions. No previous result is reused now!
- # The previous implementation was missing some paths from origins to certain
- # revisions due to a wrong reuse logic.
-
# head is treated separately since it should always be added to the given origin
- check_preferred_origin(provenance, origin, graph.head.entry)
- provenance.revision_add_to_origin(origin, graph.head.entry)
+ check_preferred_origin(provenance, origin, graph.head)
+ provenance.revision_add_to_origin(origin, graph.head)
visited = {graph.head}
# head's history should be recursively iterated starting from its parents
stack = list(graph.parents[graph.head])
while stack:
current = stack.pop()
- check_preferred_origin(provenance, origin, current.entry)
+ check_preferred_origin(provenance, origin, current)
# create a link between it and the head, and recursively walk its history
- provenance.revision_add_before_revision(graph.head.entry, current.entry)
+ provenance.revision_add_before_revision(graph.head, current)
visited.add(current)
for parent in graph.parents[current]:
if parent not in visited:
diff --git a/swh/provenance/provenance.py b/swh/provenance/provenance.py
--- a/swh/provenance/provenance.py
+++ b/swh/provenance/provenance.py
@@ -487,18 +487,8 @@
cache[revision.id] = origin
return cache.get(revision.id)
- def revision_in_history(self, revision: RevisionEntry) -> bool:
- return revision.id in self.cache["revision_before_revision"] or bool(
- self.storage.relation_get(RelationType.REV_BEFORE_REV, [revision.id])
- )
-
def revision_set_preferred_origin(
self, origin: OriginEntry, revision: RevisionEntry
) -> None:
self.cache["revision_origin"]["data"][revision.id] = origin.id
self.cache["revision_origin"]["added"].add(revision.id)
-
- def revision_visited(self, revision: RevisionEntry) -> bool:
- return revision.id in dict(self.cache["revision_in_origin"]) or bool(
- self.storage.relation_get(RelationType.REV_IN_ORG, [revision.id])
- )
diff --git a/swh/provenance/tests/data/history_graphs_with-merges_visits-01.yaml b/swh/provenance/tests/data/history_graphs_with-merges_visits-01.yaml
--- a/swh/provenance/tests/data/history_graphs_with-merges_visits-01.yaml
+++ b/swh/provenance/tests/data/history_graphs_with-merges_visits-01.yaml
@@ -2,229 +2,124 @@
- origin: "https://repo_with_merges/1/"
snapshot: "e2520f0dbf34c92754f00c5a60241dfa7d612868"
graphs:
- - head:
- rev: "1444db96cbd8cd791abe83527becee73d3c64e86"
- is_head: False
- in_history: False
+ - head: "1444db96cbd8cd791abe83527becee73d3c64e86"
graph:
1444db96cbd8cd791abe83527becee73d3c64e86:
- - rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
- is_head: False
- in_history: False
+ - "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
c0d8929936631ecbcf9147be6b8aa13b13b014e4: []
# History graph for snapshot with branches: R03 and R06
- origin: "https://repo_with_merges/1/"
snapshot: "e2520f0dbf34c92754f00c5a60241dfa7d612868"
graphs:
- - head:
- rev: "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb"
- is_head: False
- in_history: False
+ - head: "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb"
graph:
20f4da0f48609d9f7f908ebbcac3b3741a0f25cb:
- - rev: "1c533587277731236616cac0d44f3b46c1da0f8a"
- is_head: False
- in_history: False
+ - "1c533587277731236616cac0d44f3b46c1da0f8a"
1c533587277731236616cac0d44f3b46c1da0f8a:
- - rev: "1444db96cbd8cd791abe83527becee73d3c64e86"
- is_head: True
- in_history: False
+ - "1444db96cbd8cd791abe83527becee73d3c64e86"
1444db96cbd8cd791abe83527becee73d3c64e86:
- - rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
- is_head: False
- in_history: True
+ - "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
c0d8929936631ecbcf9147be6b8aa13b13b014e4: []
- - head:
- rev: "72d92d41a9095db2dd6b8fb1c62d92c8251753ff"
- is_head: False
- in_history: False
+ - head: "72d92d41a9095db2dd6b8fb1c62d92c8251753ff"
graph:
72d92d41a9095db2dd6b8fb1c62d92c8251753ff:
- - rev: "1444db96cbd8cd791abe83527becee73d3c64e86"
- is_head: True
- in_history: True
+ - "1444db96cbd8cd791abe83527becee73d3c64e86"
1444db96cbd8cd791abe83527becee73d3c64e86:
- - rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
- is_head: False
- in_history: True
+ - "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
c0d8929936631ecbcf9147be6b8aa13b13b014e4: []
# History graph for snapshot with branches: R05 and R06
- origin: "https://repo_with_merges/2/"
snapshot: "e2520f0dbf34c92754f00c5a60241dfa7d612868"
graphs:
- - head:
- rev: "65e58853df939b318c106c4c1f55acaf8b41c74c"
- is_head: False
- in_history: False
+ - head: "65e58853df939b318c106c4c1f55acaf8b41c74c"
graph:
65e58853df939b318c106c4c1f55acaf8b41c74c:
- - rev: "0d66eadcc15e0d7f6cfd4289329a7749a1309982"
- is_head: False
- in_history: False
+ - "0d66eadcc15e0d7f6cfd4289329a7749a1309982"
0d66eadcc15e0d7f6cfd4289329a7749a1309982:
- - rev: "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb"
- is_head: True
- in_history: False
+ - "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb"
20f4da0f48609d9f7f908ebbcac3b3741a0f25cb:
- - rev: "1c533587277731236616cac0d44f3b46c1da0f8a"
- is_head: False
- in_history: True
+ - "1c533587277731236616cac0d44f3b46c1da0f8a"
1c533587277731236616cac0d44f3b46c1da0f8a:
- - rev: "1444db96cbd8cd791abe83527becee73d3c64e86"
- is_head: True
- in_history: True
+ - "1444db96cbd8cd791abe83527becee73d3c64e86"
1444db96cbd8cd791abe83527becee73d3c64e86:
- - rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
- is_head: False
- in_history: True
+ - "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
c0d8929936631ecbcf9147be6b8aa13b13b014e4: []
- - head:
- rev: "72d92d41a9095db2dd6b8fb1c62d92c8251753ff"
- is_head: True
- in_history: False
+ - head: "72d92d41a9095db2dd6b8fb1c62d92c8251753ff"
graph:
72d92d41a9095db2dd6b8fb1c62d92c8251753ff:
- - rev: "1444db96cbd8cd791abe83527becee73d3c64e86"
- is_head: True
- in_history: True
+ - "1444db96cbd8cd791abe83527becee73d3c64e86"
1444db96cbd8cd791abe83527becee73d3c64e86:
- - rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
- is_head: False
- in_history: True
+ - "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
c0d8929936631ecbcf9147be6b8aa13b13b014e4: []
# History graph for snapshot with branches: R06 and R07
- origin: "https://repo_with_merges/1/"
snapshot: "e2520f0dbf34c92754f00c5a60241dfa7d612868"
graphs:
- - head:
- rev: "72d92d41a9095db2dd6b8fb1c62d92c8251753ff"
- is_head: True
- in_history: False
+ - head: "72d92d41a9095db2dd6b8fb1c62d92c8251753ff"
graph:
72d92d41a9095db2dd6b8fb1c62d92c8251753ff:
- - rev: "1444db96cbd8cd791abe83527becee73d3c64e86"
- is_head: True
- in_history: True
+ - "1444db96cbd8cd791abe83527becee73d3c64e86"
1444db96cbd8cd791abe83527becee73d3c64e86:
- - rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
- is_head: False
- in_history: True
+ - "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
c0d8929936631ecbcf9147be6b8aa13b13b014e4: []
- - head:
- rev: "fff0089fad98e8f5b46ec5c9025a20a602851ba6"
- is_head: False
- in_history: False
+ - head: "fff0089fad98e8f5b46ec5c9025a20a602851ba6"
graph:
fff0089fad98e8f5b46ec5c9025a20a602851ba6:
- - rev: "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb"
- is_head: True
- in_history: True
+ - "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb"
20f4da0f48609d9f7f908ebbcac3b3741a0f25cb:
- - rev: "1c533587277731236616cac0d44f3b46c1da0f8a"
- is_head: False
- in_history: True
+ - "1c533587277731236616cac0d44f3b46c1da0f8a"
1c533587277731236616cac0d44f3b46c1da0f8a:
- - rev: "1444db96cbd8cd791abe83527becee73d3c64e86"
- is_head: True
- in_history: True
+ - "1444db96cbd8cd791abe83527becee73d3c64e86"
1444db96cbd8cd791abe83527becee73d3c64e86:
- - rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
- is_head: False
- in_history: True
+ - "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
c0d8929936631ecbcf9147be6b8aa13b13b014e4: []
# History graph for snapshot with branches: R08
- origin: "https://repo_with_merges/1/"
snapshot: "e2520f0dbf34c92754f00c5a60241dfa7d612868"
graphs:
- - head:
- rev: "7c8f29237dded4f9d265e46ec7066503e7858e87"
- is_head: False
- in_history: False
+ - head: "7c8f29237dded4f9d265e46ec7066503e7858e87"
graph:
7c8f29237dded4f9d265e46ec7066503e7858e87:
- - rev: "65e58853df939b318c106c4c1f55acaf8b41c74c"
- is_head: True
- in_history: False
- - rev: "72d92d41a9095db2dd6b8fb1c62d92c8251753ff"
- is_head: True
- in_history: False
- - rev: "fff0089fad98e8f5b46ec5c9025a20a602851ba6"
- is_head: True
- in_history: False
+ - "65e58853df939b318c106c4c1f55acaf8b41c74c"
+ - "72d92d41a9095db2dd6b8fb1c62d92c8251753ff"
+ - "fff0089fad98e8f5b46ec5c9025a20a602851ba6"
65e58853df939b318c106c4c1f55acaf8b41c74c:
- - rev: "0d66eadcc15e0d7f6cfd4289329a7749a1309982"
- is_head: False
- in_history: True
+ - "0d66eadcc15e0d7f6cfd4289329a7749a1309982"
0d66eadcc15e0d7f6cfd4289329a7749a1309982:
- - rev: "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb"
- is_head: True
- in_history: True
+ - "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb"
20f4da0f48609d9f7f908ebbcac3b3741a0f25cb:
- - rev: "1c533587277731236616cac0d44f3b46c1da0f8a"
- is_head: False
- in_history: True
+ - "1c533587277731236616cac0d44f3b46c1da0f8a"
1c533587277731236616cac0d44f3b46c1da0f8a:
- - rev: "1444db96cbd8cd791abe83527becee73d3c64e86"
- is_head: True
- in_history: True
+ - "1444db96cbd8cd791abe83527becee73d3c64e86"
1444db96cbd8cd791abe83527becee73d3c64e86:
- - rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
- is_head: False
- in_history: True
+ - "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
c0d8929936631ecbcf9147be6b8aa13b13b014e4: []
72d92d41a9095db2dd6b8fb1c62d92c8251753ff:
- - rev: "1444db96cbd8cd791abe83527becee73d3c64e86"
- is_head: True
- in_history: True
+ - "1444db96cbd8cd791abe83527becee73d3c64e86"
fff0089fad98e8f5b46ec5c9025a20a602851ba6:
- - rev: "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb"
- is_head: True
- in_history: True
+ - "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb"
# History graph for snapshot with branches: R08
- origin: "https://repo_with_merges/2/"
snapshot: "e2520f0dbf34c92754f00c5a60241dfa7d612868"
graphs:
- - head:
- rev: "7c8f29237dded4f9d265e46ec7066503e7858e87"
- is_head: True
- in_history: False
+ - head: "7c8f29237dded4f9d265e46ec7066503e7858e87"
graph:
7c8f29237dded4f9d265e46ec7066503e7858e87:
- - rev: "65e58853df939b318c106c4c1f55acaf8b41c74c"
- is_head: True
- in_history: True
- - rev: "72d92d41a9095db2dd6b8fb1c62d92c8251753ff"
- is_head: True
- in_history: True
- - rev: "fff0089fad98e8f5b46ec5c9025a20a602851ba6"
- is_head: True
- in_history: True
+ - "65e58853df939b318c106c4c1f55acaf8b41c74c"
+ - "72d92d41a9095db2dd6b8fb1c62d92c8251753ff"
+ - "fff0089fad98e8f5b46ec5c9025a20a602851ba6"
65e58853df939b318c106c4c1f55acaf8b41c74c:
- - rev: "0d66eadcc15e0d7f6cfd4289329a7749a1309982"
- is_head: False
- in_history: True
+ - "0d66eadcc15e0d7f6cfd4289329a7749a1309982"
0d66eadcc15e0d7f6cfd4289329a7749a1309982:
- - rev: "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb"
- is_head: True
- in_history: True
+ - "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb"
20f4da0f48609d9f7f908ebbcac3b3741a0f25cb:
- - rev: "1c533587277731236616cac0d44f3b46c1da0f8a"
- is_head: False
- in_history: True
+ - "1c533587277731236616cac0d44f3b46c1da0f8a"
1c533587277731236616cac0d44f3b46c1da0f8a:
- - rev: "1444db96cbd8cd791abe83527becee73d3c64e86"
- is_head: True
- in_history: True
+ - "1444db96cbd8cd791abe83527becee73d3c64e86"
1444db96cbd8cd791abe83527becee73d3c64e86:
- - rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
- is_head: False
- in_history: True
+ - "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
c0d8929936631ecbcf9147be6b8aa13b13b014e4: []
72d92d41a9095db2dd6b8fb1c62d92c8251753ff:
- - rev: "1444db96cbd8cd791abe83527becee73d3c64e86"
- is_head: True
- in_history: True
+ - "1444db96cbd8cd791abe83527becee73d3c64e86"
fff0089fad98e8f5b46ec5c9025a20a602851ba6:
- - rev: "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb"
- is_head: True
- in_history: True
+ - "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb"
diff --git a/swh/provenance/tests/test_history_graph.py b/swh/provenance/tests/test_history_graph.py
--- a/swh/provenance/tests/test_history_graph.py
+++ b/swh/provenance/tests/test_history_graph.py
@@ -42,9 +42,8 @@
print("Expected graph:", expected_graph_as_dict)
computed_graph = HistoryGraph(
- provenance,
archive,
- RevisionEntry(hash_to_bytes(expected_graph_as_dict["head"]["rev"])),
+ RevisionEntry(hash_to_bytes(expected_graph_as_dict["head"])),
)
print("Computed graph:", computed_graph.as_dict())
assert computed_graph.as_dict() == expected_graph_as_dict

File Metadata

Mime Type: text/plain
Expires: Dec 20 2024, 6:19 AM (11 w, 4 d ago)
Storage Engine: blob
Storage Format: Raw Data
Storage Handle: 3218030

Event Timeline