Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123992
D6862.id24875.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
19 KB
Subscribers
None
D6862.id24875.diff
View Options
diff --git a/swh/provenance/graph.py b/swh/provenance/graph.py
--- a/swh/provenance/graph.py
+++ b/swh/provenance/graph.py
@@ -23,47 +23,15 @@
UTCMIN = datetime.min.replace(tzinfo=timezone.utc)
-class HistoryNode:
- def __init__(
- self, entry: RevisionEntry, is_head: bool = False, in_history: bool = False
- ) -> None:
- self.entry = entry
- # A revision is `is_head` if it is directly pointed by an origin (ie. a head
- # revision for some snapshot)
- self.is_head = is_head
- # A revision is `in_history` if it appears in the history graph of an already
- # processed revision in the provenance database
- self.in_history = in_history
- # XXX: the current simplified version of the origin-revision layer algorithm
- # does not use this previous two flags at all. They are kept for now but might
- # be removed in the future (hence, RevisionEntry might be used instead of
- # HistoryNode).
-
- def __str__(self) -> str:
- return f"<{self.entry}: is_head={self.is_head}, in_history={self.in_history}>"
-
- def as_dict(self) -> Dict[str, Any]:
- return {
- "rev": hash_to_hex(self.entry.id),
- "is_head": self.is_head,
- "in_history": self.in_history,
- }
-
-
class HistoryGraph:
@statsd.timed(metric=GRAPH_DURATION_METRIC, tags={"method": "build_history_graph"})
def __init__(
self,
- provenance: ProvenanceInterface,
archive: ArchiveInterface,
revision: RevisionEntry,
) -> None:
- self._head = HistoryNode(
- revision,
- is_head=provenance.revision_visited(revision),
- in_history=provenance.revision_in_history(revision),
- )
- self._graph: Dict[HistoryNode, Set[HistoryNode]] = {}
+ self._head = revision
+ self._graph: Dict[RevisionEntry, Set[RevisionEntry]] = {}
stack = [self._head]
while stack:
@@ -71,22 +39,17 @@
if current not in self._graph:
self._graph[current] = set()
- current.entry.retrieve_parents(archive)
- for parent in current.entry.parents:
- node = HistoryNode(
- parent,
- is_head=provenance.revision_visited(parent),
- in_history=provenance.revision_in_history(parent),
- )
- self._graph[current].add(node)
- stack.append(node)
+ current.retrieve_parents(archive)
+ for parent in current.parents:
+ self._graph[current].add(parent)
+ stack.append(parent)
@property
- def head(self) -> HistoryNode:
+ def head(self) -> RevisionEntry:
return self._head
@property
- def parents(self) -> Dict[HistoryNode, Set[HistoryNode]]:
+ def parents(self) -> Dict[RevisionEntry, Set[RevisionEntry]]:
return self._graph
def __str__(self) -> str:
@@ -94,11 +57,10 @@
def as_dict(self) -> Dict[str, Any]:
return {
- "head": self.head.as_dict(),
+ "head": hash_to_hex(self.head.id),
"graph": {
- hash_to_hex(node.entry.id): sorted(
- [parent.as_dict() for parent in parents],
- key=lambda d: d["rev"],
+ hash_to_hex(node.id): sorted(
+ [hash_to_hex(parent.id) for parent in parents]
)
for node, parents in self._graph.items()
},
diff --git a/swh/provenance/interface.py b/swh/provenance/interface.py
--- a/swh/provenance/interface.py
+++ b/swh/provenance/interface.py
@@ -377,20 +377,8 @@
"""Retrieve the preferred origin associated to `revision`."""
...
- def revision_in_history(self, revision: RevisionEntry) -> bool:
- """Check if `revision` is known to be an ancestor of some head revision in the
- provenance model.
- """
- ...
-
def revision_set_preferred_origin(
self, origin: OriginEntry, revision: RevisionEntry
) -> None:
"""Associate `origin` as the preferred origin for `revision`."""
...
-
- def revision_visited(self, revision: RevisionEntry) -> bool:
- """Check if `revision` is known to be a head revision for some origin in the
- provenance model.
- """
- ...
diff --git a/swh/provenance/origin.py b/swh/provenance/origin.py
--- a/swh/provenance/origin.py
+++ b/swh/provenance/origin.py
@@ -55,7 +55,7 @@
provenance.origin_add(origin)
origin.retrieve_revisions(archive)
for revision in origin.revisions:
- graph = HistoryGraph(provenance, archive, revision)
+ graph = HistoryGraph(archive, revision)
origin_add_revision(provenance, origin, graph)
provenance.flush()
@@ -66,24 +66,19 @@
origin: OriginEntry,
graph: HistoryGraph,
) -> None:
- # XXX: simplified version of the origin-revision algorithm. This is generating flat
- # models for the history of all head revisions. No previous result is reused now!
- # The previous implementation was missing some paths from origins to certain
- # revisions due to a wrong reuse logic.
-
# head is treated separately since it should always be added to the given origin
- check_preferred_origin(provenance, origin, graph.head.entry)
- provenance.revision_add_to_origin(origin, graph.head.entry)
+ check_preferred_origin(provenance, origin, graph.head)
+ provenance.revision_add_to_origin(origin, graph.head)
visited = {graph.head}
# head's history should be recursively iterated starting from its parents
stack = list(graph.parents[graph.head])
while stack:
current = stack.pop()
- check_preferred_origin(provenance, origin, current.entry)
+ check_preferred_origin(provenance, origin, current)
# create a link between it and the head, and recursively walk its history
- provenance.revision_add_before_revision(graph.head.entry, current.entry)
+ provenance.revision_add_before_revision(graph.head, current)
visited.add(current)
for parent in graph.parents[current]:
if parent not in visited:
diff --git a/swh/provenance/provenance.py b/swh/provenance/provenance.py
--- a/swh/provenance/provenance.py
+++ b/swh/provenance/provenance.py
@@ -487,18 +487,8 @@
cache[revision.id] = origin
return cache.get(revision.id)
- def revision_in_history(self, revision: RevisionEntry) -> bool:
- return revision.id in self.cache["revision_before_revision"] or bool(
- self.storage.relation_get(RelationType.REV_BEFORE_REV, [revision.id])
- )
-
def revision_set_preferred_origin(
self, origin: OriginEntry, revision: RevisionEntry
) -> None:
self.cache["revision_origin"]["data"][revision.id] = origin.id
self.cache["revision_origin"]["added"].add(revision.id)
-
- def revision_visited(self, revision: RevisionEntry) -> bool:
- return revision.id in dict(self.cache["revision_in_origin"]) or bool(
- self.storage.relation_get(RelationType.REV_IN_ORG, [revision.id])
- )
diff --git a/swh/provenance/tests/data/history_graphs_with-merges_visits-01.yaml b/swh/provenance/tests/data/history_graphs_with-merges_visits-01.yaml
--- a/swh/provenance/tests/data/history_graphs_with-merges_visits-01.yaml
+++ b/swh/provenance/tests/data/history_graphs_with-merges_visits-01.yaml
@@ -2,229 +2,124 @@
- origin: "https://repo_with_merges/1/"
snapshot: "e2520f0dbf34c92754f00c5a60241dfa7d612868"
graphs:
- - head:
- rev: "1444db96cbd8cd791abe83527becee73d3c64e86"
- is_head: False
- in_history: False
+ - head: "1444db96cbd8cd791abe83527becee73d3c64e86"
graph:
1444db96cbd8cd791abe83527becee73d3c64e86:
- - rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
- is_head: False
- in_history: False
+ - "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
c0d8929936631ecbcf9147be6b8aa13b13b014e4: []
# History graph for snapshot with branches: R03 and R06
- origin: "https://repo_with_merges/1/"
snapshot: "e2520f0dbf34c92754f00c5a60241dfa7d612868"
graphs:
- - head:
- rev: "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb"
- is_head: False
- in_history: False
+ - head: "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb"
graph:
20f4da0f48609d9f7f908ebbcac3b3741a0f25cb:
- - rev: "1c533587277731236616cac0d44f3b46c1da0f8a"
- is_head: False
- in_history: False
+ - "1c533587277731236616cac0d44f3b46c1da0f8a"
1c533587277731236616cac0d44f3b46c1da0f8a:
- - rev: "1444db96cbd8cd791abe83527becee73d3c64e86"
- is_head: True
- in_history: False
+ - "1444db96cbd8cd791abe83527becee73d3c64e86"
1444db96cbd8cd791abe83527becee73d3c64e86:
- - rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
- is_head: False
- in_history: True
+ - "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
c0d8929936631ecbcf9147be6b8aa13b13b014e4: []
- - head:
- rev: "72d92d41a9095db2dd6b8fb1c62d92c8251753ff"
- is_head: False
- in_history: False
+ - head: "72d92d41a9095db2dd6b8fb1c62d92c8251753ff"
graph:
72d92d41a9095db2dd6b8fb1c62d92c8251753ff:
- - rev: "1444db96cbd8cd791abe83527becee73d3c64e86"
- is_head: True
- in_history: True
+ - "1444db96cbd8cd791abe83527becee73d3c64e86"
1444db96cbd8cd791abe83527becee73d3c64e86:
- - rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
- is_head: False
- in_history: True
+ - "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
c0d8929936631ecbcf9147be6b8aa13b13b014e4: []
# History graph for snapshot with branches: R05 and R06
- origin: "https://repo_with_merges/2/"
snapshot: "e2520f0dbf34c92754f00c5a60241dfa7d612868"
graphs:
- - head:
- rev: "65e58853df939b318c106c4c1f55acaf8b41c74c"
- is_head: False
- in_history: False
+ - head: "65e58853df939b318c106c4c1f55acaf8b41c74c"
graph:
65e58853df939b318c106c4c1f55acaf8b41c74c:
- - rev: "0d66eadcc15e0d7f6cfd4289329a7749a1309982"
- is_head: False
- in_history: False
+ - "0d66eadcc15e0d7f6cfd4289329a7749a1309982"
0d66eadcc15e0d7f6cfd4289329a7749a1309982:
- - rev: "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb"
- is_head: True
- in_history: False
+ - "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb"
20f4da0f48609d9f7f908ebbcac3b3741a0f25cb:
- - rev: "1c533587277731236616cac0d44f3b46c1da0f8a"
- is_head: False
- in_history: True
+ - "1c533587277731236616cac0d44f3b46c1da0f8a"
1c533587277731236616cac0d44f3b46c1da0f8a:
- - rev: "1444db96cbd8cd791abe83527becee73d3c64e86"
- is_head: True
- in_history: True
+ - "1444db96cbd8cd791abe83527becee73d3c64e86"
1444db96cbd8cd791abe83527becee73d3c64e86:
- - rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
- is_head: False
- in_history: True
+ - "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
c0d8929936631ecbcf9147be6b8aa13b13b014e4: []
- - head:
- rev: "72d92d41a9095db2dd6b8fb1c62d92c8251753ff"
- is_head: True
- in_history: False
+ - head: "72d92d41a9095db2dd6b8fb1c62d92c8251753ff"
graph:
72d92d41a9095db2dd6b8fb1c62d92c8251753ff:
- - rev: "1444db96cbd8cd791abe83527becee73d3c64e86"
- is_head: True
- in_history: True
+ - "1444db96cbd8cd791abe83527becee73d3c64e86"
1444db96cbd8cd791abe83527becee73d3c64e86:
- - rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
- is_head: False
- in_history: True
+ - "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
c0d8929936631ecbcf9147be6b8aa13b13b014e4: []
# History graph for snapshot with branches: R06 and R07
- origin: "https://repo_with_merges/1/"
snapshot: "e2520f0dbf34c92754f00c5a60241dfa7d612868"
graphs:
- - head:
- rev: "72d92d41a9095db2dd6b8fb1c62d92c8251753ff"
- is_head: True
- in_history: False
+ - head: "72d92d41a9095db2dd6b8fb1c62d92c8251753ff"
graph:
72d92d41a9095db2dd6b8fb1c62d92c8251753ff:
- - rev: "1444db96cbd8cd791abe83527becee73d3c64e86"
- is_head: True
- in_history: True
+ - "1444db96cbd8cd791abe83527becee73d3c64e86"
1444db96cbd8cd791abe83527becee73d3c64e86:
- - rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
- is_head: False
- in_history: True
+ - "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
c0d8929936631ecbcf9147be6b8aa13b13b014e4: []
- - head:
- rev: "fff0089fad98e8f5b46ec5c9025a20a602851ba6"
- is_head: False
- in_history: False
+ - head: "fff0089fad98e8f5b46ec5c9025a20a602851ba6"
graph:
fff0089fad98e8f5b46ec5c9025a20a602851ba6:
- - rev: "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb"
- is_head: True
- in_history: True
+ - "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb"
20f4da0f48609d9f7f908ebbcac3b3741a0f25cb:
- - rev: "1c533587277731236616cac0d44f3b46c1da0f8a"
- is_head: False
- in_history: True
+ - "1c533587277731236616cac0d44f3b46c1da0f8a"
1c533587277731236616cac0d44f3b46c1da0f8a:
- - rev: "1444db96cbd8cd791abe83527becee73d3c64e86"
- is_head: True
- in_history: True
+ - "1444db96cbd8cd791abe83527becee73d3c64e86"
1444db96cbd8cd791abe83527becee73d3c64e86:
- - rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
- is_head: False
- in_history: True
+ - "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
c0d8929936631ecbcf9147be6b8aa13b13b014e4: []
# History graph for snapshot with branches: R08
- origin: "https://repo_with_merges/1/"
snapshot: "e2520f0dbf34c92754f00c5a60241dfa7d612868"
graphs:
- - head:
- rev: "7c8f29237dded4f9d265e46ec7066503e7858e87"
- is_head: False
- in_history: False
+ - head: "7c8f29237dded4f9d265e46ec7066503e7858e87"
graph:
7c8f29237dded4f9d265e46ec7066503e7858e87:
- - rev: "65e58853df939b318c106c4c1f55acaf8b41c74c"
- is_head: True
- in_history: False
- - rev: "72d92d41a9095db2dd6b8fb1c62d92c8251753ff"
- is_head: True
- in_history: False
- - rev: "fff0089fad98e8f5b46ec5c9025a20a602851ba6"
- is_head: True
- in_history: False
+ - "65e58853df939b318c106c4c1f55acaf8b41c74c"
+ - "72d92d41a9095db2dd6b8fb1c62d92c8251753ff"
+ - "fff0089fad98e8f5b46ec5c9025a20a602851ba6"
65e58853df939b318c106c4c1f55acaf8b41c74c:
- - rev: "0d66eadcc15e0d7f6cfd4289329a7749a1309982"
- is_head: False
- in_history: True
+ - "0d66eadcc15e0d7f6cfd4289329a7749a1309982"
0d66eadcc15e0d7f6cfd4289329a7749a1309982:
- - rev: "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb"
- is_head: True
- in_history: True
+ - "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb"
20f4da0f48609d9f7f908ebbcac3b3741a0f25cb:
- - rev: "1c533587277731236616cac0d44f3b46c1da0f8a"
- is_head: False
- in_history: True
+ - "1c533587277731236616cac0d44f3b46c1da0f8a"
1c533587277731236616cac0d44f3b46c1da0f8a:
- - rev: "1444db96cbd8cd791abe83527becee73d3c64e86"
- is_head: True
- in_history: True
+ - "1444db96cbd8cd791abe83527becee73d3c64e86"
1444db96cbd8cd791abe83527becee73d3c64e86:
- - rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
- is_head: False
- in_history: True
+ - "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
c0d8929936631ecbcf9147be6b8aa13b13b014e4: []
72d92d41a9095db2dd6b8fb1c62d92c8251753ff:
- - rev: "1444db96cbd8cd791abe83527becee73d3c64e86"
- is_head: True
- in_history: True
+ - "1444db96cbd8cd791abe83527becee73d3c64e86"
fff0089fad98e8f5b46ec5c9025a20a602851ba6:
- - rev: "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb"
- is_head: True
- in_history: True
+ - "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb"
# History graph for snapshot with branches: R08
- origin: "https://repo_with_merges/2/"
snapshot: "e2520f0dbf34c92754f00c5a60241dfa7d612868"
graphs:
- - head:
- rev: "7c8f29237dded4f9d265e46ec7066503e7858e87"
- is_head: True
- in_history: False
+ - head: "7c8f29237dded4f9d265e46ec7066503e7858e87"
graph:
7c8f29237dded4f9d265e46ec7066503e7858e87:
- - rev: "65e58853df939b318c106c4c1f55acaf8b41c74c"
- is_head: True
- in_history: True
- - rev: "72d92d41a9095db2dd6b8fb1c62d92c8251753ff"
- is_head: True
- in_history: True
- - rev: "fff0089fad98e8f5b46ec5c9025a20a602851ba6"
- is_head: True
- in_history: True
+ - "65e58853df939b318c106c4c1f55acaf8b41c74c"
+ - "72d92d41a9095db2dd6b8fb1c62d92c8251753ff"
+ - "fff0089fad98e8f5b46ec5c9025a20a602851ba6"
65e58853df939b318c106c4c1f55acaf8b41c74c:
- - rev: "0d66eadcc15e0d7f6cfd4289329a7749a1309982"
- is_head: False
- in_history: True
+ - "0d66eadcc15e0d7f6cfd4289329a7749a1309982"
0d66eadcc15e0d7f6cfd4289329a7749a1309982:
- - rev: "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb"
- is_head: True
- in_history: True
+ - "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb"
20f4da0f48609d9f7f908ebbcac3b3741a0f25cb:
- - rev: "1c533587277731236616cac0d44f3b46c1da0f8a"
- is_head: False
- in_history: True
+ - "1c533587277731236616cac0d44f3b46c1da0f8a"
1c533587277731236616cac0d44f3b46c1da0f8a:
- - rev: "1444db96cbd8cd791abe83527becee73d3c64e86"
- is_head: True
- in_history: True
+ - "1444db96cbd8cd791abe83527becee73d3c64e86"
1444db96cbd8cd791abe83527becee73d3c64e86:
- - rev: "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
- is_head: False
- in_history: True
+ - "c0d8929936631ecbcf9147be6b8aa13b13b014e4"
c0d8929936631ecbcf9147be6b8aa13b13b014e4: []
72d92d41a9095db2dd6b8fb1c62d92c8251753ff:
- - rev: "1444db96cbd8cd791abe83527becee73d3c64e86"
- is_head: True
- in_history: True
+ - "1444db96cbd8cd791abe83527becee73d3c64e86"
fff0089fad98e8f5b46ec5c9025a20a602851ba6:
- - rev: "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb"
- is_head: True
- in_history: True
+ - "20f4da0f48609d9f7f908ebbcac3b3741a0f25cb"
diff --git a/swh/provenance/tests/test_history_graph.py b/swh/provenance/tests/test_history_graph.py
--- a/swh/provenance/tests/test_history_graph.py
+++ b/swh/provenance/tests/test_history_graph.py
@@ -42,9 +42,8 @@
print("Expected graph:", expected_graph_as_dict)
computed_graph = HistoryGraph(
- provenance,
archive,
- RevisionEntry(hash_to_bytes(expected_graph_as_dict["head"]["rev"])),
+ RevisionEntry(hash_to_bytes(expected_graph_as_dict["head"])),
)
print("Computed graph:", computed_graph.as_dict())
assert computed_graph.as_dict() == expected_graph_as_dict
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 20 2024, 8:47 AM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3218030
Attached To
D6862: Clean up history graph implementation
Event Timeline
Log In to Comment