Page MenuHomeSoftware Heritage

D7166.id25973.diff
No OneTemporary

D7166.id25973.diff

diff --git a/swh/provenance/interface.py b/swh/provenance/interface.py
--- a/swh/provenance/interface.py
+++ b/swh/provenance/interface.py
@@ -367,6 +367,10 @@
target of an snapshot for `origin` in the archive)."""
...
+ def revision_is_head(self, revision: RevisionEntry) -> bool:
+ """Check whether `revision` is recorded as a head revision of some origin."""
+ ...
+
def revision_get_date(self, revision: RevisionEntry) -> Optional[datetime]:
"""Retrieve the date associated to `revision`."""
...
diff --git a/swh/provenance/origin.py b/swh/provenance/origin.py
--- a/swh/provenance/origin.py
+++ b/swh/provenance/origin.py
@@ -50,14 +50,20 @@
provenance: ProvenanceInterface,
archive: ArchiveInterface,
origins: List[OriginEntry],
+ commit: bool = True,
) -> None:
for origin in origins:
provenance.origin_add(origin)
origin.retrieve_revisions(archive)
for revision in origin.revisions:
- graph = HistoryGraph(archive, revision)
- origin_add_revision(provenance, origin, graph)
- provenance.flush()
+ if not provenance.revision_is_head(revision):
+ graph = HistoryGraph(archive, revision)
+ origin_add_revision(provenance, origin, graph)
+ # the head is always added to the given origin, so handle it separately
+ check_preferred_origin(provenance, origin, revision)
+ provenance.revision_add_to_origin(origin, revision)
+ if commit:
+ provenance.flush()
@statsd.timed(metric=ORIGIN_DURATION_METRIC, tags={"method": "process_revision"})
@@ -66,11 +72,7 @@
origin: OriginEntry,
graph: HistoryGraph,
) -> None:
- # head is treated separately since it should always be added to the given origin
- check_preferred_origin(provenance, origin, graph.head)
- provenance.revision_add_to_origin(origin, graph.head)
visited = {graph.head}
-
# head's history should be recursively iterated starting from its parents
stack = list(graph.parents[graph.head])
while stack:
diff --git a/swh/provenance/provenance.py b/swh/provenance/provenance.py
--- a/swh/provenance/provenance.py
+++ b/swh/provenance/provenance.py
@@ -472,6 +472,9 @@
) -> None:
self.cache["revision_in_origin"].add((revision.id, origin.id))
+ def revision_is_head(self, revision: RevisionEntry) -> bool:
+ return bool(self.storage.relation_get(RelationType.REV_IN_ORG, [revision.id]))
+
def revision_get_date(self, revision: RevisionEntry) -> Optional[datetime]:
return self.get_dates("revision", [revision.id]).get(revision.id)

File Metadata

Mime Type
text/plain
Expires
Dec 20 2024, 3:15 AM (11 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3216325

Event Timeline