diff --git a/swh/provenance/interface.py b/swh/provenance/interface.py --- a/swh/provenance/interface.py +++ b/swh/provenance/interface.py @@ -367,6 +367,10 @@ target of an snapshot for `origin` in the archive).""" ... + def revision_is_head(self, revision: RevisionEntry) -> bool: + """Check if `revision` is associated as a head revision for some origin.""" + ... + def revision_get_date(self, revision: RevisionEntry) -> Optional[datetime]: """Retrieve the date associated to `revision`.""" ... diff --git a/swh/provenance/origin.py b/swh/provenance/origin.py --- a/swh/provenance/origin.py +++ b/swh/provenance/origin.py @@ -50,14 +50,20 @@ provenance: ProvenanceInterface, archive: ArchiveInterface, origins: List[OriginEntry], + commit: bool = True, ) -> None: for origin in origins: provenance.origin_add(origin) origin.retrieve_revisions(archive) for revision in origin.revisions: - graph = HistoryGraph(archive, revision) - origin_add_revision(provenance, origin, graph) - provenance.flush() + if not provenance.revision_is_head(revision): + graph = HistoryGraph(archive, revision) + origin_add_revision(provenance, origin, graph) + # head is treated separately + check_preferred_origin(provenance, origin, revision) + provenance.revision_add_to_origin(origin, revision) + if commit: + provenance.flush() @statsd.timed(metric=ORIGIN_DURATION_METRIC, tags={"method": "process_revision"}) @@ -66,11 +72,7 @@ origin: OriginEntry, graph: HistoryGraph, ) -> None: - # head is treated separately since it should always be added to the given origin - check_preferred_origin(provenance, origin, graph.head) - provenance.revision_add_to_origin(origin, graph.head) visited = {graph.head} - # head's history should be recursively iterated starting from its parents stack = list(graph.parents[graph.head]) while stack: diff --git a/swh/provenance/provenance.py b/swh/provenance/provenance.py --- a/swh/provenance/provenance.py +++ b/swh/provenance/provenance.py @@ -472,6 +472,9 @@ ) -> None: self.cache["revision_in_origin"].add((revision.id, origin.id)) + def revision_is_head(self, revision: RevisionEntry) -> bool: + return bool(self.storage.relation_get(RelationType.REV_IN_ORG, [revision.id])) + def revision_get_date(self, revision: RevisionEntry) -> Optional[datetime]: return self.get_dates("revision", [revision.id]).get(revision.id) diff --git a/swh/provenance/tests/conftest.py b/swh/provenance/tests/conftest.py --- a/swh/provenance/tests/conftest.py +++ b/swh/provenance/tests/conftest.py @@ -37,7 +37,10 @@ postgresql: psycopg2.extensions.connection, ) -> Dict[str, str]: """return a working and initialized provenance db""" - from swh.core.cli.db import init_admin_extensions, populate_database_for_package + from swh.core.db.db_utils import ( + init_admin_extensions, + populate_database_for_package, + ) init_admin_extensions("swh.provenance", postgresql.dsn) populate_database_for_package( @@ -104,7 +107,10 @@ ) -> Generator[ProvenanceInterface, None, None]: """Return a working and initialized ProvenanceInterface object""" - from swh.core.cli.db import init_admin_extensions, populate_database_for_package + from swh.core.db.db_utils import ( + init_admin_extensions, + populate_database_for_package, + ) init_admin_extensions("swh.provenance", provenance_postgresql.dsn) populate_database_for_package( diff --git a/swh/provenance/tests/test_cli.py b/swh/provenance/tests/test_cli.py --- a/swh/provenance/tests/test_cli.py +++ b/swh/provenance/tests/test_cli.py @@ -12,8 +12,8 @@ from swh.core.cli import swh as swhmain import swh.core.cli.db # noqa ; ensure cli is loaded -from swh.core.cli.db import init_admin_extensions from swh.core.db import BaseDb +from swh.core.db.db_utils import init_admin_extensions import swh.provenance.cli # noqa ; ensure cli is loaded @@ -35,6 +35,7 @@ TABLES = { "dbflavor", + "dbmodule", "dbversion", "content", "content_in_revision", @@ -50,7 +51,7 @@ @pytest.mark.parametrize( - "flavor, dbtables", (("with-path", TABLES | {"location"}), ("without-path", TABLES)) + "flavor, dbtables", (("with-path", TABLES), ("without-path", TABLES)) ) def test_cli_db_create_and_init_db_with_flavor( monkeypatch: MonkeyPatch,