diff --git a/swh/provenance/archive.py b/swh/provenance/archive.py --- a/swh/provenance/archive.py +++ b/swh/provenance/archive.py @@ -2,7 +2,7 @@ from typing_extensions import Protocol, runtime_checkable -from swh.model.model import Revision, Sha1Git +from swh.model.model import Sha1Git @runtime_checkable @@ -19,15 +19,14 @@ """ ... - def revision_get(self, ids: Iterable[Sha1Git]) -> Iterable[Revision]: - """Given a list of sha1, return the revisions' information + def revision_get_parents(self, id: Sha1Git) -> Iterable[Sha1Git]: + """List parents of one revision. Args: - revisions: list of sha1s for the revisions to be retrieved + id: sha1 id of the revision to list parents from. Yields: - revisions matching the identifiers. If a revision does - not exist, the provided sha1 is simply ignored. + sha1 ids of the parents of that revision. """ ... diff --git a/swh/provenance/model.py b/swh/provenance/model.py --- a/swh/provenance/model.py +++ b/swh/provenance/model.py @@ -57,22 +57,8 @@ def retrieve_parents(self, archive: ArchiveInterface): if self._parents_entries is None: if self._parents_ids is None: - revision = list(archive.revision_get([self.id])) - if revision: - self._parents_ids = revision[0].parents - else: - self._parents_ids = [] - - self._parents_entries = [ - RevisionEntry( - id=rev.id, - root=rev.directory, - date=rev.date.to_datetime(), - parents=rev.parents, - ) - for rev in archive.revision_get(self._parents_ids) - if rev.date is not None - ] + self._parents_ids = archive.revision_get_parents(self.id) + self._parents_entries = [RevisionEntry(id) for id in self._parents_ids] @property def parents(self) -> Iterator["RevisionEntry"]: diff --git a/swh/provenance/postgresql/archive.py b/swh/provenance/postgresql/archive.py --- a/swh/provenance/postgresql/archive.py +++ b/swh/provenance/postgresql/archive.py @@ -3,7 +3,7 @@ from methodtools import lru_cache import psycopg2 -from swh.model.model import ObjectType, Revision, Sha1Git, TargetType 
+from swh.model.model import ObjectType, Sha1Git, TargetType from swh.storage.postgresql.storage import Storage @@ -62,39 +62,19 @@ for row in cursor.fetchall() ] - def revision_get(self, ids: Iterable[Sha1Git]) -> Iterable[Revision]: + def revision_get_parents(self, id: Sha1Git) -> Iterable[Sha1Git]: with self.conn.cursor() as cursor: - psycopg2.extras.execute_values( - cursor, + cursor.execute( """ - SELECT t.id, revision.date, revision.directory, - ARRAY( - SELECT rh.parent_id::bytea - FROM revision_history rh - WHERE rh.id = t.id - ORDER BY rh.parent_rank - ) - FROM (VALUES %s) as t(sortkey, id) - LEFT JOIN revision ON t.id = revision.id - LEFT JOIN person author ON revision.author = author.id - LEFT JOIN person committer ON revision.committer = committer.id - ORDER BY sortkey + SELECT RH.parent_id::bytea + FROM revision_history AS RH + WHERE RH.id=%s + ORDER BY RH.parent_rank """, - ((sortkey, id) for sortkey, id in enumerate(ids)), + (id,), ) - for row in cursor.fetchall(): - parents = [] - for parent in row[3]: - if parent: - parents.append(parent) - yield Revision.from_dict( - { - "id": row[0], - "date": row[1], - "directory": row[2], - "parents": tuple(parents), - } - ) + # One row per parent, ordered by parent_rank + yield from (row[0] for row in cursor.fetchall()) def snapshot_get_heads(self, id: Sha1Git) -> Iterable[Sha1Git]: # TODO: this code is duplicated here (same as in swh.provenance.storage.archive) diff --git a/swh/provenance/storage/archive.py b/swh/provenance/storage/archive.py --- a/swh/provenance/storage/archive.py +++ b/swh/provenance/storage/archive.py @@ -1,6 +1,6 @@ from typing import Any, Dict, Iterable, Set -from swh.model.model import ObjectType, Revision, Sha1Git, TargetType +from swh.model.model import ObjectType, Sha1Git, TargetType from swh.storage.interface import StorageInterface @@ -12,11 +12,10 @@ # TODO: filter unused fields yield from self.storage.directory_ls(id) - def revision_get(self, ids: Iterable[Sha1Git]) -> 
Iterable[Revision]: - # TODO: filter unused fields - yield from ( - rev for rev in self.storage.revision_get(list(ids)) if rev is not None - ) + def revision_get_parents(self, id: Sha1Git) -> Iterable[Sha1Git]: + rev = self.storage.revision_get([id])[0] + if rev is not None: + yield from rev.parents def snapshot_get_heads(self, id: Sha1Git) -> Iterable[Sha1Git]: from swh.core.utils import grouper