diff --git a/swh/provenance/archive.py b/swh/provenance/archive.py
--- a/swh/provenance/archive.py
+++ b/swh/provenance/archive.py
@@ -39,7 +39,8 @@
             id: sha1 id of the snapshot.
 
         Yields:
-            sha1 ids of revisions that a target of such snapshot.
+            sha1 ids of revisions that are a target of such snapshot. Revisions are
+            guaranteed to be retrieved in chronological order.
 
         """
         ...
diff --git a/swh/provenance/postgresql/archive.py b/swh/provenance/postgresql/archive.py
--- a/swh/provenance/postgresql/archive.py
+++ b/swh/provenance/postgresql/archive.py
@@ -78,22 +78,34 @@
         with self.conn.cursor() as cursor:
             cursor.execute(
                 """
-                WITH S AS (SELECT object_id FROM snapshot WHERE snapshot.id=%s)
-                (SELECT B.target AS head
-                 FROM S
-                 JOIN snapshot_branches AS BS ON (S.object_id=BS.snapshot_id)
-                 JOIN snapshot_branch AS B ON (BS.branch_id=B.object_id)
-                 WHERE B.target_type='revision'::snapshot_target)
-                UNION
-                (SELECT R.target AS head
-                 FROM S
-                 JOIN snapshot_branches AS BS ON (S.object_id=BS.snapshot_id)
-                 JOIN snapshot_branch AS B ON (BS.branch_id=B.object_id)
-                 JOIN release AS R ON (B.target=R.id)
-                 WHERE B.target_type='release'::snapshot_target
-                   AND R.target_type='revision'::object_type)
+                WITH
+                snaps AS (SELECT object_id FROM snapshot WHERE snapshot.id=%s),
+                heads AS ((SELECT R.id, R.date
+                            FROM snaps
+                            JOIN snapshot_branches AS BS
+                              ON (snaps.object_id=BS.snapshot_id)
+                            JOIN snapshot_branch AS B
+                              ON (BS.branch_id=B.object_id)
+                            JOIN revision AS R
+                              ON (B.target=R.id)
+                            WHERE B.target_type='revision'::snapshot_target)
+                          UNION
+                          (SELECT RV.id, RV.date
+                            FROM snaps
+                            JOIN snapshot_branches AS BS
+                              ON (snaps.object_id=BS.snapshot_id)
+                            JOIN snapshot_branch AS B
+                              ON (BS.branch_id=B.object_id)
+                            JOIN release AS RL
+                              ON (B.target=RL.id)
+                            JOIN revision AS RV
+                              ON (RL.target=RV.id)
+                            WHERE B.target_type='release'::snapshot_target
+                              AND RL.target_type='revision'::object_type))
+                -- Sort in the outer query: an ORDER BY inside a CTE does not bind its readers.
+                SELECT id FROM heads
+                ORDER BY date, id
                 """,
                 (id,),
             )
-            heads = [row[0] for row in cursor.fetchall()]
-            yield from heads
+            yield from (row[0] for row in cursor.fetchall())
diff --git a/swh/provenance/storage/archive.py b/swh/provenance/storage/archive.py
--- a/swh/provenance/storage/archive.py
+++ b/swh/provenance/storage/archive.py
@@ -1,4 +1,5 @@
-from typing import Any, Dict, Iterable, Set
+from datetime import datetime
+from typing import Any, Dict, Iterable, Set, Tuple
 
 from swh.model.model import ObjectType, Sha1Git, TargetType
 from swh.storage.interface import StorageInterface
@@ -46,12 +47,12 @@
                 if release is not None and release.target_type == ObjectType.REVISION
             )
 
-        revisions: Set[Sha1Git] = set()
+        revisions: Set[Tuple[datetime, Sha1Git]] = set()
         for targets in grouper(targets_set, batchsize):
             revisions.update(
-                revision.id
+                (revision.date.to_datetime(), revision.id)
                 for revision in self.storage.revision_get(list(targets))
-                if revision is not None
+                if revision is not None and revision.date is not None
             )
 
-        yield from revisions
+        yield from (head for _, head in sorted(revisions))