diff --git a/requirements-swh-graph.txt b/requirements-swh-graph.txt new file mode 100644 --- /dev/null +++ b/requirements-swh-graph.txt @@ -0,0 +1 @@ +swh.graph >= 0.3.2 diff --git a/setup.py b/setup.py --- a/setup.py +++ b/setup.py @@ -36,9 +36,6 @@ return requirements -# Edit this part to match your module. -# Full sample: -# https://forge.softwareheritage.org/diffusion/DCORE/browse/master/setup.py setup( name="swh.provenance", description="Software Heritage code provenance", @@ -48,12 +45,15 @@ author="Software Heritage developers", author_email="swh-devel@inria.fr", url="https://forge.softwareheritage.org/diffusion/222/", - packages=find_packages(), # packages's modules + packages=find_packages(), install_requires=parse_requirements() + parse_requirements("swh"), tests_require=parse_requirements("test"), setup_requires=["setuptools-scm"], use_scm_version=True, - extras_require={"testing": parse_requirements("test")}, + extras_require={ + "testing": parse_requirements("test"), + "graph": parse_requirements("swh-graph"), + }, include_package_data=True, entry_points=""" [swh.cli.subcommands] diff --git a/swh/provenance/__init__.py b/swh/provenance/__init__.py --- a/swh/provenance/__init__.py +++ b/swh/provenance/__init__.py @@ -17,7 +17,7 @@ """Get an archive object of class ``cls`` with arguments ``args``. 
Args: - cls: archive's class, either 'api' or 'direct' + cls: archive's class, either 'api', 'direct' or 'graph' args: dictionary of arguments passed to the archive class constructor Returns: @@ -39,6 +39,13 @@ from .postgresql.archive import ArchivePostgreSQL return ArchivePostgreSQL(BaseDb.connect(**kwargs["db"]).conn) + + elif cls == "graph": + from .swhgraph.archive import ArchiveGraph + + url = kwargs.get("url") + assert url is not None + return ArchiveGraph(url, get_storage(**kwargs["storage"])) else: raise ValueError diff --git a/swh/provenance/archive.py b/swh/provenance/archive.py --- a/swh/provenance/archive.py +++ b/swh/provenance/archive.py @@ -47,8 +47,7 @@ id: sha1 id of the snapshot. Yields: - sha1 ids of revisions that are a target of such snapshot. Revisions are - guaranteed to be retrieved in chronological order + sha1 ids of revisions that are a target of such snapshot. """ ... diff --git a/swh/provenance/postgresql/archive.py b/swh/provenance/postgresql/archive.py --- a/swh/provenance/postgresql/archive.py +++ b/swh/provenance/postgresql/archive.py @@ -135,7 +135,7 @@ ON (RL.target=RV.id) WHERE B.target_type='release'::snapshot_target AND RL.target_type='revision'::object_type) - ORDER BY date, id) + ) SELECT id FROM heads """, (id,), diff --git a/swh/provenance/storage/archive.py b/swh/provenance/storage/archive.py --- a/swh/provenance/storage/archive.py +++ b/swh/provenance/storage/archive.py @@ -70,4 +70,4 @@ if revision is not None and revision.date is not None ) - yield from (head for _, head in sorted(revisions)) + yield from (head for _, head in revisions) diff --git a/swh/provenance/swhgraph/__init__.py b/swh/provenance/swhgraph/__init__.py new file mode 100644 diff --git a/swh/provenance/swhgraph/archive.py b/swh/provenance/swhgraph/archive.py new file mode 100644 --- /dev/null +++ b/swh/provenance/swhgraph/archive.py @@ -0,0 +1,46 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level 
class ArchiveGraph:
    """Archive backend that resolves history queries through a swh-graph server.

    Revision parents and snapshot heads are obtained from a
    ``RemoteGraphClient``; listing directories is not supported by this
    backend.
    """

    def __init__(self, url: str, storage: StorageInterface) -> None:
        """Create the backend.

        Args:
            url: address of the swh-graph service, handed to ``RemoteGraphClient``.
            storage: storage instance kept on the object as ``self.storage``.
        """
        self.graph = RemoteGraphClient(url)
        self.storage = storage  # required by ArchiveInterface

    @statsd.timed(metric=ARCHIVE_DURATION_METRIC, tags={"method": "directory_ls"})
    def directory_ls(self, id: Sha1Git, minsize: int = 0) -> Iterable[Dict[str, Any]]:
        """Directory listing is not available through the graph backend.

        Raises:
            NotImplementedError: always.
        """
        raise NotImplementedError

    # NOTE(review): the two methods below are generators, so ``statsd.timed``
    # presumably only measures the creation of the generator object and not the
    # remote request itself -- confirm against swh.core.statsd.
    @statsd.timed(
        metric=ARCHIVE_DURATION_METRIC, tags={"method": "revision_get_parents"}
    )
    def revision_get_parents(self, id: Sha1Git) -> Iterable[Sha1Git]:
        """Yield the ids of the direct parents of a revision.

        Args:
            id: sha1_git of the revision.

        Yields:
            sha1_git ids of the revisions that are parents of ``id``.
        """
        src = str(CoreSWHID(object_type=ObjectType.REVISION, object_id=id))
        # ``neighbors`` follows a single rev->rev edge.  ``visit_nodes`` would
        # traverse the whole ancestry and report every ancestor as a "parent".
        parents = self.graph.neighbors(src, edges="rev:rev", return_types="rev")

        yield from (
            CoreSWHID.from_string(swhid).object_id
            for swhid in parents
            if swhid != src  # defensive: never report the revision itself
        )

    @statsd.timed(metric=ARCHIVE_DURATION_METRIC, tags={"method": "snapshot_get_heads"})
    def snapshot_get_heads(self, id: Sha1Git) -> Iterable[Sha1Git]:
        """Yield the ids of the revisions targeted by a snapshot.

        Revisions are reached either directly from the snapshot's branches or
        through a release the snapshot points to.  No ordering is guaranteed.

        Args:
            id: sha1_git of the snapshot.

        Yields:
            sha1_git ids of the head revisions.
        """
        src = str(CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=id))
        # ``snp:rel`` must be allowed, otherwise release nodes are never visited
        # and the ``rel:rev`` restriction can never match anything.
        heads = self.graph.visit_nodes(
            src, edges="snp:rev,snp:rel,rel:rev", return_types="rev"
        )

        yield from (CoreSWHID.from_string(swhid).object_id for swhid in heads)
and install swh-docs in develop mode -e git+https://forge.softwareheritage.org/source/swh-docs#egg=swh.docs @@ -60,6 +63,7 @@ usedevelop = true extras = testing + graph deps = # install swh-docs in develop mode -e ../swh-docs