Page MenuHomeSoftware Heritage

D6990.id25356.diff
No OneTemporary

D6990.id25356.diff

diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,4 +1,5 @@
# Add here internal Software Heritage dependencies, one per line.
swh.core[db,http] >= 0.14
+swh.graph
swh.model >= 2.6.1
swh.storage
diff --git a/swh/provenance/__init__.py b/swh/provenance/__init__.py
--- a/swh/provenance/__init__.py
+++ b/swh/provenance/__init__.py
@@ -17,7 +17,7 @@
"""Get an archive object of class ``cls`` with arguments ``args``.
Args:
- cls: archive's class, either 'api' or 'direct'
+ cls: archive's class, either 'api', 'direct' or 'graph'
args: dictionary of arguments passed to the archive class constructor
Returns:
@@ -39,6 +39,13 @@
from .postgresql.archive import ArchivePostgreSQL
return ArchivePostgreSQL(BaseDb.connect(**kwargs["db"]).conn)
+
+ elif cls == "graph":
+ from .swhgraph.archive import ArchiveGraph
+
+ url = kwargs.get("url")
+ assert url is not None
+ return ArchiveGraph(url, get_storage(**kwargs["storage"]))
else:
raise ValueError
diff --git a/swh/provenance/archive.py b/swh/provenance/archive.py
--- a/swh/provenance/archive.py
+++ b/swh/provenance/archive.py
@@ -47,8 +47,7 @@
id: sha1 id of the snapshot.
Yields:
- sha1 ids of revisions that are a target of such snapshot. Revisions are
- guaranteed to be retrieved in chronological order
+ sha1 ids of revisions that are a target of such snapshot.
"""
...
diff --git a/swh/provenance/postgresql/archive.py b/swh/provenance/postgresql/archive.py
--- a/swh/provenance/postgresql/archive.py
+++ b/swh/provenance/postgresql/archive.py
@@ -135,7 +135,7 @@
ON (RL.target=RV.id)
WHERE B.target_type='release'::snapshot_target
AND RL.target_type='revision'::object_type)
- ORDER BY date, id)
+ )
SELECT id FROM heads
""",
(id,),
diff --git a/swh/provenance/storage/archive.py b/swh/provenance/storage/archive.py
--- a/swh/provenance/storage/archive.py
+++ b/swh/provenance/storage/archive.py
@@ -70,4 +70,4 @@
if revision is not None and revision.date is not None
)
- yield from (head for _, head in sorted(revisions))
+ yield from (head for _, head in revisions)
diff --git a/swh/provenance/swhgraph/__init__.py b/swh/provenance/swhgraph/__init__.py
new file mode 100644
diff --git a/swh/provenance/swhgraph/archive.py b/swh/provenance/swhgraph/archive.py
new file mode 100644
--- /dev/null
+++ b/swh/provenance/swhgraph/archive.py
@@ -0,0 +1,80 @@
+# Copyright (C) 2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from typing import Any, Dict, Iterable
+
+from swh.core.statsd import statsd
+from swh.graph.client import RemoteGraphClient
+from swh.model.model import Sha1Git
+from swh.model.swhids import CoreSWHID, ObjectType
+from swh.storage.interface import StorageInterface
+
+ARCHIVE_DURATION_METRIC = "swh_provenance_archive_graph_duration_seconds"
+
+
+class ArchiveGraph:
+    """Archive backend that answers history queries through a remote swh-graph.
+
+    Revision parents and snapshot heads are looked up with a
+    ``RemoteGraphClient``; directory listing is not implemented by this backend.
+    """
+
+    def __init__(self, url: str, storage: StorageInterface) -> None:
+        """Create the graph client.
+
+        Args:
+            url: address handed to ``RemoteGraphClient``.
+            storage: storage instance, kept as ``self.storage``.
+        """
+        self.graph = RemoteGraphClient(url)
+        self.storage = storage  # required by ArchiveInterface
+
+    @statsd.timed(metric=ARCHIVE_DURATION_METRIC, tags={"method": "directory_ls"})
+    def directory_ls(self, id: Sha1Git, minsize: int = 0) -> Iterable[Dict[str, Any]]:
+        """Not supported by the graph backend; always raises.
+
+        Raises:
+            NotImplementedError: on every call.
+        """
+        raise NotImplementedError
+
+    @statsd.timed(
+        metric=ARCHIVE_DURATION_METRIC, tags={"method": "revision_get_parents"}
+    )
+    def revision_get_parents(self, id: Sha1Git) -> Iterable[Sha1Git]:
+        """Yield the sha1 ids of the direct parents of revision ``id``.
+
+        Args:
+            id: sha1 id of the revision.
+
+        Yields:
+            sha1 ids of the revisions one ``rev:rev`` edge away from ``id``.
+        """
+        src = CoreSWHID(object_type=ObjectType.REVISION, object_id=id)
+        # Only the immediate successors are wanted: a full visit would walk the
+        # whole history and report every ancestor as a parent.
+        request = self.graph.neighbors(str(src), edges="rev:rev", return_types="rev")
+
+        yield from (CoreSWHID.from_string(swhid).object_id for swhid in request)
+
+    @statsd.timed(metric=ARCHIVE_DURATION_METRIC, tags={"method": "snapshot_get_heads"})
+    def snapshot_get_heads(self, id: Sha1Git) -> Iterable[Sha1Git]:
+        """Yield the sha1 ids of the revisions targeted by snapshot ``id``.
+
+        Args:
+            id: sha1 id of the snapshot.
+
+        Yields:
+            sha1 ids of revisions reached directly from the snapshot or through
+            a release it points to; no ordering is applied here.
+        """
+        src = CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=id)
+        # ``snp:rel`` must be allowed, otherwise release nodes are never reached
+        # and the ``rel:rev`` edges cannot contribute any head.
+        request = self.graph.visit_nodes(
+            str(src), edges="snp:rev,snp:rel,rel:rev", return_types="rev"
+        )
+
+        yield from (CoreSWHID.from_string(swhid).object_id for swhid in request)

File Metadata

Mime Type
text/plain
Expires
Jan 30 2025, 12:12 PM (6 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3233045

Event Timeline