Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7163663
D6990.id25356.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
4 KB
Subscribers
None
D6990.id25356.diff
View Options
diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,4 +1,5 @@
# Add here internal Software Heritage dependencies, one per line.
swh.core[db,http] >= 0.14
+swh.graph
swh.model >= 2.6.1
swh.storage
diff --git a/swh/provenance/__init__.py b/swh/provenance/__init__.py
--- a/swh/provenance/__init__.py
+++ b/swh/provenance/__init__.py
@@ -17,7 +17,7 @@
"""Get an archive object of class ``cls`` with arguments ``args``.
Args:
- cls: archive's class, either 'api' or 'direct'
+ cls: archive's class, either 'api', 'direct' or 'graph'
args: dictionary of arguments passed to the archive class constructor
Returns:
@@ -39,6 +39,13 @@
from .postgresql.archive import ArchivePostgreSQL
return ArchivePostgreSQL(BaseDb.connect(**kwargs["db"]).conn)
+
+ elif cls == "graph":
+ from .swhgraph.archive import ArchiveGraph
+
+ url = kwargs.get("url")
+ assert url is not None
+ return ArchiveGraph(url, get_storage(**kwargs["storage"]))
else:
raise ValueError
diff --git a/swh/provenance/archive.py b/swh/provenance/archive.py
--- a/swh/provenance/archive.py
+++ b/swh/provenance/archive.py
@@ -47,8 +47,7 @@
id: sha1 id of the snapshot.
Yields:
- sha1 ids of revisions that are a target of such snapshot. Revisions are
- guaranteed to be retrieved in chronological order
+ sha1 ids of revisions that are a target of such snapshot.
"""
...
diff --git a/swh/provenance/postgresql/archive.py b/swh/provenance/postgresql/archive.py
--- a/swh/provenance/postgresql/archive.py
+++ b/swh/provenance/postgresql/archive.py
@@ -135,7 +135,7 @@
ON (RL.target=RV.id)
WHERE B.target_type='release'::snapshot_target
AND RL.target_type='revision'::object_type)
- ORDER BY date, id)
+ )
SELECT id FROM heads
""",
(id,),
diff --git a/swh/provenance/storage/archive.py b/swh/provenance/storage/archive.py
--- a/swh/provenance/storage/archive.py
+++ b/swh/provenance/storage/archive.py
@@ -70,4 +70,4 @@
if revision is not None and revision.date is not None
)
- yield from (head for _, head in sorted(revisions))
+ yield from (head for _, head in revisions)
diff --git a/swh/provenance/swhgraph/__init__.py b/swh/provenance/swhgraph/__init__.py
new file mode 100644
diff --git a/swh/provenance/swhgraph/archive.py b/swh/provenance/swhgraph/archive.py
new file mode 100644
--- /dev/null
+++ b/swh/provenance/swhgraph/archive.py
@@ -0,0 +1,46 @@
+# Copyright (C) 2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from typing import Any, Dict, Iterable
+
+from swh.core.statsd import statsd
+from swh.graph.client import RemoteGraphClient
+from swh.model.model import Sha1Git
+from swh.model.swhids import CoreSWHID, ObjectType
+from swh.storage.interface import StorageInterface
+
+ARCHIVE_DURATION_METRIC = "swh_provenance_archive_graph_duration_seconds"
+
+
+class ArchiveGraph:
+ def __init__(self, url: str, storage: StorageInterface) -> None:
+ self.graph = RemoteGraphClient(url)
+ self.storage = storage # required by ArchiveInterface
+
+ @statsd.timed(metric=ARCHIVE_DURATION_METRIC, tags={"method": "directory_ls"})
+ def directory_ls(self, id: Sha1Git, minsize: int = 0) -> Iterable[Dict[str, Any]]:
+ raise NotImplementedError
+
+ @statsd.timed(
+ metric=ARCHIVE_DURATION_METRIC, tags={"method": "revision_get_parents"}
+ )
+ def revision_get_parents(self, id: Sha1Git) -> Iterable[Sha1Git]:
+ src = CoreSWHID(object_type=ObjectType.REVISION, object_id=id)
+ request = self.graph.visit_nodes(str(src), edges="rev:rev", return_types="rev")
+
+ yield from (
+ CoreSWHID.from_string(swhid).object_id
+ for swhid in request
+ if swhid != str(src)
+ )
+
+ @statsd.timed(metric=ARCHIVE_DURATION_METRIC, tags={"method": "snapshot_get_heads"})
+ def snapshot_get_heads(self, id: Sha1Git) -> Iterable[Sha1Git]:
+ src = CoreSWHID(object_type=ObjectType.SNAPSHOT, object_id=id)
+ request = self.graph.visit_nodes(
+ str(src), edges="snp:rev,rel:rev", return_types="rev"
+ )
+
+ yield from (CoreSWHID.from_string(swhid).object_id for swhid in request)
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Jan 30 2025, 12:12 PM (6 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3233045
Attached To
D6990: Add partial implementation of `ArchiveGraph` class
Event Timeline
Log In to Comment