Page MenuHomeSoftware Heritage

D8239.id29797.diff
No OneTemporary

D8239.id29797.diff

diff --git a/swh/graphql/app.py b/swh/graphql/app.py
--- a/swh/graphql/app.py
+++ b/swh/graphql/app.py
@@ -39,4 +39,5 @@
scalars.id_scalar,
scalars.datetime_scalar,
scalars.swhid_scalar,
+ scalars.content_hash_scalar,
)
diff --git a/swh/graphql/backends/archive.py b/swh/graphql/backends/archive.py
--- a/swh/graphql/backends/archive.py
+++ b/swh/graphql/backends/archive.py
@@ -69,10 +69,6 @@
directory_id, limit=first, page_token=after
)
- def get_content(self, content_id):
- # FIXME, only for tests
- return self.storage.content_find({"sha1_git": content_id})
-
def is_object_available(self, object_id: str, object_type: ObjectType) -> bool:
mapping = {
ObjectType.CONTENT: self.storage.content_missing_per_sha1_git,
@@ -82,3 +78,9 @@
ObjectType.SNAPSHOT: self.storage.snapshot_missing,
}
return not list(mapping[object_type]([object_id]))
+
+ def get_contents(self, checksums: dict):
+ # Forward the checksums mapping (e.g. {"sha1_git": ...}) to
+ # storage.content_find and return its result unchanged
+ return self.storage.content_find(checksums)
+
+ def get_content_data(self, content_sha1):
+ # Return whatever storage.content_get_data yields for the given sha1
+ # NOTE(review): presumably raw bytes or None when missing -- confirm
+ return self.storage.content_get_data(content_sha1)
diff --git a/swh/graphql/resolvers/content.py b/swh/graphql/resolvers/content.py
--- a/swh/graphql/resolvers/content.py
+++ b/swh/graphql/resolvers/content.py
@@ -18,19 +18,46 @@
Base resolver for all the content nodes
"""
- def _get_content_by_id(self, content_id):
- content = archive.Archive().get_content(content_id)
+ def _get_content_by_hash(self, checksums: dict):
+ content = archive.Archive().get_contents(checksums)
+ # in case of a conflict, return the first element
return content[0] if content else None
@property
def checksum(self):
- # FIXME, return a Node object
+ # FIXME, use a Node instead
return {k: v.hex() for (k, v) in self._node.hashes().items()}
@property
def id(self):
return self._node.sha1_git
+ @property
+ def data(self):
+ # Build a dict with two keys: "raw", the result of
+ # Archive().get_content_data for this node's sha1, and "url", a link
+ # made of a hard-coded archive API base URL and the sha1 hex digest
+ # NOTE: the raw payload is fetched every time this property is read,
+ # even when the caller only needs "url"
+ # FIXME, return a Node object
+ # FIXME, add more ways to retrieve data
+ archive_url = "https://archive.softwareheritage.org/api/1/"
+ content_sha1 = self._node.hashes()["sha1"]
+ return {
+ "raw": archive.Archive().get_content_data(content_sha1),
+ "url": f"{archive_url}content/sha1:{content_sha1.hex()}/raw/",
+ }
+
+ @property
+ def ContentFileType(self):
+ # Placeholder that always returns None
+ # NOTE(review): this is named after the ContentFileType schema type,
+ # while the schema field is `fileType`; sibling properties such as
+ # `data` and `checksum` use the field name -- confirm this is resolved
+ # FIXME, fetch data from the indexers
+ return None
+
+ @property
+ def ConetentLanguage(self):
+ # Placeholder that always returns None
+ # NOTE(review): "Conetent" looks like a typo, and the schema field is
+ # `language`; sibling properties such as `data` and `checksum` use the
+ # field name -- confirm the intended attribute name
+ # FIXME, fetch data from the indexers
+ return None
+
+ @property
+ def ConetentLicense(self):
+ # Placeholder that always returns None
+ # NOTE(review): "Conetent" looks like a typo, and the schema field is
+ # `license`; sibling properties such as `data` and `checksum` use the
+ # field name -- confirm the intended attribute name
+ # FIXME, fetch data from the indexers
+ return None
+
def is_type_of(self):
# is_type_of is required only when resolving a UNION type
# This is for ariadne to return the right type
@@ -43,17 +70,27 @@
"""
def _get_node_data(self):
- return self._get_content_by_id(self.kwargs.get("swhid").object_id)
+ checksums = {"sha1_git": self.kwargs.get("swhid").object_id}
+ return self._get_content_by_hash(checksums)
+
+
+class HashContentNode(BaseContentNode):
+ """
+ Node resolver for a content requested with one or more checksums
+ """
+
+ def _get_node_data(self):
+ # presumably "checksums" is a list of (hash_type, hash_value) pairs as
+ # returned by the ContentHash scalar parser; dict() turns them into the
+ # {hash_type: hash_value} mapping handed to _get_content_by_hash
+ # (a hash type given twice keeps only its last value)
+ checksums = dict(self.kwargs.get("checksums"))
+ return self._get_content_by_hash(checksums)
class TargetContentNode(BaseContentNode):
"""
- Node resolver for a content requested from a
- directory entry or from a release target
+ Node resolver for a content requested as a target
+ This request could come from a directory entry, a release or a branch
"""
obj: Union[DirectoryEntryNode, BaseReleaseNode, SnapshotBranchNode]
def _get_node_data(self):
- content_id = self.obj.target_hash
- return self._get_content_by_id(content_id)
+ return self._get_content_by_hash(checksums={"sha1_git": self.obj.target_hash})
diff --git a/swh/graphql/resolvers/resolver_factory.py b/swh/graphql/resolvers/resolver_factory.py
--- a/swh/graphql/resolvers/resolver_factory.py
+++ b/swh/graphql/resolvers/resolver_factory.py
@@ -3,7 +3,7 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from .content import ContentNode, TargetContentNode
+from .content import ContentNode, HashContentNode, TargetContentNode
from .directory import DirectoryNode, RevisionDirectoryNode, TargetDirectoryNode
from .directory_entry import DirectoryEntryConnection
from .origin import OriginConnection, OriginNode
@@ -49,6 +49,7 @@
"release-content": TargetContentNode,
"directory": DirectoryNode,
"content": ContentNode,
+ "content-by-hash": HashContentNode,
"dir-entry-dir": TargetDirectoryNode,
"dir-entry-file": TargetContentNode,
"search-result-snapshot": TargetSnapshotNode,
diff --git a/swh/graphql/resolvers/resolvers.py b/swh/graphql/resolvers/resolvers.py
--- a/swh/graphql/resolvers/resolvers.py
+++ b/swh/graphql/resolvers/resolvers.py
@@ -183,6 +183,14 @@
return resolver(obj, info, **kw)
+@query.field("contentByHash")
+def content_by_hash_resolver(
+    obj: None, info: GraphQLResolveInfo, **kw
+) -> rs.content.HashContentNode:
+    """
+    Resolve the contentByHash query field
+
+    Looks up the node resolver registered under "content-by-hash" and
+    calls it with the parent object, the resolve info and the query
+    arguments.
+    """
+    resolver = get_node_resolver("content-by-hash")
+    return resolver(obj, info, **kw)
+
+
# Connection resolvers
# A connection resolver should return an instance of BaseConnection
diff --git a/swh/graphql/resolvers/scalars.py b/swh/graphql/resolvers/scalars.py
--- a/swh/graphql/resolvers/scalars.py
+++ b/swh/graphql/resolvers/scalars.py
@@ -8,12 +8,14 @@
from ariadne import ScalarType
from swh.graphql.utils import utils
+from swh.model import hashutil
from swh.model.model import TimestampWithTimezone
from swh.model.swhids import CoreSWHID
datetime_scalar = ScalarType("DateTime")
swhid_scalar = ScalarType("SWHID")
id_scalar = ScalarType("ID")
+content_hash_scalar = ScalarType("ContentHash")
@id_scalar.serializer
@@ -41,3 +43,18 @@
@swhid_scalar.serializer
def serialize_swhid(value):
return str(value)
+
+
+@content_hash_scalar.value_parser
+def validate_content_hash(value):
+    """
+    Parse a ContentHash input of the form "hash-type:hash-value"
+
+    Returns a (hash_type, hash_value) tuple, where hash_value is the result
+    of hashutil.hash_to_bytes applied to the part after the colon.
+    Raises AttributeError("Invalid content checksum", <cause>) when the value
+    cannot be split into exactly two parts or the hash cannot be converted.
+    """
+    try:
+        hash_type, hash_string = value.split(":")
+        hash_value = hashutil.hash_to_bytes(hash_string)
+    except Exception as e:
+        # One handler is enough: ValueError is a subclass of Exception and
+        # every failure is reported to the client the same way
+        # FIXME, log this error
+        raise AttributeError("Invalid content checksum", e) from e
+    # FIXME, add validation for the hash_type
+    return hash_type, hash_value
diff --git a/swh/graphql/schema/schema.graphql b/swh/graphql/schema/schema.graphql
--- a/swh/graphql/schema/schema.graphql
+++ b/swh/graphql/schema/schema.graphql
@@ -8,6 +8,11 @@
"""
scalar DateTime
+"""
+Content identifier in the form hash-type:hash-value
+"""
+scalar ContentHash
+
"""
Object with an id
"""
@@ -755,24 +760,54 @@
}
"""
-An object with different checksums
+An object with different content checksums
"""
type ContentChecksum {
+ blake2s256: String
+ sha1: String
+ sha1_git: String
+ sha256: String
+}
+
+"""
+Object with different content data representations
+"""
+type ContentData {
"""
+ File data as a string
"""
- blake2s256: String
+ raw: BinaryString
"""
+ URL to download the file data
"""
- sha1: String
+ url: String
+}
+type ContentFileType {
"""
+ Detected content encoding
"""
- sha1_git: String
+ encoding: String
"""
+ Detected MIME type of the content
"""
- sha256: String
+ mimetype: String
+}
+
+type ContentLanguage {
+ """
+ Detected programming language if any
+ """
+ lang: String
+}
+
+type ContentLicense {
+ """
+ Array of strings containing the detected license names
+ """
+ licenses: [String]
}
"""
@@ -803,6 +838,26 @@
Content status, visible or hidden
"""
status: String
+
+ """
+ File content
+ """
+ data: ContentData
+
+ """
+ Information about the content MIME type
+ """
+ fileType: ContentFileType
+
+ """
+ Information about the programming language used in the content
+ """
+ language: ContentLanguage
+
+ """
+ Information about the license of the content
+ """
+ license: ContentLicense
}
"""
@@ -970,6 +1025,17 @@
swhid: SWHID!
): Content
+ """
+ Get the content by one or more hashes
+ Use multiple hashes for an accurate result
+ """
+  contentByHash(
+    """
+    List of hashType:hashValue strings; null items are not accepted
+    """
+    checksums: [ContentHash!]!
+  ): Content
+
"""
Resolve the given SWHID to an object
"""
diff --git a/swh/graphql/tests/functional/test_content.py b/swh/graphql/tests/functional/test_content.py
new file mode 100644
--- /dev/null
+++ b/swh/graphql/tests/functional/test_content.py
@@ -0,0 +1,153 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import pytest
+
+from . import utils
+from ..data import get_contents
+
+
+@pytest.mark.parametrize("content", get_contents())
+def test_get_contnet_with_swhid(client, content):
+ query_str = """
+ {
+ content(swhid: "%s") {
+ swhid
+ checksum {
+ blake2s256
+ sha1
+ sha1_git
+ sha256
+ }
+ length
+ status
+ data {
+ raw {
+ text
+ }
+ url
+ }
+ fileType {
+ encoding
+ }
+ language {
+ lang
+ }
+ license {
+ licenses
+ }
+ }
+ }
+ """
+ data, _ = utils.get_query_response(client, query_str % content.swhid())
+ archive_url = "https://archive.softwareheritage.org/api/1/"
+ response = {
+ "swhid": str(content.swhid()),
+ "checksum": {
+ "blake2s256": content.blake2s256.hex(),
+ "sha1": content.sha1.hex(),
+ "sha1_git": content.sha1_git.hex(),
+ "sha256": content.sha256.hex(),
+ },
+ "length": content.length,
+ "status": content.status,
+ "data": {
+ "raw": {"text": content.data.decode()},
+ "url": f"{archive_url}content/sha1:{content.sha1.hex()}/raw/",
+ },
+ "fileType": None,
+ "language": None,
+ "license": None,
+ }
+ assert data["content"] == response
+
+
+@pytest.mark.parametrize("content", get_contents())
+def test_get_content_with_hash(client, content):
+ query_str = """
+ {
+ contentByHash(checksums: ["blake2s256:%s", "sha1:%s", "sha1_git:%s", "sha256:%s"]) {
+ swhid
+ }
+ }
+ """
+ data, _ = utils.get_query_response(
+ client,
+ query_str
+ % (
+ content.blake2s256.hex(),
+ content.sha1.hex(),
+ content.sha1_git.hex(),
+ content.sha256.hex(),
+ ),
+ )
+ assert data["contentByHash"] == {"swhid": str(content.swhid())}
+
+
+def test_get_content_with_invalid_swhid(client):
+ query_str = """
+ {
+ content(swhid: "swh:1:cnt:invalid") {
+ swhid
+ }
+ }
+ """
+ errors = utils.get_error_response(client, query_str)
+ # API will throw an error in case of an invalid SWHID
+ assert len(errors) == 1
+ assert "Invalid SWHID: invalid syntax" in errors[0]["message"]
+
+
+def test_get_content_with_invalid_hashes(client):
+ content = get_contents()[0]
+ query_str = """
+ {
+ contentByHash(checksums: ["blake2s256:%s", "sha1:%s", "sha1_git:%s", "sha256:%s"]) {
+ swhid
+ }
+ }
+ """
+ errors = utils.get_error_response(
+ client,
+ query_str
+ % (
+ "invalid", # Only one hash is invalid
+ content.sha1.hex(),
+ content.sha1_git.hex(),
+ content.sha256.hex(),
+ ),
+ )
+ # API will throw an error in case of an invalid content hash
+ assert len(errors) == 1
+ assert "Invalid content checksum" in errors[0]["message"]
+
+
+def test_get_content_as_target(client):
+ # SWHID of a test dir with a file entry
+ directory_swhid = "swh:1:dir:87b339104f7dc2a8163dec988445e3987995545f"
+ query_str = """
+ {
+ directory(swhid: "%s") {
+ swhid
+ entries(first: 2) {
+ nodes {
+ type
+ target {
+ ...on Content {
+ swhid
+ length
+ }
+ }
+ }
+ }
+ }
+ }
+ """
+ data, _ = utils.get_query_response(client, query_str % directory_swhid)
+ content_obj = data["directory"]["entries"]["nodes"][1]["target"]
+ assert content_obj == {
+ "length": 4,
+ "swhid": "swh:1:cnt:86bc6b377e9d25f9d26777a4a28d08e63e7c5779",
+ }

File Metadata

Mime Type
text/plain
Expires
Mar 17 2025, 6:53 PM (7 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3227368

Event Timeline