Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7343037
D8239.id29797.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
12 KB
Subscribers
None
D8239.id29797.diff
View Options
diff --git a/swh/graphql/app.py b/swh/graphql/app.py
--- a/swh/graphql/app.py
+++ b/swh/graphql/app.py
@@ -39,4 +39,5 @@
scalars.id_scalar,
scalars.datetime_scalar,
scalars.swhid_scalar,
+ scalars.content_hash_scalar,
)
diff --git a/swh/graphql/backends/archive.py b/swh/graphql/backends/archive.py
--- a/swh/graphql/backends/archive.py
+++ b/swh/graphql/backends/archive.py
@@ -69,10 +69,6 @@
directory_id, limit=first, page_token=after
)
- def get_content(self, content_id):
- # FIXME, only for tests
- return self.storage.content_find({"sha1_git": content_id})
-
def is_object_available(self, object_id: str, object_type: ObjectType) -> bool:
mapping = {
ObjectType.CONTENT: self.storage.content_missing_per_sha1_git,
@@ -82,3 +78,9 @@
ObjectType.SNAPSHOT: self.storage.snapshot_missing,
}
return not list(mapping[object_type]([object_id]))
+
+ def get_contents(self, checksums: dict):
+ return self.storage.content_find(checksums)
+
+ def get_content_data(self, content_sha1):
+ return self.storage.content_get_data(content_sha1)
diff --git a/swh/graphql/resolvers/content.py b/swh/graphql/resolvers/content.py
--- a/swh/graphql/resolvers/content.py
+++ b/swh/graphql/resolvers/content.py
@@ -18,19 +18,46 @@
Base resolver for all the content nodes
"""
- def _get_content_by_id(self, content_id):
- content = archive.Archive().get_content(content_id)
+ def _get_content_by_hash(self, checksums: dict):
+ content = archive.Archive().get_contents(checksums)
+ # in case of a conflict, return the first element
return content[0] if content else None
@property
def checksum(self):
- # FIXME, return a Node object
+ # FIXME, use a Node instead
return {k: v.hex() for (k, v) in self._node.hashes().items()}
@property
def id(self):
return self._node.sha1_git
+ @property
+ def data(self):
+ # FIXME, return a Node object
+ # FIXME, add more ways to retrieve data
+ archive_url = "https://archive.softwareheritage.org/api/1/"
+ content_sha1 = self._node.hashes()["sha1"]
+ return {
+ "raw": archive.Archive().get_content_data(content_sha1),
+ "url": f"{archive_url}content/sha1:{content_sha1.hex()}/raw/",
+ }
+
+ @property
+ def ContentFileType(self):
+ # FIXME, fetch data from the indexers
+ return None
+
+ @property
+ def ContentLanguage(self):
+ # FIXME, fetch data from the indexers
+ return None
+
+ @property
+ def ContentLicense(self):
+ # FIXME, fetch data from the indexers
+ return None
+
def is_type_of(self):
# is_type_of is required only when resolving a UNION type
# This is for ariadne to return the right type
@@ -43,17 +70,27 @@
"""
def _get_node_data(self):
- return self._get_content_by_id(self.kwargs.get("swhid").object_id)
+ checksums = {"sha1_git": self.kwargs.get("swhid").object_id}
+ return self._get_content_by_hash(checksums)
+
+
+class HashContentNode(BaseContentNode):
+ """
+ Node resolver for a content requested with one or more checksums
+ """
+
+ def _get_node_data(self):
+ checksums = dict(self.kwargs.get("checksums"))
+ return self._get_content_by_hash(checksums)
class TargetContentNode(BaseContentNode):
"""
- Node resolver for a content requested from a
- directory entry or from a release target
+ Node resolver for a content requested as a target
+ This request could be from a directory entry, a release or a branch
"""
obj: Union[DirectoryEntryNode, BaseReleaseNode, SnapshotBranchNode]
def _get_node_data(self):
- content_id = self.obj.target_hash
- return self._get_content_by_id(content_id)
+ return self._get_content_by_hash(checksums={"sha1_git": self.obj.target_hash})
diff --git a/swh/graphql/resolvers/resolver_factory.py b/swh/graphql/resolvers/resolver_factory.py
--- a/swh/graphql/resolvers/resolver_factory.py
+++ b/swh/graphql/resolvers/resolver_factory.py
@@ -3,7 +3,7 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from .content import ContentNode, TargetContentNode
+from .content import ContentNode, HashContentNode, TargetContentNode
from .directory import DirectoryNode, RevisionDirectoryNode, TargetDirectoryNode
from .directory_entry import DirectoryEntryConnection
from .origin import OriginConnection, OriginNode
@@ -49,6 +49,7 @@
"release-content": TargetContentNode,
"directory": DirectoryNode,
"content": ContentNode,
+ "content-by-hash": HashContentNode,
"dir-entry-dir": TargetDirectoryNode,
"dir-entry-file": TargetContentNode,
"search-result-snapshot": TargetSnapshotNode,
diff --git a/swh/graphql/resolvers/resolvers.py b/swh/graphql/resolvers/resolvers.py
--- a/swh/graphql/resolvers/resolvers.py
+++ b/swh/graphql/resolvers/resolvers.py
@@ -183,6 +183,14 @@
return resolver(obj, info, **kw)
+@query.field("contentByHash")
+def content_by_hash_resolver(
+ obj: None, info: GraphQLResolveInfo, **kw
+) -> rs.content.ContentNode:
+ resolver = get_node_resolver("content-by-hash")
+ return resolver(obj, info, **kw)
+
+
# Connection resolvers
# A connection resolver should return an instance of BaseConnection
diff --git a/swh/graphql/resolvers/scalars.py b/swh/graphql/resolvers/scalars.py
--- a/swh/graphql/resolvers/scalars.py
+++ b/swh/graphql/resolvers/scalars.py
@@ -8,12 +8,14 @@
from ariadne import ScalarType
from swh.graphql.utils import utils
+from swh.model import hashutil
from swh.model.model import TimestampWithTimezone
from swh.model.swhids import CoreSWHID
datetime_scalar = ScalarType("DateTime")
swhid_scalar = ScalarType("SWHID")
id_scalar = ScalarType("ID")
+content_hash_scalar = ScalarType("ContentHash")
@id_scalar.serializer
@@ -41,3 +43,18 @@
@swhid_scalar.serializer
def serialize_swhid(value):
return str(value)
+
+
+@content_hash_scalar.value_parser
+def validate_content_hash(value):
+ try:
+ hash_type, hash_string = value.split(":")
+ hash_value = hashutil.hash_to_bytes(hash_string)
+ except ValueError as e:
+ # FIXME, log this error
+ raise AttributeError("Invalid content checksum", e)
+ except Exception as e:
+ # FIXME, log this error
+ raise AttributeError("Invalid content checksum", e)
+ # FIXME, add validation for the hash_type
+ return hash_type, hash_value
diff --git a/swh/graphql/schema/schema.graphql b/swh/graphql/schema/schema.graphql
--- a/swh/graphql/schema/schema.graphql
+++ b/swh/graphql/schema/schema.graphql
@@ -8,6 +8,11 @@
"""
scalar DateTime
+"""
+Content identifier in the form hash-type:hash-value
+"""
+scalar ContentHash
+
"""
Object with an id
"""
@@ -755,24 +760,54 @@
}
"""
-An object with different checksums
+An object with different content checksums
"""
type ContentChecksum {
+ blake2s256: String
+ sha1: String
+ sha1_git: String
+ sha256: String
+}
+
+"""
+Object with different content data representations
+"""
+type ContentData {
"""
+ File data as a string
"""
- blake2s256: String
+ raw: BinaryString
"""
+ URL to download the file data
"""
- sha1: String
+ url: String
+}
+type ContentFileType {
"""
+ Detected content encoding
"""
- sha1_git: String
+ encoding: String
"""
+ Detected MIME type of the content
"""
- sha256: String
+ mimetype: String
+}
+
+type ContentLanguage {
+ """
+ Detected programming language if any
+ """
+ lang: String
+}
+
+type ContentLicense {
+ """
+ Array of strings containing the detected license names
+ """
+ licenses: [String]
}
"""
@@ -803,6 +838,26 @@
Content status, visible or hidden
"""
status: String
+
+ """
+ File content
+ """
+ data: ContentData
+
+ """
+ Information about the content MIME type
+ """
+ fileType: ContentFileType
+
+ """
+ Information about the programming language used in the content
+ """
+ language: ContentLanguage
+
+ """
+ Information about the license of the content
+ """
+ license: ContentLicense
}
"""
@@ -970,6 +1025,17 @@
swhid: SWHID!
): Content
+ """
+ Get the content by one or more hashes
+ Use multiple hashes for an accurate result
+ """
+ contentByHash(
+ """
+ List of hashType:hashValue strings
+ """
+ checksums: [ContentHash]!
+ ): Content
+
"""
Resolve the given SWHID to an object
"""
diff --git a/swh/graphql/tests/functional/test_content.py b/swh/graphql/tests/functional/test_content.py
new file mode 100644
--- /dev/null
+++ b/swh/graphql/tests/functional/test_content.py
@@ -0,0 +1,153 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import pytest
+
+from . import utils
+from ..data import get_contents
+
+
+@pytest.mark.parametrize("content", get_contents())
+def test_get_content_with_swhid(client, content):
+ query_str = """
+ {
+ content(swhid: "%s") {
+ swhid
+ checksum {
+ blake2s256
+ sha1
+ sha1_git
+ sha256
+ }
+ length
+ status
+ data {
+ raw {
+ text
+ }
+ url
+ }
+ fileType {
+ encoding
+ }
+ language {
+ lang
+ }
+ license {
+ licenses
+ }
+ }
+ }
+ """
+ data, _ = utils.get_query_response(client, query_str % content.swhid())
+ archive_url = "https://archive.softwareheritage.org/api/1/"
+ response = {
+ "swhid": str(content.swhid()),
+ "checksum": {
+ "blake2s256": content.blake2s256.hex(),
+ "sha1": content.sha1.hex(),
+ "sha1_git": content.sha1_git.hex(),
+ "sha256": content.sha256.hex(),
+ },
+ "length": content.length,
+ "status": content.status,
+ "data": {
+ "raw": {"text": content.data.decode()},
+ "url": f"{archive_url}content/sha1:{content.sha1.hex()}/raw/",
+ },
+ "fileType": None,
+ "language": None,
+ "license": None,
+ }
+ assert data["content"] == response
+
+
+@pytest.mark.parametrize("content", get_contents())
+def test_get_content_with_hash(client, content):
+ query_str = """
+ {
+ contentByHash(checksums: ["blake2s256:%s", "sha1:%s", "sha1_git:%s", "sha256:%s"]) {
+ swhid
+ }
+ }
+ """
+ data, _ = utils.get_query_response(
+ client,
+ query_str
+ % (
+ content.blake2s256.hex(),
+ content.sha1.hex(),
+ content.sha1_git.hex(),
+ content.sha256.hex(),
+ ),
+ )
+ assert data["contentByHash"] == {"swhid": str(content.swhid())}
+
+
+def test_get_content_with_invalid_swhid(client):
+ query_str = """
+ {
+ content(swhid: "swh:1:cnt:invalid") {
+ swhid
+ }
+ }
+ """
+ errors = utils.get_error_response(client, query_str)
+ # API will throw an error in case of an invalid SWHID
+ assert len(errors) == 1
+ assert "Invalid SWHID: invalid syntax" in errors[0]["message"]
+
+
+def test_get_content_with_invalid_hashes(client):
+ content = get_contents()[0]
+ query_str = """
+ {
+ contentByHash(checksums: ["blake2s256:%s", "sha1:%s", "sha1_git:%s", "sha256:%s"]) {
+ swhid
+ }
+ }
+ """
+ errors = utils.get_error_response(
+ client,
+ query_str
+ % (
+ "invalid", # Only one hash is invalid
+ content.sha1.hex(),
+ content.sha1_git.hex(),
+ content.sha256.hex(),
+ ),
+ )
+ # API will throw an error in case of an invalid content hash
+ assert len(errors) == 1
+ assert "Invalid content checksum" in errors[0]["message"]
+
+
+def test_get_content_as_target(client):
+ # SWHID of a test dir with a file entry
+ directory_swhid = "swh:1:dir:87b339104f7dc2a8163dec988445e3987995545f"
+ query_str = """
+ {
+ directory(swhid: "%s") {
+ swhid
+ entries(first: 2) {
+ nodes {
+ type
+ target {
+ ...on Content {
+ swhid
+ length
+ }
+ }
+ }
+ }
+ }
+ }
+ """
+ data, _ = utils.get_query_response(client, query_str % directory_swhid)
+ content_obj = data["directory"]["entries"]["nodes"][1]["target"]
+ assert content_obj == {
+ "length": 4,
+ "swhid": "swh:1:cnt:86bc6b377e9d25f9d26777a4a28d08e63e7c5779",
+ }
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mar 17 2025, 6:53 PM (7 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3227368
Attached To
D8239: Add missing fields to the Content object
Event Timeline
Log In to Comment