diff --git a/swh/graphql/app.py b/swh/graphql/app.py
--- a/swh/graphql/app.py
+++ b/swh/graphql/app.py
@@ -39,4 +39,5 @@
     scalars.id_scalar,
     scalars.datetime_scalar,
     scalars.swhid_scalar,
+    scalars.content_hash_scalar,
 )
diff --git a/swh/graphql/backends/archive.py b/swh/graphql/backends/archive.py
--- a/swh/graphql/backends/archive.py
+++ b/swh/graphql/backends/archive.py
@@ -69,10 +69,6 @@
             directory_id, limit=first, page_token=after
         )
 
-    def get_content(self, content_id):
-        # FIXME, only for tests
-        return self.storage.content_find({"sha1_git": content_id})
-
     def is_object_available(self, object_id: str, object_type: ObjectType) -> bool:
         mapping = {
             ObjectType.CONTENT: self.storage.content_missing_per_sha1_git,
@@ -81,4 +77,10 @@
             ObjectType.REVISION: self.storage.revision_missing,
             ObjectType.SNAPSHOT: self.storage.snapshot_missing,
         }
-        return not mapping[object_type]([object_id])
+        return not list(mapping[object_type]([object_id]))
+
+    def get_contents(self, checksums: dict):
+        return self.storage.content_find(checksums)
+
+    def get_content_data(self, content_sha1):
+        return self.storage.content_get_data(content_sha1)
diff --git a/swh/graphql/resolvers/content.py b/swh/graphql/resolvers/content.py
--- a/swh/graphql/resolvers/content.py
+++ b/swh/graphql/resolvers/content.py
@@ -18,19 +18,42 @@
     Base resolver for all the content nodes
     """
 
-    def _get_content_by_id(self, content_id):
-        content = archive.Archive().get_content(content_id)
+    def _get_content_by_hash(self, checksums: dict):
+        content = archive.Archive().get_contents(checksums)
+        # in case of a conflict, return the first element
         return content[0] if content else None
 
     @property
     def checksum(self):
-        # FIXME, return a Node object
+        # FIXME, use a Node instead
         return {k: v.hex() for (k, v) in self._node.hashes().items()}
 
     @property
     def id(self):
         return self._node.sha1_git
 
+    @property
+    def data(self):
+        # FIXME, return a Node object
+        # FIXME, add more ways to retrieve data (eg: a static URL)
+        content_sha1 = self._node.hashes()["sha1"]
+        return {"raw": archive.Archive().get_content_data(content_sha1)}
+
+    @property
+    def fileType(self):
+        # FIXME, fetch data from the indexers
+        return None
+
+    @property
+    def language(self):
+        # FIXME, fetch data from the indexers
+        return None
+
+    @property
+    def license(self):
+        # FIXME, fetch data from the indexers
+        return None
+
     def is_type_of(self):
         # is_type_of is required only when resolving a UNION type
         # This is for ariadne to return the right type
@@ -43,17 +66,28 @@
     """
 
     def _get_node_data(self):
-        return self._get_content_by_id(self.kwargs.get("swhid").object_id)
+        checksums = {"sha1_git": self.kwargs.get("swhid").object_id}
+        return self._get_content_by_hash(checksums)
+
+
+class HashContentNode(BaseContentNode):
+    """
+    Node resolver for a content requested with one or more checksums
+    """
+
+    def _get_node_data(self):
+        checksums = dict(self.kwargs.get("checksums"))
+        return self._get_content_by_hash(checksums)
 
 
 class TargetContentNode(BaseContentNode):
     """
-    Node resolver for a content requested from a
-    directory entry or from a release target
+    Node resolver for a content requested as a target
+    This request could be from directory entry, release or a branch
     """
 
     obj: Union[DirectoryEntryNode, BaseReleaseNode, SnapshotBranchNode]
 
     def _get_node_data(self):
-        content_id = self.obj.target_hash
-        return self._get_content_by_id(content_id)
+        checksums = {"sha1_git": self.obj.target_hash}
+        return self._get_content_by_hash(checksums)
diff --git a/swh/graphql/resolvers/resolver_factory.py b/swh/graphql/resolvers/resolver_factory.py
--- a/swh/graphql/resolvers/resolver_factory.py
+++ b/swh/graphql/resolvers/resolver_factory.py
@@ -3,7 +3,7 @@
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
-from .content import ContentNode, TargetContentNode
+from .content import ContentNode, HashContentNode, TargetContentNode
 from .directory import DirectoryNode, RevisionDirectoryNode, TargetDirectoryNode
 from .directory_entry import DirectoryEntryConnection
 from .origin import OriginConnection, OriginNode
@@ -49,6 +49,7 @@
         "release-content": TargetContentNode,
         "directory": DirectoryNode,
         "content": ContentNode,
+        "content-by-hash": HashContentNode,
         "dir-entry-dir": TargetDirectoryNode,
         "dir-entry-file": TargetContentNode,
         "search-result-snapshot": TargetSnapshotNode,
diff --git a/swh/graphql/resolvers/resolvers.py b/swh/graphql/resolvers/resolvers.py
--- a/swh/graphql/resolvers/resolvers.py
+++ b/swh/graphql/resolvers/resolvers.py
@@ -183,6 +183,14 @@
     return resolver(obj, info, **kw)
 
 
+@query.field("contentByHash")
+def content_by_hash_resolver(
+    obj: None, info: GraphQLResolveInfo, **kw
+) -> rs.content.HashContentNode:
+    resolver = get_node_resolver("content-by-hash")
+    return resolver(obj, info, **kw)
+
+
 # Connection resolvers
 # A connection resolver should return an instance of BaseConnection
 
diff --git a/swh/graphql/resolvers/scalars.py b/swh/graphql/resolvers/scalars.py
--- a/swh/graphql/resolvers/scalars.py
+++ b/swh/graphql/resolvers/scalars.py
@@ -4,6 +4,7 @@
 # See top-level LICENSE file for more information
 
 from datetime import datetime
+from swh.model import hashutil
 
 from ariadne import ScalarType
 
@@ -14,7 +15,8 @@
 datetime_scalar = ScalarType("DateTime")
 swhid_scalar = ScalarType("SWHID")
 id_scalar = ScalarType("ID")
+content_hash_scalar = ScalarType("ContentHash")
 
 
 @id_scalar.serializer
 def serialize_id(value):
@@ -41,3 +43,16 @@
 @swhid_scalar.serializer
 def serialize_swhid(value):
     return str(value)
+
+
+@content_hash_scalar.value_parser
+def validate_content_hash(value):
+    """Parse a ``hash-type:hash-value`` string into ``(hash_type, hash_bytes)``."""
+    try:
+        hash_type, hash_string = value.split(":")
+        hash_value = hashutil.hash_to_bytes(hash_string)
+    except Exception as e:
+        # FIXME, log this error
+        raise AttributeError("Invalid content checksum", e) from e
+    # FIXME, add validation for the hash_type
+    return hash_type, hash_value
diff --git a/swh/graphql/schema/schema.graphql b/swh/graphql/schema/schema.graphql
--- a/swh/graphql/schema/schema.graphql
+++ b/swh/graphql/schema/schema.graphql
@@ -8,6 +8,11 @@
 """
 scalar DateTime
 
+"""
+Content identifier in the form hash-type:hash-value
+"""
+scalar ContentHash
+
 """
 Object with an id
 """
@@ -755,24 +760,49 @@
 }
 
 """
-An object with different checksums
+An object with different content checksums
 """
 type ContentChecksum {
+  blake2s256: String
+  sha1: String
+  sha1_git: String
+  sha256: String
+}
+
+"""
+Object with different content data representations
+"""
+type ContentData {
   """
+  Content as a base64 string
   """
-  blake2s256: String
+  raw: BinaryString
+}
 
+type ContentFileType {
   """
+  Detected content encoding
   """
-  sha1: String
+  encoding: String
 
   """
+  Detected MIME type of the content
   """
-  sha1_git: String
+  mimetype: String
+}
 
+type ContentLanguage {
   """
+  Detected programming language if any
   """
-  sha256: String
+  lang: String
+}
+
+type ContentLicense {
+  """
+  Array of strings containing the detected license names
+  """
+  licenses: [String]
 }
 
 """
@@ -803,6 +833,26 @@
   Content status, visible or hidden
   """
   status: String
+
+  """
+  File content
+  """
+  data: ContentData
+
+  """
+  Information about the content MIME type
+  """
+  fileType: ContentFileType
+
+  """
+  Information about the programming language used in the content
+  """
+  language: ContentLanguage
+
+  """
+  Information about the license of the content
+  """
+  license: ContentLicense
 }
 
 """
@@ -970,6 +1020,17 @@
     swhid: SWHID!
   ): Content
 
+  """
+  Get the content by one or more hashes
+  Use multiple hashes for an accurate result
+  """
+  contentByHash(
+    """
+    List of hashType:hashValue strings
+    """
+    checksums: [ContentHash!]!
+  ): Content
+
   """
   Resolve the given SWHID to an object
   """
diff --git a/swh/graphql/tests/functional/test_content.py b/swh/graphql/tests/functional/test_content.py
new file mode 100644
--- /dev/null
+++ b/swh/graphql/tests/functional/test_content.py
@@ -0,0 +1,42 @@
+# Copyright (C) 2022 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import pytest
+
+from ..data import get_contents
+from .utils import get_query_response
+
+
+def test_content_response_data():
+    pass
+
+
+@pytest.mark.parametrize("content", get_contents())
+def test_get_content_with_swhid(client, content):
+    query_str = """
+    {
+      content(swhid: "%s") {
+        swhid
+      }
+    }
+    """
+    data, _ = get_query_response(client, query_str % content.swhid())
+    assert data["content"] == {"swhid": str(content.swhid())}
+
+
+def test_get_content_with_hash(client):
+    pass
+
+
+def test_get_content_with_invalid_swhid(client):
+    pass
+
+
+def test_get_content_with_invalid_hashes(client):
+    pass
+
+
+def test_get_content_as_target(client):
+    pass
diff --git a/swh/graphql/tests/functional/test_swhid_resolve.py b/swh/graphql/tests/functional/test_swhid_resolve.py
--- a/swh/graphql/tests/functional/test_swhid_resolve.py
+++ b/swh/graphql/tests/functional/test_swhid_resolve.py
@@ -68,6 +68,7 @@
     }
     """
     data, _ = utils.get_query_response(client, query_str % snapshot.swhid())
+
     assert data == {
         "resolveSwhid": {
             "nodes": [