diff --git a/swh/graphql/errors/__init__.py b/swh/graphql/errors/__init__.py --- a/swh/graphql/errors/__init__.py +++ b/swh/graphql/errors/__init__.py @@ -4,6 +4,7 @@ # See top-level LICENSE file for more information from .errors import ( + DataError, InvalidInputError, NullableObjectError, ObjectNotFoundError, @@ -16,5 +17,6 @@ "PaginationError", "InvalidInputError", "NullableObjectError", + "DataError", "format_error", ] diff --git a/swh/graphql/errors/errors.py b/swh/graphql/errors/errors.py --- a/swh/graphql/errors/errors.py +++ b/swh/graphql/errors/errors.py @@ -33,3 +33,7 @@ class NullableObjectError(Exception): pass + + +class DataError(Exception): + pass diff --git a/swh/graphql/resolvers/content.py b/swh/graphql/resolvers/content.py --- a/swh/graphql/resolvers/content.py +++ b/swh/graphql/resolvers/content.py @@ -3,11 +3,13 @@ # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from typing import Union +from typing import Dict, List, Optional, Union -from swh.graphql.errors import InvalidInputError +from swh.graphql.errors import DataError, InvalidInputError from swh.model import hashutil +from swh.model.model import Content +from .base_connection import BaseList from .base_node import BaseSWHNode from .directory_entry import BaseDirectoryEntryNode from .release import BaseReleaseNode @@ -15,16 +17,22 @@ from .snapshot_branch import BaseSnapshotBranchNode +def read_and_validate_content_hashes(hashes) -> Dict[str, bytes]: + try: + return { + hash_type: hashutil.hash_to_bytes(hash_value) + for (hash_type, hash_value) in hashes + } + except ValueError as e: + # raise an input error in case of an invalid hash + raise InvalidInputError("Invalid content hash", e) + + class BaseContentNode(BaseSWHNode): """ Base resolver for all the content nodes """ - def _get_content_by_hashes(self, hashes: dict): - content = self.archive.get_contents(hashes) - # in case of a conflict, return the first element - return content[0] if content else None - @property def hashes(self): # FIXME, use a Node instead @@ -65,33 +73,19 @@ return "Content" -class ContentNode(BaseContentNode): - """ - Node resolver for a content requested directly with its SWHID - """ - - def _get_node_data(self): - hashes = {"sha1_git": self.kwargs.get("swhid").object_id} - return self._get_content_by_hashes(hashes) - - -class HashContentNode(BaseContentNode): +class ContentbyHashesNode(BaseContentNode): """ - Node resolver for a content requested with one or more hashes + Node resolver for a content requested with all of its hashes + A single content object will be returned """ - def _get_node_data(self): - try: - hashes = { - hash_type: hashutil.hash_to_bytes(hash_value) - for (hash_type, hash_value) in self.kwargs.items() - } - except ValueError as e: - # raise an input error in case of an invalid hash - raise InvalidInputError("Invalid content hash", e) - if not hashes: - raise InvalidInputError("At least one of the four hashes must be provided") - return self._get_content_by_hashes(hashes) + def _get_node_data(self) -> Optional[Content]: + hashes = read_and_validate_content_hashes(self.kwargs.items()) + contents = self.archive.get_contents(hashes=hashes) + if len(contents) > 1: + # This situation is not expected to happen IRL + raise DataError("Content hash conflict for the set ", hashes) + return contents[0] if contents else None class TargetContentNode(BaseContentNode): @@ -107,5 +101,37 @@ BaseSnapshotBranchNode, ] - def _get_node_data(self): - return self._get_content_by_hashes(hashes={"sha1_git": self.obj.target_hash}) + def _get_node_data(self) -> Optional[Content]: + # FIXME, this is not considering hash collisions + # and could return a wrong object in very rare situations + contents = self.archive.get_contents(hashes={"sha1_git": self.obj.target_hash}) + # always returning the first content from the storage + return contents[0] if contents else None + + +class ContentSwhidList(BaseList): + """ + Return a non paginated list of contents for the given SWHID + This will return a single item in most of the cases + """ + + _node_class = BaseContentNode + + def _get_results(self) -> List[Content]: + hashes = {"sha1_git": self.kwargs.get("swhid").object_id} + return self.archive.get_contents(hashes=hashes) + + +class ContentHashList(BaseList): + """ + Return a non paginated list of contents for the given hashes + This will return a single item in most of the cases + """ + + _node_class = BaseContentNode + + def _get_results(self) -> List[Content]: + hashes = read_and_validate_content_hashes(self.kwargs.items()) + if not hashes: + raise InvalidInputError("At least one of the four hashes must be provided") + return self.archive.get_contents(hashes=hashes) diff --git a/swh/graphql/resolvers/resolver_factory.py b/swh/graphql/resolvers/resolver_factory.py --- a/swh/graphql/resolvers/resolver_factory.py +++ b/swh/graphql/resolvers/resolver_factory.py @@ -9,7 +9,12 @@ from .base_connection import BaseConnection, BaseList from .base_node import BaseNode -from .content import ContentNode, HashContentNode, TargetContentNode +from .content import ( + ContentbyHashesNode, + ContentHashList, + ContentSwhidList, + TargetContentNode, +) from .directory import DirectoryNode, RevisionDirectoryNode, TargetDirectoryNode from .directory_entry import DirectoryEntryConnection, DirectoryEntryNode from .origin import OriginConnection, OriginNode, TargetOriginNode @@ -56,8 +61,7 @@ "release-content": TargetContentNode, "directory": DirectoryNode, "directory-entry": DirectoryEntryNode, - "content": ContentNode, - "content-by-hash": HashContentNode, + "content-by-hashes": ContentbyHashesNode, "dir-entry-content": TargetContentNode, "dir-entry-directory": TargetDirectoryNode, "dir-entry-revision": TargetRevisionNode, @@ -109,6 +113,8 @@ "revision-author": RevisionAuthorList, "revision-committer": RevisionCommitterList, "release-author": ReleaseAuthorList, + "contents-swhid": ContentSwhidList, + "contents-hashes": ContentHashList, } @classmethod diff --git a/swh/graphql/resolvers/resolvers.py b/swh/graphql/resolvers/resolvers.py --- a/swh/graphql/resolvers/resolvers.py +++ b/swh/graphql/resolvers/resolvers.py @@ -177,13 +177,6 @@ return NodeObjectFactory.create(f"dir-entry-{obj.targetType}", obj, info, **kw) -@query.field("content") -def content_resolver( - obj: None, info: GraphQLResolveInfo, **kw -) -> rs.content.ContentNode: - return NodeObjectFactory.create("content", obj, info, **kw) - - @search_result.field("target") def search_result_target_resolver( obj: rs.search.SearchResultNode, info: GraphQLResolveInfo, **kw @@ -203,10 +196,10 @@ @query.field("contentByHashes") -def content_by_hash_resolver( +def content_by_hashes_resolver( obj: None, info: GraphQLResolveInfo, **kw -) -> rs.content.ContentNode: - return NodeObjectFactory.create("content-by-hash", obj, info, **kw) +) -> rs.content.ContentbyHashesNode: + return NodeObjectFactory.create("content-by-hashes", obj, info, **kw) # Connection resolvers @@ -279,6 +272,20 @@ # Simple list resolvers (lists without paginations) +@query.field("contentsBySWHID") +def contnets_by_swhid_resolver( + obj: None, info: GraphQLResolveInfo, **kw +) -> rs.content.ContentSwhidList: + return SimpleListFactory.create("contents-swhid", obj, info, **kw) + + +@query.field("contentsByHashes") +def contnets_by_hashes_resolver( + obj: None, info: GraphQLResolveInfo, **kw +) -> rs.content.ContentHashList: + return SimpleListFactory.create("contents-hashes", obj, info, **kw) + + @query.field("resolveSWHID") def search_swhid_resolver( obj: None, info: GraphQLResolveInfo, **kw diff --git a/swh/graphql/schema/schema.graphql b/swh/graphql/schema/schema.graphql --- a/swh/graphql/schema/schema.graphql +++ b/swh/graphql/schema/schema.graphql @@ -1105,24 +1105,20 @@ ): DirectoryEntry """ - Get the content with a SWHID + Get a list of contents for the given SWHID """ - content( + contentsBySWHID ( """ - SWHID of the content object + SWHID to look for """ swhid: SWHID! - ): Content + ): [Content] """ - Get a content that match all the given hashes. - This entrypoint can be used to uniquely identify a content - in the event of hash conflicts. Use multiple hashes to - get an accurate result. - - At least one of the four hashes must be provided. + Get contents with hashes + At least one of the four hashes must be provided """ - contentByHashes( + contentsByHashes( sha1: String sha256: String @@ -1130,6 +1126,22 @@ sha1_git: String blake2s256: String + ): [Content] + + """ + Get a content that match all the given hashes. + All the four hashes must be provided + This entrypoint can be used to uniquely identify + a content in the event of hash collisions + """ + contentByHashes( + sha1: String! + + sha256: String! + + sha1_git: String! + + blake2s256: String! ): Content """ diff --git a/swh/graphql/tests/functional/test_content.py b/swh/graphql/tests/functional/test_content.py --- a/swh/graphql/tests/functional/test_content.py +++ b/swh/graphql/tests/functional/test_content.py @@ -10,10 +10,12 @@ @pytest.mark.parametrize("content", get_contents()) -def test_get_content_with_swhid(client, content): +def test_get_content_with_hashes(client, content): query_str = """ - query getContent($swhid: SWHID!) { - content(swhid: $swhid) { + query getContentByHashes($sha1: String!, $sha256: String!, + $sha1_git: String!, $blake2s256: String!) { + contentByHashes(sha1: $sha1, sha256: $sha256, sha1_git: $sha1_git, + blake2s256: $blake2s256) { swhid id hashes { @@ -39,7 +41,14 @@ } } """ - data, _ = utils.get_query_response(client, query_str, swhid=str(content.swhid())) + data, _ = utils.get_query_response( + client, + query_str, + blake2s256=content.blake2s256.hex(), + sha1=content.sha1.hex(), + sha1_git=content.sha1_git.hex(), + sha256=content.sha256.hex(), + ) archive_url = "https://archive.softwareheritage.org/api/1/" response = { "swhid": str(content.swhid()), @@ -59,13 +68,26 @@ "language": None, "license": None, } - assert data["content"] == response + assert data["contentByHashes"] == response -def test_get_content_with_invalid_swhid(client): +@pytest.mark.parametrize("content", get_contents()) +def test_get_contents_with_swhid(client, content): query_str = """ - query getContent($swhid: SWHID!) { - content(swhid: $swhid) { + query getContents($swhid: SWHID!) { + contentsBySWHID(swhid: $swhid) { + swhid + } + } + """ + data, _ = utils.get_query_response(client, query_str, swhid=str(content.swhid())) + assert data["contentsBySWHID"] == [{"swhid": str(content.swhid())}] + + +def test_get_contents_with_invalid_swhid(client): + query_str = """ + query getContents($swhid: SWHID!) { + contentsBySWHID(swhid: $swhid) { swhid } } @@ -76,11 +98,27 @@ assert "Input error: Invalid SWHID" in errors[0]["message"] +def test_get_contents_with_missing_swhid(client): + missing_sha1 = "1" * 40 + query_str = """ + query getContents($swhid: SWHID!) { + contentsBySWHID(swhid: $swhid) { + swhid + } + } + """ + data, _ = utils.get_query_response( + client, query_str, swhid=f"swh:1:cnt:{missing_sha1}" + ) + assert data["contentsBySWHID"] == [] + + @pytest.mark.parametrize("content", get_contents()) -def test_get_content_with_hash(client, content): +def test_get_contents_with_hashes(client, content): query_str = """ - query getContent($sha1: String, $sha1_git: String, $sha256: String, $blake2s256: String) { - contentByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256, + query getContents($sha1: String, $sha1_git: String, $sha256: String, + $blake2s256: String) { + contentsByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256, blake2s256: $blake2s256) { swhid } @@ -94,14 +132,15 @@ sha256=content.sha256.hex(), blake2s256=content.blake2s256.hex(), ) - assert data["contentByHashes"] == {"swhid": str(content.swhid())} + assert data["contentsByHashes"] == [{"swhid": str(content.swhid())}] @pytest.mark.parametrize("content", get_contents()) def test_get_content_with_single_hash(client, content): query_str = """ - query getContent($sha1: String, $sha1_git: String, $sha256: String, $blake2s256: String) { - contentByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256, + query getContents($sha1: String, $sha1_git: String, $sha256: String, + $blake2s256: String) { + contentsByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256, blake2s256: $blake2s256) { swhid } @@ -112,34 +151,43 @@ query_str, sha1=content.sha1.hex(), ) - assert data["contentByHashes"] == {"swhid": str(content.swhid())} + assert data["contentsByHashes"] == [{"swhid": str(content.swhid())}] + + data, _ = utils.get_query_response( + client, + query_str, + blake2s256=content.blake2s256.hex(), + ) + assert data["contentsByHashes"] == [{"swhid": str(content.swhid())}] @pytest.mark.parametrize("content", get_contents()) -def test_get_content_with_one_non_matching_hash(client, content): +def test_get_contents_with_one_non_matching_hash(client, content): query_str = """ - query getContent($sha1: String, $sha1_git: String, $sha256: String, $blake2s256: String) { - contentByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256, + query getContents($sha1: String, $sha1_git: String, $sha256: String, $blake2s256: String) { + contentsByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256, blake2s256: $blake2s256) { swhid } } """ - utils.assert_missing_object( + data, _ = utils.get_query_response( client, query_str, - obj_type="contentByHashes", + obj_type="contentsByHashes", sha1=content.sha1.hex(), sha1_git="a" * 20, # hash is valid, but not matching the object ) + assert data["contentsByHashes"] == [] def test_get_content_with_invalid_hashes(client): content = get_contents()[0] query_str = """ - query getContent($sha1: String, $sha1_git: String, $sha256: String, $blake2s256: String) { - contentByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256, - blake2s256: $blake2s256) { + query getContents($sha1: String, $sha1_git: String, $sha256: String, + $blake2s256: String) { + contentsByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256, + blake2s256: $blake2s256) { swhid } } @@ -158,8 +206,8 @@ def test_get_content_with_no_hashes(client): query_str = """ - query getContent($sha1: String, $sha1_git: String, $sha256: String, $blake2s256: String) { - contentByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256, + query getContents($sha1: String, $sha1_git: String, $sha256: String, $blake2s256: String) { + contentsByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256, blake2s256: $blake2s256) { swhid } @@ -203,20 +251,3 @@ "length": 4, "swhid": "swh:1:cnt:86bc6b377e9d25f9d26777a4a28d08e63e7c5779", } - - -def test_get_content_with_unknown_swhid(client): - unknown_sha1 = "1" * 40 - query_str = """ - query getDirectory($swhid: SWHID!) { - content(swhid: $swhid) { - swhid - } - } - """ - utils.assert_missing_object( - client, - query_str, - obj_type="content", - swhid=f"swh:1:cnt:{unknown_sha1}", - ) diff --git a/swh/graphql/tests/unit/resolvers/test_resolver_factory.py b/swh/graphql/tests/unit/resolvers/test_resolver_factory.py --- a/swh/graphql/tests/unit/resolvers/test_resolver_factory.py +++ b/swh/graphql/tests/unit/resolvers/test_resolver_factory.py @@ -17,6 +17,6 @@ with pytest.raises(AttributeError): resolver_factory.ConnectionObjectFactory().create("invalid", None, None) - def test_get_list_resolver_invalid_type(self): + def test_get_base_list_resolver_invalid_type(self): with pytest.raises(AttributeError): resolver_factory.SimpleListFactory().create("invalid", None, None) diff --git a/swh/graphql/tests/unit/resolvers/test_resolvers.py b/swh/graphql/tests/unit/resolvers/test_resolvers.py --- a/swh/graphql/tests/unit/resolvers/test_resolvers.py +++ b/swh/graphql/tests/unit/resolvers/test_resolvers.py @@ -31,7 +31,7 @@ (rs.revision_directory_resolver, resolvers.directory.RevisionDirectoryNode), (rs.release_resolver, resolvers.release.ReleaseNode), (rs.directory_resolver, resolvers.directory.DirectoryNode), - (rs.content_resolver, resolvers.content.ContentNode), + (rs.content_by_hashes_resolver, resolvers.content.ContentbyHashesNode), ], ) def test_node_resolver(self, mocker, dummy_node, resolver_func, node_cls):