Page MenuHomeSoftware Heritage

D8978.diff
No OneTemporary

D8978.diff

diff --git a/swh/graphql/errors/__init__.py b/swh/graphql/errors/__init__.py
--- a/swh/graphql/errors/__init__.py
+++ b/swh/graphql/errors/__init__.py
@@ -4,6 +4,7 @@
# See top-level LICENSE file for more information
from .errors import (
+ DataError,
InvalidInputError,
NullableObjectError,
ObjectNotFoundError,
@@ -16,5 +17,6 @@
"PaginationError",
"InvalidInputError",
"NullableObjectError",
+ "DataError",
"format_error",
]
diff --git a/swh/graphql/errors/errors.py b/swh/graphql/errors/errors.py
--- a/swh/graphql/errors/errors.py
+++ b/swh/graphql/errors/errors.py
@@ -33,3 +33,7 @@
class NullableObjectError(Exception):
pass
+
+
+class DataError(Exception):
+ pass
diff --git a/swh/graphql/resolvers/content.py b/swh/graphql/resolvers/content.py
--- a/swh/graphql/resolvers/content.py
+++ b/swh/graphql/resolvers/content.py
@@ -3,11 +3,13 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from typing import Union
+from typing import Dict, List, Optional, Union
-from swh.graphql.errors import InvalidInputError
+from swh.graphql.errors import DataError, InvalidInputError
from swh.model import hashutil
+from swh.model.model import Content
+from .base_connection import BaseList
from .base_node import BaseSWHNode
from .directory_entry import BaseDirectoryEntryNode
from .release import BaseReleaseNode
@@ -15,16 +17,22 @@
from .snapshot_branch import BaseSnapshotBranchNode
+def read_and_validate_content_hashes(hashes) -> Dict[str, bytes]:
+ try:
+ return {
+ hash_type: hashutil.hash_to_bytes(hash_value)
+ for (hash_type, hash_value) in hashes
+ }
+ except ValueError as e:
+ # raise an input error in case of an invalid hash
+ raise InvalidInputError("Invalid content hash", e)
+
+
class BaseContentNode(BaseSWHNode):
"""
Base resolver for all the content nodes
"""
- def _get_content_by_hashes(self, hashes: dict):
- content = self.archive.get_contents(hashes)
- # in case of a conflict, return the first element
- return content[0] if content else None
-
@property
def hashes(self):
# FIXME, use a Node instead
@@ -65,33 +73,19 @@
return "Content"
-class ContentNode(BaseContentNode):
- """
- Node resolver for a content requested directly with its SWHID
- """
-
- def _get_node_data(self):
- hashes = {"sha1_git": self.kwargs.get("swhid").object_id}
- return self._get_content_by_hashes(hashes)
-
-
-class HashContentNode(BaseContentNode):
+class ContentbyHashesNode(BaseContentNode):
"""
- Node resolver for a content requested with one or more hashes
+ Node resolver for a content requested with all of its hashes
+ A single content object will be returned
"""
- def _get_node_data(self):
- try:
- hashes = {
- hash_type: hashutil.hash_to_bytes(hash_value)
- for (hash_type, hash_value) in self.kwargs.items()
- }
- except ValueError as e:
- # raise an input error in case of an invalid hash
- raise InvalidInputError("Invalid content hash", e)
- if not hashes:
- raise InvalidInputError("At least one of the four hashes must be provided")
- return self._get_content_by_hashes(hashes)
+ def _get_node_data(self) -> Optional[Content]:
+ hashes = read_and_validate_content_hashes(self.kwargs.items())
+ contents = self.archive.get_contents(hashes=hashes)
+ if len(contents) > 1:
+ # This situation is not expected to happen IRL
+ raise DataError("Content hash conflict for the set ", hashes)
+ return contents[0] if contents else None
class TargetContentNode(BaseContentNode):
@@ -107,5 +101,37 @@
BaseSnapshotBranchNode,
]
- def _get_node_data(self):
- return self._get_content_by_hashes(hashes={"sha1_git": self.obj.target_hash})
+ def _get_node_data(self) -> Optional[Content]:
+ # FIXME, this is not considering hash collisions
+ # and could return a wrong object in very rare situations
+ contents = self.archive.get_contents(hashes={"sha1_git": self.obj.target_hash})
+ # always returning the first content from the storage
+ return contents[0] if contents else None
+
+
+class ContentSwhidList(BaseList):
+ """
+ Return a non paginated list of contents for the given SWHID
+ This will return a single item in most of the cases
+ """
+
+ _node_class = BaseContentNode
+
+ def _get_results(self) -> List[Content]:
+ hashes = {"sha1_git": self.kwargs.get("swhid").object_id}
+ return self.archive.get_contents(hashes=hashes)
+
+
+class ContentHashList(BaseList):
+ """
+ Return a non paginated list of contents for the given hashes
+ This will return a single item in most of the cases
+ """
+
+ _node_class = BaseContentNode
+
+ def _get_results(self) -> List[Content]:
+ hashes = read_and_validate_content_hashes(self.kwargs.items())
+ if not hashes:
+ raise InvalidInputError("At least one of the four hashes must be provided")
+ return self.archive.get_contents(hashes=hashes)
diff --git a/swh/graphql/resolvers/resolver_factory.py b/swh/graphql/resolvers/resolver_factory.py
--- a/swh/graphql/resolvers/resolver_factory.py
+++ b/swh/graphql/resolvers/resolver_factory.py
@@ -9,7 +9,12 @@
from .base_connection import BaseConnection, BaseList
from .base_node import BaseNode
-from .content import ContentNode, HashContentNode, TargetContentNode
+from .content import (
+ ContentbyHashesNode,
+ ContentHashList,
+ ContentSwhidList,
+ TargetContentNode,
+)
from .directory import DirectoryNode, RevisionDirectoryNode, TargetDirectoryNode
from .directory_entry import DirectoryEntryConnection, DirectoryEntryNode
from .origin import OriginConnection, OriginNode, TargetOriginNode
@@ -56,8 +61,7 @@
"release-content": TargetContentNode,
"directory": DirectoryNode,
"directory-entry": DirectoryEntryNode,
- "content": ContentNode,
- "content-by-hash": HashContentNode,
+ "content-by-hashes": ContentbyHashesNode,
"dir-entry-content": TargetContentNode,
"dir-entry-directory": TargetDirectoryNode,
"dir-entry-revision": TargetRevisionNode,
@@ -109,6 +113,8 @@
"revision-author": RevisionAuthorList,
"revision-committer": RevisionCommitterList,
"release-author": ReleaseAuthorList,
+ "contents-swhid": ContentSwhidList,
+ "contents-hashes": ContentHashList,
}
@classmethod
diff --git a/swh/graphql/resolvers/resolvers.py b/swh/graphql/resolvers/resolvers.py
--- a/swh/graphql/resolvers/resolvers.py
+++ b/swh/graphql/resolvers/resolvers.py
@@ -177,13 +177,6 @@
return NodeObjectFactory.create(f"dir-entry-{obj.targetType}", obj, info, **kw)
-@query.field("content")
-def content_resolver(
- obj: None, info: GraphQLResolveInfo, **kw
-) -> rs.content.ContentNode:
- return NodeObjectFactory.create("content", obj, info, **kw)
-
-
@search_result.field("target")
def search_result_target_resolver(
obj: rs.search.SearchResultNode, info: GraphQLResolveInfo, **kw
@@ -203,10 +196,10 @@
@query.field("contentByHashes")
-def content_by_hash_resolver(
+def content_by_hashes_resolver(
obj: None, info: GraphQLResolveInfo, **kw
-) -> rs.content.ContentNode:
- return NodeObjectFactory.create("content-by-hash", obj, info, **kw)
+) -> rs.content.ContentbyHashesNode:
+ return NodeObjectFactory.create("content-by-hashes", obj, info, **kw)
# Connection resolvers
@@ -279,6 +272,20 @@
# Simple list resolvers (lists without paginations)
+@query.field("contentsBySWHID")
+def contnets_by_swhid_resolver(
+ obj: None, info: GraphQLResolveInfo, **kw
+) -> rs.content.ContentSwhidList:
+ return SimpleListFactory.create("contents-swhid", obj, info, **kw)
+
+
+@query.field("contentsByHashes")
+def contnets_by_hashes_resolver(
+ obj: None, info: GraphQLResolveInfo, **kw
+) -> rs.content.ContentHashList:
+ return SimpleListFactory.create("contents-hashes", obj, info, **kw)
+
+
@query.field("resolveSWHID")
def search_swhid_resolver(
obj: None, info: GraphQLResolveInfo, **kw
diff --git a/swh/graphql/schema/schema.graphql b/swh/graphql/schema/schema.graphql
--- a/swh/graphql/schema/schema.graphql
+++ b/swh/graphql/schema/schema.graphql
@@ -1105,24 +1105,20 @@
): DirectoryEntry
"""
- Get the content with a SWHID
+ Get a list of contents for the given SWHID
"""
- content(
+ contentsBySWHID (
"""
- SWHID of the content object
+ SWHID to look for
"""
swhid: SWHID!
- ): Content
+ ): [Content]
"""
- Get a content that match all the given hashes.
- This entrypoint can be used to uniquely identify a content
- in the event of hash conflicts. Use multiple hashes to
- get an accurate result.
-
- At least one of the four hashes must be provided.
+ Get contents with hashes
+ At least one of the four hashes must be provided
"""
- contentByHashes(
+ contentsByHashes(
sha1: String
sha256: String
@@ -1130,6 +1126,22 @@
sha1_git: String
blake2s256: String
+ ): [Content]
+
+ """
+ Get a content that match all the given hashes.
+ All the four hashes must be provided
+ This entrypoint can be used to uniquely identify
+ a content in the event of hash collisions
+ """
+ contentByHashes(
+ sha1: String!
+
+ sha256: String!
+
+ sha1_git: String!
+
+ blake2s256: String!
): Content
"""
diff --git a/swh/graphql/tests/functional/test_content.py b/swh/graphql/tests/functional/test_content.py
--- a/swh/graphql/tests/functional/test_content.py
+++ b/swh/graphql/tests/functional/test_content.py
@@ -10,10 +10,12 @@
@pytest.mark.parametrize("content", get_contents())
-def test_get_content_with_swhid(client, content):
+def test_get_content_with_hashes(client, content):
query_str = """
- query getContent($swhid: SWHID!) {
- content(swhid: $swhid) {
+ query getContentByHashes($sha1: String!, $sha256: String!,
+ $sha1_git: String!, $blake2s256: String!) {
+ contentByHashes(sha1: $sha1, sha256: $sha256, sha1_git: $sha1_git,
+ blake2s256: $blake2s256) {
swhid
id
hashes {
@@ -39,7 +41,14 @@
}
}
"""
- data, _ = utils.get_query_response(client, query_str, swhid=str(content.swhid()))
+ data, _ = utils.get_query_response(
+ client,
+ query_str,
+ blake2s256=content.blake2s256.hex(),
+ sha1=content.sha1.hex(),
+ sha1_git=content.sha1_git.hex(),
+ sha256=content.sha256.hex(),
+ )
archive_url = "https://archive.softwareheritage.org/api/1/"
response = {
"swhid": str(content.swhid()),
@@ -59,13 +68,26 @@
"language": None,
"license": None,
}
- assert data["content"] == response
+ assert data["contentByHashes"] == response
-def test_get_content_with_invalid_swhid(client):
+@pytest.mark.parametrize("content", get_contents())
+def test_get_contents_with_swhid(client, content):
query_str = """
- query getContent($swhid: SWHID!) {
- content(swhid: $swhid) {
+ query getContents($swhid: SWHID!) {
+ contentsBySWHID(swhid: $swhid) {
+ swhid
+ }
+ }
+ """
+ data, _ = utils.get_query_response(client, query_str, swhid=str(content.swhid()))
+ assert data["contentsBySWHID"] == [{"swhid": str(content.swhid())}]
+
+
+def test_get_contents_with_invalid_swhid(client):
+ query_str = """
+ query getContents($swhid: SWHID!) {
+ contentsBySWHID(swhid: $swhid) {
swhid
}
}
@@ -76,11 +98,27 @@
assert "Input error: Invalid SWHID" in errors[0]["message"]
+def test_get_contents_with_missing_swhid(client):
+ missing_sha1 = "1" * 40
+ query_str = """
+ query getContents($swhid: SWHID!) {
+ contentsBySWHID(swhid: $swhid) {
+ swhid
+ }
+ }
+ """
+ data, _ = utils.get_query_response(
+ client, query_str, swhid=f"swh:1:cnt:{missing_sha1}"
+ )
+ assert data["contentsBySWHID"] == []
+
+
@pytest.mark.parametrize("content", get_contents())
-def test_get_content_with_hash(client, content):
+def test_get_contents_with_hashes(client, content):
query_str = """
- query getContent($sha1: String, $sha1_git: String, $sha256: String, $blake2s256: String) {
- contentByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
+ query getContents($sha1: String, $sha1_git: String, $sha256: String,
+ $blake2s256: String) {
+ contentsByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
blake2s256: $blake2s256) {
swhid
}
@@ -94,14 +132,15 @@
sha256=content.sha256.hex(),
blake2s256=content.blake2s256.hex(),
)
- assert data["contentByHashes"] == {"swhid": str(content.swhid())}
+ assert data["contentsByHashes"] == [{"swhid": str(content.swhid())}]
@pytest.mark.parametrize("content", get_contents())
def test_get_content_with_single_hash(client, content):
query_str = """
- query getContent($sha1: String, $sha1_git: String, $sha256: String, $blake2s256: String) {
- contentByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
+ query getContents($sha1: String, $sha1_git: String, $sha256: String,
+ $blake2s256: String) {
+ contentsByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
blake2s256: $blake2s256) {
swhid
}
@@ -112,34 +151,43 @@
query_str,
sha1=content.sha1.hex(),
)
- assert data["contentByHashes"] == {"swhid": str(content.swhid())}
+ assert data["contentsByHashes"] == [{"swhid": str(content.swhid())}]
+
+ data, _ = utils.get_query_response(
+ client,
+ query_str,
+ blake2s256=content.blake2s256.hex(),
+ )
+ assert data["contentsByHashes"] == [{"swhid": str(content.swhid())}]
@pytest.mark.parametrize("content", get_contents())
-def test_get_content_with_one_non_matching_hash(client, content):
+def test_get_contents_with_one_non_matching_hash(client, content):
query_str = """
- query getContent($sha1: String, $sha1_git: String, $sha256: String, $blake2s256: String) {
- contentByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
+ query getContents($sha1: String, $sha1_git: String, $sha256: String, $blake2s256: String) {
+ contentsByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
blake2s256: $blake2s256) {
swhid
}
}
"""
- utils.assert_missing_object(
+ data, _ = utils.get_query_response(
client,
query_str,
- obj_type="contentByHashes",
+ obj_type="contentsByHashes",
sha1=content.sha1.hex(),
sha1_git="a" * 20, # hash is valid, but not matching the object
)
+ assert data["contentsByHashes"] == []
def test_get_content_with_invalid_hashes(client):
content = get_contents()[0]
query_str = """
- query getContent($sha1: String, $sha1_git: String, $sha256: String, $blake2s256: String) {
- contentByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
- blake2s256: $blake2s256) {
+ query getContents($sha1: String, $sha1_git: String, $sha256: String,
+ $blake2s256: String) {
+ contentsByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
+ blake2s256: $blake2s256) {
swhid
}
}
@@ -158,8 +206,8 @@
def test_get_content_with_no_hashes(client):
query_str = """
- query getContent($sha1: String, $sha1_git: String, $sha256: String, $blake2s256: String) {
- contentByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
+ query getContents($sha1: String, $sha1_git: String, $sha256: String, $blake2s256: String) {
+ contentsByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
blake2s256: $blake2s256) {
swhid
}
@@ -203,20 +251,3 @@
"length": 4,
"swhid": "swh:1:cnt:86bc6b377e9d25f9d26777a4a28d08e63e7c5779",
}
-
-
-def test_get_content_with_unknown_swhid(client):
- unknown_sha1 = "1" * 40
- query_str = """
- query getDirectory($swhid: SWHID!) {
- content(swhid: $swhid) {
- swhid
- }
- }
- """
- utils.assert_missing_object(
- client,
- query_str,
- obj_type="content",
- swhid=f"swh:1:cnt:{unknown_sha1}",
- )
diff --git a/swh/graphql/tests/unit/resolvers/test_resolver_factory.py b/swh/graphql/tests/unit/resolvers/test_resolver_factory.py
--- a/swh/graphql/tests/unit/resolvers/test_resolver_factory.py
+++ b/swh/graphql/tests/unit/resolvers/test_resolver_factory.py
@@ -17,6 +17,6 @@
with pytest.raises(AttributeError):
resolver_factory.ConnectionObjectFactory().create("invalid", None, None)
- def test_get_list_resolver_invalid_type(self):
+ def test_get_base_list_resolver_invalid_type(self):
with pytest.raises(AttributeError):
resolver_factory.SimpleListFactory().create("invalid", None, None)
diff --git a/swh/graphql/tests/unit/resolvers/test_resolvers.py b/swh/graphql/tests/unit/resolvers/test_resolvers.py
--- a/swh/graphql/tests/unit/resolvers/test_resolvers.py
+++ b/swh/graphql/tests/unit/resolvers/test_resolvers.py
@@ -31,7 +31,7 @@
(rs.revision_directory_resolver, resolvers.directory.RevisionDirectoryNode),
(rs.release_resolver, resolvers.release.ReleaseNode),
(rs.directory_resolver, resolvers.directory.DirectoryNode),
- (rs.content_resolver, resolvers.content.ContentNode),
+ (rs.content_by_hashes_resolver, resolvers.content.ContentbyHashesNode),
],
)
def test_node_resolver(self, mocker, dummy_node, resolver_func, node_cls):

File Metadata

Mime Type
text/plain
Expires
Thu, Dec 19, 9:06 AM (15 h, 17 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3214808

Event Timeline