Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7123446
D8978.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
17 KB
Subscribers
None
D8978.diff
View Options
diff --git a/swh/graphql/errors/__init__.py b/swh/graphql/errors/__init__.py
--- a/swh/graphql/errors/__init__.py
+++ b/swh/graphql/errors/__init__.py
@@ -4,6 +4,7 @@
# See top-level LICENSE file for more information
from .errors import (
+ DataError,
InvalidInputError,
NullableObjectError,
ObjectNotFoundError,
@@ -16,5 +17,6 @@
"PaginationError",
"InvalidInputError",
"NullableObjectError",
+ "DataError",
"format_error",
]
diff --git a/swh/graphql/errors/errors.py b/swh/graphql/errors/errors.py
--- a/swh/graphql/errors/errors.py
+++ b/swh/graphql/errors/errors.py
@@ -33,3 +33,7 @@
class NullableObjectError(Exception):
pass
+
+
+class DataError(Exception):
+ pass
diff --git a/swh/graphql/resolvers/content.py b/swh/graphql/resolvers/content.py
--- a/swh/graphql/resolvers/content.py
+++ b/swh/graphql/resolvers/content.py
@@ -3,11 +3,13 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-from typing import Union
+from typing import Dict, List, Optional, Union
-from swh.graphql.errors import InvalidInputError
+from swh.graphql.errors import DataError, InvalidInputError
from swh.model import hashutil
+from swh.model.model import Content
+from .base_connection import BaseList
from .base_node import BaseSWHNode
from .directory_entry import BaseDirectoryEntryNode
from .release import BaseReleaseNode
@@ -15,16 +17,22 @@
from .snapshot_branch import BaseSnapshotBranchNode
+def read_and_validate_content_hashes(hashes) -> Dict[str, bytes]:
+ try:
+ return {
+ hash_type: hashutil.hash_to_bytes(hash_value)
+ for (hash_type, hash_value) in hashes
+ }
+ except ValueError as e:
+ # raise an input error in case of an invalid hash
+ raise InvalidInputError("Invalid content hash", e)
+
+
class BaseContentNode(BaseSWHNode):
"""
Base resolver for all the content nodes
"""
- def _get_content_by_hashes(self, hashes: dict):
- content = self.archive.get_contents(hashes)
- # in case of a conflict, return the first element
- return content[0] if content else None
-
@property
def hashes(self):
# FIXME, use a Node instead
@@ -65,33 +73,19 @@
return "Content"
-class ContentNode(BaseContentNode):
- """
- Node resolver for a content requested directly with its SWHID
- """
-
- def _get_node_data(self):
- hashes = {"sha1_git": self.kwargs.get("swhid").object_id}
- return self._get_content_by_hashes(hashes)
-
-
-class HashContentNode(BaseContentNode):
+class ContentbyHashesNode(BaseContentNode):
"""
- Node resolver for a content requested with one or more hashes
+ Node resolver for a content requested with all of its hashes
+ A single content object will be returned
"""
- def _get_node_data(self):
- try:
- hashes = {
- hash_type: hashutil.hash_to_bytes(hash_value)
- for (hash_type, hash_value) in self.kwargs.items()
- }
- except ValueError as e:
- # raise an input error in case of an invalid hash
- raise InvalidInputError("Invalid content hash", e)
- if not hashes:
- raise InvalidInputError("At least one of the four hashes must be provided")
- return self._get_content_by_hashes(hashes)
+ def _get_node_data(self) -> Optional[Content]:
+ hashes = read_and_validate_content_hashes(self.kwargs.items())
+ contents = self.archive.get_contents(hashes=hashes)
+ if len(contents) > 1:
+ # This situation is not expected to happen IRL
+ raise DataError("Content hash conflict for the set ", hashes)
+ return contents[0] if contents else None
class TargetContentNode(BaseContentNode):
@@ -107,5 +101,37 @@
BaseSnapshotBranchNode,
]
- def _get_node_data(self):
- return self._get_content_by_hashes(hashes={"sha1_git": self.obj.target_hash})
+ def _get_node_data(self) -> Optional[Content]:
+ # FIXME, this is not considering hash collisions
+ # and could return a wrong object in very rare situations
+ contents = self.archive.get_contents(hashes={"sha1_git": self.obj.target_hash})
+ # always returning the first content from the storage
+ return contents[0] if contents else None
+
+
+class ContentSwhidList(BaseList):
+ """
+ Return a non paginated list of contents for the given SWHID
+ This will return a single item in most of the cases
+ """
+
+ _node_class = BaseContentNode
+
+ def _get_results(self) -> List[Content]:
+ hashes = {"sha1_git": self.kwargs.get("swhid").object_id}
+ return self.archive.get_contents(hashes=hashes)
+
+
+class ContentHashList(BaseList):
+ """
+ Return a non paginated list of contents for the given hashes
+ This will return a single item in most of the cases
+ """
+
+ _node_class = BaseContentNode
+
+ def _get_results(self) -> List[Content]:
+ hashes = read_and_validate_content_hashes(self.kwargs.items())
+ if not hashes:
+ raise InvalidInputError("At least one of the four hashes must be provided")
+ return self.archive.get_contents(hashes=hashes)
diff --git a/swh/graphql/resolvers/resolver_factory.py b/swh/graphql/resolvers/resolver_factory.py
--- a/swh/graphql/resolvers/resolver_factory.py
+++ b/swh/graphql/resolvers/resolver_factory.py
@@ -9,7 +9,12 @@
from .base_connection import BaseConnection, BaseList
from .base_node import BaseNode
-from .content import ContentNode, HashContentNode, TargetContentNode
+from .content import (
+ ContentbyHashesNode,
+ ContentHashList,
+ ContentSwhidList,
+ TargetContentNode,
+)
from .directory import DirectoryNode, RevisionDirectoryNode, TargetDirectoryNode
from .directory_entry import DirectoryEntryConnection, DirectoryEntryNode
from .origin import OriginConnection, OriginNode, TargetOriginNode
@@ -56,8 +61,7 @@
"release-content": TargetContentNode,
"directory": DirectoryNode,
"directory-entry": DirectoryEntryNode,
- "content": ContentNode,
- "content-by-hash": HashContentNode,
+ "content-by-hashes": ContentbyHashesNode,
"dir-entry-content": TargetContentNode,
"dir-entry-directory": TargetDirectoryNode,
"dir-entry-revision": TargetRevisionNode,
@@ -109,6 +113,8 @@
"revision-author": RevisionAuthorList,
"revision-committer": RevisionCommitterList,
"release-author": ReleaseAuthorList,
+ "contents-swhid": ContentSwhidList,
+ "contents-hashes": ContentHashList,
}
@classmethod
diff --git a/swh/graphql/resolvers/resolvers.py b/swh/graphql/resolvers/resolvers.py
--- a/swh/graphql/resolvers/resolvers.py
+++ b/swh/graphql/resolvers/resolvers.py
@@ -177,13 +177,6 @@
return NodeObjectFactory.create(f"dir-entry-{obj.targetType}", obj, info, **kw)
-@query.field("content")
-def content_resolver(
- obj: None, info: GraphQLResolveInfo, **kw
-) -> rs.content.ContentNode:
- return NodeObjectFactory.create("content", obj, info, **kw)
-
-
@search_result.field("target")
def search_result_target_resolver(
obj: rs.search.SearchResultNode, info: GraphQLResolveInfo, **kw
@@ -203,10 +196,10 @@
@query.field("contentByHashes")
-def content_by_hash_resolver(
+def content_by_hashes_resolver(
obj: None, info: GraphQLResolveInfo, **kw
-) -> rs.content.ContentNode:
- return NodeObjectFactory.create("content-by-hash", obj, info, **kw)
+) -> rs.content.ContentbyHashesNode:
+ return NodeObjectFactory.create("content-by-hashes", obj, info, **kw)
# Connection resolvers
@@ -279,6 +272,20 @@
# Simple list resolvers (lists without paginations)
+@query.field("contentsBySWHID")
+def contnets_by_swhid_resolver(
+ obj: None, info: GraphQLResolveInfo, **kw
+) -> rs.content.ContentSwhidList:
+ return SimpleListFactory.create("contents-swhid", obj, info, **kw)
+
+
+@query.field("contentsByHashes")
+def contnets_by_hashes_resolver(
+ obj: None, info: GraphQLResolveInfo, **kw
+) -> rs.content.ContentHashList:
+ return SimpleListFactory.create("contents-hashes", obj, info, **kw)
+
+
@query.field("resolveSWHID")
def search_swhid_resolver(
obj: None, info: GraphQLResolveInfo, **kw
diff --git a/swh/graphql/schema/schema.graphql b/swh/graphql/schema/schema.graphql
--- a/swh/graphql/schema/schema.graphql
+++ b/swh/graphql/schema/schema.graphql
@@ -1105,24 +1105,20 @@
): DirectoryEntry
"""
- Get the content with a SWHID
+ Get a list of contents for the given SWHID
"""
- content(
+ contentsBySWHID (
"""
- SWHID of the content object
+ SWHID to look for
"""
swhid: SWHID!
- ): Content
+ ): [Content]
"""
- Get a content that match all the given hashes.
- This entrypoint can be used to uniquely identify a content
- in the event of hash conflicts. Use multiple hashes to
- get an accurate result.
-
- At least one of the four hashes must be provided.
+ Get contents with hashes
+ At least one of the four hashes must be provided
"""
- contentByHashes(
+ contentsByHashes(
sha1: String
sha256: String
@@ -1130,6 +1126,22 @@
sha1_git: String
blake2s256: String
+ ): [Content]
+
+ """
+ Get a content that match all the given hashes.
+ All the four hashes must be provided
+ This entrypoint can be used to uniquely identify
+ a content in the event of hash collisions
+ """
+ contentByHashes(
+ sha1: String!
+
+ sha256: String!
+
+ sha1_git: String!
+
+ blake2s256: String!
): Content
"""
diff --git a/swh/graphql/tests/functional/test_content.py b/swh/graphql/tests/functional/test_content.py
--- a/swh/graphql/tests/functional/test_content.py
+++ b/swh/graphql/tests/functional/test_content.py
@@ -10,10 +10,12 @@
@pytest.mark.parametrize("content", get_contents())
-def test_get_content_with_swhid(client, content):
+def test_get_content_with_hashes(client, content):
query_str = """
- query getContent($swhid: SWHID!) {
- content(swhid: $swhid) {
+ query getContentByHashes($sha1: String!, $sha256: String!,
+ $sha1_git: String!, $blake2s256: String!) {
+ contentByHashes(sha1: $sha1, sha256: $sha256, sha1_git: $sha1_git,
+ blake2s256: $blake2s256) {
swhid
id
hashes {
@@ -39,7 +41,14 @@
}
}
"""
- data, _ = utils.get_query_response(client, query_str, swhid=str(content.swhid()))
+ data, _ = utils.get_query_response(
+ client,
+ query_str,
+ blake2s256=content.blake2s256.hex(),
+ sha1=content.sha1.hex(),
+ sha1_git=content.sha1_git.hex(),
+ sha256=content.sha256.hex(),
+ )
archive_url = "https://archive.softwareheritage.org/api/1/"
response = {
"swhid": str(content.swhid()),
@@ -59,13 +68,26 @@
"language": None,
"license": None,
}
- assert data["content"] == response
+ assert data["contentByHashes"] == response
-def test_get_content_with_invalid_swhid(client):
+@pytest.mark.parametrize("content", get_contents())
+def test_get_contents_with_swhid(client, content):
query_str = """
- query getContent($swhid: SWHID!) {
- content(swhid: $swhid) {
+ query getContents($swhid: SWHID!) {
+ contentsBySWHID(swhid: $swhid) {
+ swhid
+ }
+ }
+ """
+ data, _ = utils.get_query_response(client, query_str, swhid=str(content.swhid()))
+ assert data["contentsBySWHID"] == [{"swhid": str(content.swhid())}]
+
+
+def test_get_contents_with_invalid_swhid(client):
+ query_str = """
+ query getContents($swhid: SWHID!) {
+ contentsBySWHID(swhid: $swhid) {
swhid
}
}
@@ -76,11 +98,27 @@
assert "Input error: Invalid SWHID" in errors[0]["message"]
+def test_get_contents_with_missing_swhid(client):
+ missing_sha1 = "1" * 40
+ query_str = """
+ query getContents($swhid: SWHID!) {
+ contentsBySWHID(swhid: $swhid) {
+ swhid
+ }
+ }
+ """
+ data, _ = utils.get_query_response(
+ client, query_str, swhid=f"swh:1:cnt:{missing_sha1}"
+ )
+ assert data["contentsBySWHID"] == []
+
+
@pytest.mark.parametrize("content", get_contents())
-def test_get_content_with_hash(client, content):
+def test_get_contents_with_hashes(client, content):
query_str = """
- query getContent($sha1: String, $sha1_git: String, $sha256: String, $blake2s256: String) {
- contentByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
+ query getContents($sha1: String, $sha1_git: String, $sha256: String,
+ $blake2s256: String) {
+ contentsByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
blake2s256: $blake2s256) {
swhid
}
@@ -94,14 +132,15 @@
sha256=content.sha256.hex(),
blake2s256=content.blake2s256.hex(),
)
- assert data["contentByHashes"] == {"swhid": str(content.swhid())}
+ assert data["contentsByHashes"] == [{"swhid": str(content.swhid())}]
@pytest.mark.parametrize("content", get_contents())
def test_get_content_with_single_hash(client, content):
query_str = """
- query getContent($sha1: String, $sha1_git: String, $sha256: String, $blake2s256: String) {
- contentByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
+ query getContents($sha1: String, $sha1_git: String, $sha256: String,
+ $blake2s256: String) {
+ contentsByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
blake2s256: $blake2s256) {
swhid
}
@@ -112,34 +151,43 @@
query_str,
sha1=content.sha1.hex(),
)
- assert data["contentByHashes"] == {"swhid": str(content.swhid())}
+ assert data["contentsByHashes"] == [{"swhid": str(content.swhid())}]
+
+ data, _ = utils.get_query_response(
+ client,
+ query_str,
+ blake2s256=content.blake2s256.hex(),
+ )
+ assert data["contentsByHashes"] == [{"swhid": str(content.swhid())}]
@pytest.mark.parametrize("content", get_contents())
-def test_get_content_with_one_non_matching_hash(client, content):
+def test_get_contents_with_one_non_matching_hash(client, content):
query_str = """
- query getContent($sha1: String, $sha1_git: String, $sha256: String, $blake2s256: String) {
- contentByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
+ query getContents($sha1: String, $sha1_git: String, $sha256: String, $blake2s256: String) {
+ contentsByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
blake2s256: $blake2s256) {
swhid
}
}
"""
- utils.assert_missing_object(
+ data, _ = utils.get_query_response(
client,
query_str,
- obj_type="contentByHashes",
+ obj_type="contentsByHashes",
sha1=content.sha1.hex(),
sha1_git="a" * 20, # hash is valid, but not matching the object
)
+ assert data["contentsByHashes"] == []
def test_get_content_with_invalid_hashes(client):
content = get_contents()[0]
query_str = """
- query getContent($sha1: String, $sha1_git: String, $sha256: String, $blake2s256: String) {
- contentByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
- blake2s256: $blake2s256) {
+ query getContents($sha1: String, $sha1_git: String, $sha256: String,
+ $blake2s256: String) {
+ contentsByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
+ blake2s256: $blake2s256) {
swhid
}
}
@@ -158,8 +206,8 @@
def test_get_content_with_no_hashes(client):
query_str = """
- query getContent($sha1: String, $sha1_git: String, $sha256: String, $blake2s256: String) {
- contentByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
+ query getContents($sha1: String, $sha1_git: String, $sha256: String, $blake2s256: String) {
+ contentsByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
blake2s256: $blake2s256) {
swhid
}
@@ -203,20 +251,3 @@
"length": 4,
"swhid": "swh:1:cnt:86bc6b377e9d25f9d26777a4a28d08e63e7c5779",
}
-
-
-def test_get_content_with_unknown_swhid(client):
- unknown_sha1 = "1" * 40
- query_str = """
- query getDirectory($swhid: SWHID!) {
- content(swhid: $swhid) {
- swhid
- }
- }
- """
- utils.assert_missing_object(
- client,
- query_str,
- obj_type="content",
- swhid=f"swh:1:cnt:{unknown_sha1}",
- )
diff --git a/swh/graphql/tests/unit/resolvers/test_resolver_factory.py b/swh/graphql/tests/unit/resolvers/test_resolver_factory.py
--- a/swh/graphql/tests/unit/resolvers/test_resolver_factory.py
+++ b/swh/graphql/tests/unit/resolvers/test_resolver_factory.py
@@ -17,6 +17,6 @@
with pytest.raises(AttributeError):
resolver_factory.ConnectionObjectFactory().create("invalid", None, None)
- def test_get_list_resolver_invalid_type(self):
+ def test_get_base_list_resolver_invalid_type(self):
with pytest.raises(AttributeError):
resolver_factory.SimpleListFactory().create("invalid", None, None)
diff --git a/swh/graphql/tests/unit/resolvers/test_resolvers.py b/swh/graphql/tests/unit/resolvers/test_resolvers.py
--- a/swh/graphql/tests/unit/resolvers/test_resolvers.py
+++ b/swh/graphql/tests/unit/resolvers/test_resolvers.py
@@ -31,7 +31,7 @@
(rs.revision_directory_resolver, resolvers.directory.RevisionDirectoryNode),
(rs.release_resolver, resolvers.release.ReleaseNode),
(rs.directory_resolver, resolvers.directory.DirectoryNode),
- (rs.content_resolver, resolvers.content.ContentNode),
+ (rs.content_by_hashes_resolver, resolvers.content.ContentbyHashesNode),
],
)
def test_node_resolver(self, mocker, dummy_node, resolver_func, node_cls):
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Dec 19, 9:06 AM (15 h, 17 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3214808
Attached To
D8978: Return a content list instead of an object to handle hash collisions
Event Timeline
Log In to Comment