Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7163773
D8978.id32355.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
18 KB
Subscribers
None
D8978.id32355.diff
View Options
diff --git a/swh/graphql/errors/__init__.py b/swh/graphql/errors/__init__.py
--- a/swh/graphql/errors/__init__.py
+++ b/swh/graphql/errors/__init__.py
@@ -4,6 +4,7 @@
# See top-level LICENSE file for more information
from .errors import (
+ DataError,
InvalidInputError,
NullableObjectError,
ObjectNotFoundError,
@@ -16,5 +17,6 @@
"PaginationError",
"InvalidInputError",
"NullableObjectError",
+ "DataError",
"format_error",
]
diff --git a/swh/graphql/errors/errors.py b/swh/graphql/errors/errors.py
--- a/swh/graphql/errors/errors.py
+++ b/swh/graphql/errors/errors.py
@@ -33,3 +33,7 @@
class NullableObjectError(Exception):
pass
+
+
+class DataError(Exception):
+ pass
diff --git a/swh/graphql/resolvers/content.py b/swh/graphql/resolvers/content.py
--- a/swh/graphql/resolvers/content.py
+++ b/swh/graphql/resolvers/content.py
@@ -5,9 +5,11 @@
from typing import Union
-from swh.graphql.errors import InvalidInputError
+from swh.graphql.errors import DataError, InvalidInputError
+from swh.graphql.utils import utils
from swh.model import hashutil
+from .base_connection import BaseConnection
from .base_node import BaseSWHNode
from .directory_entry import BaseDirectoryEntryNode
from .release import BaseReleaseNode
@@ -15,16 +17,24 @@
from .snapshot_branch import BaseSnapshotBranchNode
+def read_and_validate_content_hashes(hashes):
+ try:
+ hashes = {
+ hash_type: hashutil.hash_to_bytes(hash_value)
+ for (hash_type, hash_value) in hashes
+ }
+ except ValueError as e:
+ # raise an input error in case of an invalid hash
+ raise InvalidInputError("Invalid content hash", e)
+ else:
+ return hashes
+
+
class BaseContentNode(BaseSWHNode):
"""
Base resolver for all the content nodes
"""
- def _get_content_by_hashes(self, hashes: dict):
- content = self.archive.get_contents(hashes)
- # in case of a conflict, return the first element
- return content[0] if content else None
-
@property
def hashes(self):
# FIXME, use a Node instead
@@ -65,33 +75,19 @@
return "Content"
-class ContentNode(BaseContentNode):
- """
- Node resolver for a content requested directly with its SWHID
- """
-
- def _get_node_data(self):
- hashes = {"sha1_git": self.kwargs.get("swhid").object_id}
- return self._get_content_by_hashes(hashes)
-
-
-class HashContentNode(BaseContentNode):
+class ContentbyHashesNode(BaseContentNode):
"""
- Node resolver for a content requested with one or more hashes
+ Node resolver for a content requested with all of its hashes
+ A single content object will be returned
"""
def _get_node_data(self):
- try:
- hashes = {
- hash_type: hashutil.hash_to_bytes(hash_value)
- for (hash_type, hash_value) in self.kwargs.items()
- }
- except ValueError as e:
- # raise an input error in case of an invalid hash
- raise InvalidInputError("Invalid content hash", e)
- if not hashes:
- raise InvalidInputError("At least one of the four hashes must be provided")
- return self._get_content_by_hashes(hashes)
+ hashes = read_and_validate_content_hashes(self.kwargs.items())
+ contents = self.archive.get_contents(hashes=hashes)
+ if len(contents) > 1:
+ # This situation is not expected to happen IRL
+ raise DataError("Content hash conflict for the set ", hashes)
+ return contents[0] if contents else None
class TargetContentNode(BaseContentNode):
@@ -108,4 +104,44 @@
]
def _get_node_data(self):
- return self._get_content_by_hashes(hashes={"sha1_git": self.obj.target_hash})
+ # FIXME, this is not considering hash collisions
+ # and could return a wrong object in very rare situations
+ contents = self.archive.get_contents(hashes={"sha1_git": self.obj.target_hash})
+ # always returning the first content from the storage
+ return contents[0] if contents else None
+
+
+class ContentSwhidConnection(BaseConnection):
+ """
+ Return a paginated list of contents for the given SWHID
+ This will return a single item in most of the cases
+ """
+
+ _node_class = BaseContentNode
+
+ def _get_paged_result(self):
+ hashes = {"sha1_git": self.kwargs.get("swhid").object_id}
+ return utils.paginated(
+ self.archive.get_contents(hashes=hashes),
+ self._get_first_arg(),
+ self._get_after_arg(),
+ )
+
+
+class ContentHashConnection(BaseConnection):
+ """
+ Return a paginated list of contents for the given hashes
+ This will return a single item in most of the cases
+ """
+
+ _node_class = BaseContentNode
+
+ def _get_paged_result(self):
+ hashes = read_and_validate_content_hashes(self.kwargs.items())
+ if not hashes:
+ raise InvalidInputError("At least one of the four hashes must be provided")
+ return utils.paginated(
+ self.archive.get_contents(hashes=hashes),
+ self._get_first_arg(),
+ self._get_after_arg(),
+ )
diff --git a/swh/graphql/resolvers/resolver_factory.py b/swh/graphql/resolvers/resolver_factory.py
--- a/swh/graphql/resolvers/resolver_factory.py
+++ b/swh/graphql/resolvers/resolver_factory.py
@@ -9,7 +9,12 @@
from .base_connection import BaseConnection
from .base_node import BaseNode
-from .content import ContentNode, HashContentNode, TargetContentNode
+from .content import (
+ ContentbyHashesNode,
+ ContentHashConnection,
+ ContentSwhidConnection,
+ TargetContentNode,
+)
from .directory import DirectoryNode, RevisionDirectoryNode, TargetDirectoryNode
from .directory_entry import DirectoryEntryConnection, DirectoryEntryNode
from .origin import OriginConnection, OriginNode, TargetOriginNode
@@ -55,8 +60,8 @@
"release-content": TargetContentNode,
"directory": DirectoryNode,
"directory-entry": DirectoryEntryNode,
- "content": ContentNode,
- "content-by-hash": HashContentNode,
+ # "content": ContentNode,
+ "content-by-hashes": ContentbyHashesNode,
"dir-entry-content": TargetContentNode,
"dir-entry-directory": TargetDirectoryNode,
"dir-entry-revision": TargetRevisionNode,
@@ -92,6 +97,8 @@
"revision-log": LogRevisionConnection,
"directory-entries": DirectoryEntryConnection,
"resolve-swhid": ResolveSwhidConnection,
+ "contents-swhid": ContentSwhidConnection,
+ "contents-hashes": ContentHashConnection,
"search": SearchConnection,
}
diff --git a/swh/graphql/resolvers/resolvers.py b/swh/graphql/resolvers/resolvers.py
--- a/swh/graphql/resolvers/resolvers.py
+++ b/swh/graphql/resolvers/resolvers.py
@@ -173,13 +173,6 @@
return NodeObjectFactory.create(f"dir-entry-{obj.targetType}", obj, info, **kw)
-@query.field("content")
-def content_resolver(
- obj: None, info: GraphQLResolveInfo, **kw
-) -> rs.content.ContentNode:
- return NodeObjectFactory.create("content", obj, info, **kw)
-
-
@search_result.field("target")
def search_result_target_resolver(
obj: rs.search.SearchResultNode, info: GraphQLResolveInfo, **kw
@@ -199,10 +192,10 @@
@query.field("contentByHashes")
-def content_by_hash_resolver(
+def content_by_hashes_resolver(
obj: None, info: GraphQLResolveInfo, **kw
-) -> rs.content.ContentNode:
- return NodeObjectFactory.create("content-by-hash", obj, info, **kw)
+) -> rs.content.ContentbyHashesNode:
+ return NodeObjectFactory.create("content-by-hashes", obj, info, **kw)
# Connection resolvers
@@ -272,6 +265,20 @@
return ConnectionObjectFactory.create("resolve-swhid", obj, info, **kw)
+@query.field("contentsBySWHID")
+def contnets_by_swhid_resolver(
+ obj: None, info: GraphQLResolveInfo, **kw
+) -> rs.content.ContentSwhidConnection:
+ return ConnectionObjectFactory.create("contents-swhid", obj, info, **kw)
+
+
+@query.field("contentsByHashes")
+def contnets_by_hashes_resolver(
+ obj: None, info: GraphQLResolveInfo, **kw
+) -> rs.content.ContentHashConnection:
+ return ConnectionObjectFactory.create("contents-hashes", obj, info, **kw)
+
+
@query.field("search")
def search_resolver(
obj: None, info: GraphQLResolveInfo, **kw
diff --git a/swh/graphql/schema/schema.graphql b/swh/graphql/schema/schema.graphql
--- a/swh/graphql/schema/schema.graphql
+++ b/swh/graphql/schema/schema.graphql
@@ -835,6 +835,43 @@
): DirectoryEntryConnection
}
+type ContentConnection {
+ """
+ List of content edges
+ """
+ edges: [ContentEdge]
+
+ """
+ List of content objects
+ """
+ nodes: [Content]
+
+ """
+ Information for pagination
+ """
+ pageInfo: PageInfo!
+
+ """
+ Total number of content objects in the connection
+ """
+ totalCount: Int
+}
+
+"""
+Edge in content connection
+"""
+type ContentEdge {
+ """
+ Cursor to request the next page after the item
+ """
+ cursor: String!
+
+ """
+ Content object
+ """
+ node: Content
+}
+
"""
An object with different content hashes
"""
@@ -1105,24 +1142,30 @@
): DirectoryEntry
"""
- Get the content with a SWHID
+ Get contents for the given SWHID
"""
- content(
+ contentsBySWHID (
"""
- SWHID of the content object
+ SWHID of the content
"""
swhid: SWHID!
- ): Content
- """
- Get a content that match all the given hashes.
- This entrypoint can be used to uniquely identify a content
- in the event of hash conflicts. Use multiple hashes to
- get an accurate result.
+ """
+ Returns the first _n_ elements from the list
+ """
+ first: Int
+ """
+ Returns the page after the cursor
+ """
+ after: String
+ ): ContentConnection
+
+ """
+ Get contents with hashes
At least one of the four hashes must be provided.
"""
- contentByHashes(
+ contentsByHashes(
sha1: String
sha256: String
@@ -1130,6 +1173,32 @@
sha1_git: String
blake2s256: String
+
+ """
+ Returns the first _n_ elements from the list
+ """
+ first: Int
+
+ """
+ Returns the page after the cursor
+ """
+ after: String
+ ): ContentConnection
+
+ """
+ Get a content that match all the given hashes.
+ All the four hashes must be provided
+ This entrypoint can be used to uniquely identify a content
+ in the event of hash conflicts.
+ """
+ contentByHashes(
+ sha1: String!
+
+ sha256: String!
+
+ sha1_git: String!
+
+ blake2s256: String!
): Content
"""
diff --git a/swh/graphql/tests/functional/test_content.py b/swh/graphql/tests/functional/test_content.py
--- a/swh/graphql/tests/functional/test_content.py
+++ b/swh/graphql/tests/functional/test_content.py
@@ -10,10 +10,12 @@
@pytest.mark.parametrize("content", get_contents())
-def test_get_content_with_swhid(client, content):
+def test_get_content_with_hashes(client, content):
query_str = """
- query getContent($swhid: SWHID!) {
- content(swhid: $swhid) {
+ query getContentByHashes($sha1: String!, $sha256: String!,
+ $sha1_git: String!, $blake2s256: String!) {
+ contentByHashes(sha1: $sha1, sha256: $sha256, sha1_git: $sha1_git,
+ blake2s256: $blake2s256) {
swhid
id
hashes {
@@ -39,7 +41,14 @@
}
}
"""
- data, _ = utils.get_query_response(client, query_str, swhid=str(content.swhid()))
+ data, _ = utils.get_query_response(
+ client,
+ query_str,
+ blake2s256=content.blake2s256.hex(),
+ sha1=content.sha1.hex(),
+ sha1_git=content.sha1_git.hex(),
+ sha256=content.sha256.hex(),
+ )
archive_url = "https://archive.softwareheritage.org/api/1/"
response = {
"swhid": str(content.swhid()),
@@ -59,14 +68,31 @@
"language": None,
"license": None,
}
- assert data["content"] == response
+ assert data["contentByHashes"] == response
-def test_get_content_with_invalid_swhid(client):
+@pytest.mark.parametrize("content", get_contents())
+def test_get_contents_with_swhid(client, content):
query_str = """
- query getContent($swhid: SWHID!) {
- content(swhid: $swhid) {
- swhid
+ query getContents($swhid: SWHID!) {
+ contentsBySWHID(swhid: $swhid) {
+ nodes {
+ swhid
+ }
+ }
+ }
+ """
+ data, _ = utils.get_query_response(client, query_str, swhid=str(content.swhid()))
+ assert data["contentsBySWHID"]["nodes"] == [{"swhid": str(content.swhid())}]
+
+
+def test_get_contents_with_invalid_swhid(client):
+ query_str = """
+ query getContents($swhid: SWHID!) {
+ contentsBySWHID(swhid: $swhid) {
+ nodes {
+ swhid
+ }
}
}
"""
@@ -76,13 +102,33 @@
assert "Input error: Invalid SWHID" in errors[0]["message"]
+def test_get_contents_with_missing_swhid(client):
+ missing_sha1 = "1" * 40
+ query_str = """
+ query getContents($swhid: SWHID!) {
+ contentsBySWHID(swhid: $swhid) {
+ nodes {
+ swhid
+ }
+ }
+ }
+ """
+ data, _ = utils.get_query_response(
+ client, query_str, swhid=f"swh:1:cnt:{missing_sha1}"
+ )
+ assert data["contentsBySWHID"]["nodes"] == []
+
+
@pytest.mark.parametrize("content", get_contents())
-def test_get_content_with_hash(client, content):
+def test_get_contents_with_hashes(client, content):
query_str = """
- query getContent($sha1: String, $sha1_git: String, $sha256: String, $blake2s256: String) {
- contentByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
+ query getContents($sha1: String, $sha1_git: String, $sha256: String,
+ $blake2s256: String) {
+ contentsByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
blake2s256: $blake2s256) {
- swhid
+ nodes {
+ swhid
+ }
}
}
"""
@@ -94,16 +140,19 @@
sha256=content.sha256.hex(),
blake2s256=content.blake2s256.hex(),
)
- assert data["contentByHashes"] == {"swhid": str(content.swhid())}
+ assert data["contentsByHashes"]["nodes"] == [{"swhid": str(content.swhid())}]
@pytest.mark.parametrize("content", get_contents())
def test_get_content_with_single_hash(client, content):
query_str = """
- query getContent($sha1: String, $sha1_git: String, $sha256: String, $blake2s256: String) {
- contentByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
+ query getContents($sha1: String, $sha1_git: String, $sha256: String,
+ $blake2s256: String) {
+ contentsByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
blake2s256: $blake2s256) {
- swhid
+ nodes {
+ swhid
+ }
}
}
"""
@@ -112,35 +161,48 @@
query_str,
sha1=content.sha1.hex(),
)
- assert data["contentByHashes"] == {"swhid": str(content.swhid())}
+ assert data["contentsByHashes"]["nodes"] == [{"swhid": str(content.swhid())}]
+
+ data, _ = utils.get_query_response(
+ client,
+ query_str,
+ blake2s256=content.blake2s256.hex(),
+ )
+ assert data["contentsByHashes"]["nodes"] == [{"swhid": str(content.swhid())}]
@pytest.mark.parametrize("content", get_contents())
-def test_get_content_with_one_non_matching_hash(client, content):
+def test_get_contents_with_one_non_matching_hash(client, content):
query_str = """
- query getContent($sha1: String, $sha1_git: String, $sha256: String, $blake2s256: String) {
- contentByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
+ query getContents($sha1: String, $sha1_git: String, $sha256: String, $blake2s256: String) {
+ contentsByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
blake2s256: $blake2s256) {
- swhid
+ nodes {
+ swhid
+ }
}
}
"""
- utils.assert_missing_object(
+ data, _ = utils.get_query_response(
client,
query_str,
- obj_type="contentByHashes",
+ obj_type="contentsByHashes",
sha1=content.sha1.hex(),
sha1_git="a" * 20, # hash is valid, but not matching the object
)
+ assert data["contentsByHashes"]["nodes"] == []
def test_get_content_with_invalid_hashes(client):
content = get_contents()[0]
query_str = """
- query getContent($sha1: String, $sha1_git: String, $sha256: String, $blake2s256: String) {
- contentByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
- blake2s256: $blake2s256) {
- swhid
+ query getContents($sha1: String, $sha1_git: String, $sha256: String,
+ $blake2s256: String) {
+ contentsByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
+ blake2s256: $blake2s256) {
+ nodes {
+ swhid
+ }
}
}
"""
@@ -158,10 +220,12 @@
def test_get_content_with_no_hashes(client):
query_str = """
- query getContent($sha1: String, $sha1_git: String, $sha256: String, $blake2s256: String) {
- contentByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
+ query getContents($sha1: String, $sha1_git: String, $sha256: String, $blake2s256: String) {
+ contentsByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
blake2s256: $blake2s256) {
- swhid
+ nodes {
+ swhid
+ }
}
}
"""
@@ -203,20 +267,3 @@
"length": 4,
"swhid": "swh:1:cnt:86bc6b377e9d25f9d26777a4a28d08e63e7c5779",
}
-
-
-def test_get_content_with_unknown_swhid(client):
- unknown_sha1 = "1" * 40
- query_str = """
- query getDirectory($swhid: SWHID!) {
- content(swhid: $swhid) {
- swhid
- }
- }
- """
- utils.assert_missing_object(
- client,
- query_str,
- obj_type="content",
- swhid=f"swh:1:cnt:{unknown_sha1}",
- )
diff --git a/swh/graphql/tests/unit/resolvers/test_resolvers.py b/swh/graphql/tests/unit/resolvers/test_resolvers.py
--- a/swh/graphql/tests/unit/resolvers/test_resolvers.py
+++ b/swh/graphql/tests/unit/resolvers/test_resolvers.py
@@ -31,7 +31,7 @@
(rs.revision_directory_resolver, resolvers.directory.RevisionDirectoryNode),
(rs.release_resolver, resolvers.release.ReleaseNode),
(rs.directory_resolver, resolvers.directory.DirectoryNode),
- (rs.content_resolver, resolvers.content.ContentNode),
+ (rs.content_by_hashes_resolver, resolvers.content.ContentbyHashesNode),
],
)
def test_node_resolver(self, mocker, dummy_node, resolver_func, node_cls):
@@ -58,6 +58,8 @@
rs.directory_entries_resolver,
resolvers.directory_entry.DirectoryEntryConnection,
),
+ (rs.contnets_by_swhid_resolver, resolvers.content.ContentSwhidConnection),
+ (rs.contnets_by_hashes_resolver, resolvers.content.ContentHashConnection),
],
)
def test_connection_resolver(self, resolver_func, connection_cls):
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jan 30, 3:14 PM (58 m, 51 s)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3227392
Attached To
D8978: Return a content list instead of an object to handle hash collisions
Event Timeline
Log In to Comment