Page MenuHomeSoftware Heritage

D8978.id32355.diff
No OneTemporary

D8978.id32355.diff

diff --git a/swh/graphql/errors/__init__.py b/swh/graphql/errors/__init__.py
--- a/swh/graphql/errors/__init__.py
+++ b/swh/graphql/errors/__init__.py
@@ -4,6 +4,7 @@
# See top-level LICENSE file for more information
from .errors import (
+ DataError,
InvalidInputError,
NullableObjectError,
ObjectNotFoundError,
@@ -16,5 +17,6 @@
"PaginationError",
"InvalidInputError",
"NullableObjectError",
+ "DataError",
"format_error",
]
diff --git a/swh/graphql/errors/errors.py b/swh/graphql/errors/errors.py
--- a/swh/graphql/errors/errors.py
+++ b/swh/graphql/errors/errors.py
@@ -33,3 +33,7 @@
class NullableObjectError(Exception):
pass
+
+
+class DataError(Exception):
+ pass
diff --git a/swh/graphql/resolvers/content.py b/swh/graphql/resolvers/content.py
--- a/swh/graphql/resolvers/content.py
+++ b/swh/graphql/resolvers/content.py
@@ -5,9 +5,11 @@
from typing import Union
-from swh.graphql.errors import InvalidInputError
+from swh.graphql.errors import DataError, InvalidInputError
+from swh.graphql.utils import utils
from swh.model import hashutil
+from .base_connection import BaseConnection
from .base_node import BaseSWHNode
from .directory_entry import BaseDirectoryEntryNode
from .release import BaseReleaseNode
@@ -15,16 +17,24 @@
from .snapshot_branch import BaseSnapshotBranchNode
+def read_and_validate_content_hashes(hashes):
+ try:
+ hashes = {
+ hash_type: hashutil.hash_to_bytes(hash_value)
+ for (hash_type, hash_value) in hashes
+ }
+ except ValueError as e:
+ # raise an input error in case of an invalid hash
+ raise InvalidInputError("Invalid content hash", e)
+ else:
+ return hashes
+
+
class BaseContentNode(BaseSWHNode):
"""
Base resolver for all the content nodes
"""
- def _get_content_by_hashes(self, hashes: dict):
- content = self.archive.get_contents(hashes)
- # in case of a conflict, return the first element
- return content[0] if content else None
-
@property
def hashes(self):
# FIXME, use a Node instead
@@ -65,33 +75,19 @@
return "Content"
-class ContentNode(BaseContentNode):
- """
- Node resolver for a content requested directly with its SWHID
- """
-
- def _get_node_data(self):
- hashes = {"sha1_git": self.kwargs.get("swhid").object_id}
- return self._get_content_by_hashes(hashes)
-
-
-class HashContentNode(BaseContentNode):
+class ContentbyHashesNode(BaseContentNode):
"""
- Node resolver for a content requested with one or more hashes
+ Node resolver for a content requested with all of its hashes.
+ A single content object will be returned.
"""
def _get_node_data(self):
- try:
- hashes = {
- hash_type: hashutil.hash_to_bytes(hash_value)
- for (hash_type, hash_value) in self.kwargs.items()
- }
- except ValueError as e:
- # raise an input error in case of an invalid hash
- raise InvalidInputError("Invalid content hash", e)
- if not hashes:
- raise InvalidInputError("At least one of the four hashes must be provided")
- return self._get_content_by_hashes(hashes)
+ hashes = read_and_validate_content_hashes(self.kwargs.items())
+ contents = self.archive.get_contents(hashes=hashes)
+ if len(contents) > 1:
+ # This situation is not expected to happen in practice
+ raise DataError("Content hash conflict for the set ", hashes)
+ return contents[0] if contents else None
class TargetContentNode(BaseContentNode):
@@ -108,4 +104,44 @@
]
def _get_node_data(self):
- return self._get_content_by_hashes(hashes={"sha1_git": self.obj.target_hash})
+ # FIXME: this does not account for hash collisions
+ # and could return the wrong object in very rare situations
+ contents = self.archive.get_contents(hashes={"sha1_git": self.obj.target_hash})
+ # always returning the first content from the storage
+ return contents[0] if contents else None
+
+
+class ContentSwhidConnection(BaseConnection):
+ """
+ Return a paginated list of contents for the given SWHID.
+ This will return a single item in most cases.
+ """
+
+ _node_class = BaseContentNode
+
+ def _get_paged_result(self):
+ hashes = {"sha1_git": self.kwargs.get("swhid").object_id}
+ return utils.paginated(
+ self.archive.get_contents(hashes=hashes),
+ self._get_first_arg(),
+ self._get_after_arg(),
+ )
+
+
+class ContentHashConnection(BaseConnection):
+ """
+ Return a paginated list of contents for the given hashes.
+ This will return a single item in most cases.
+ """
+
+ _node_class = BaseContentNode
+
+ def _get_paged_result(self):
+ hashes = read_and_validate_content_hashes(self.kwargs.items())
+ if not hashes:
+ raise InvalidInputError("At least one of the four hashes must be provided")
+ return utils.paginated(
+ self.archive.get_contents(hashes=hashes),
+ self._get_first_arg(),
+ self._get_after_arg(),
+ )
diff --git a/swh/graphql/resolvers/resolver_factory.py b/swh/graphql/resolvers/resolver_factory.py
--- a/swh/graphql/resolvers/resolver_factory.py
+++ b/swh/graphql/resolvers/resolver_factory.py
@@ -9,7 +9,12 @@
from .base_connection import BaseConnection
from .base_node import BaseNode
-from .content import ContentNode, HashContentNode, TargetContentNode
+from .content import (
+ ContentbyHashesNode,
+ ContentHashConnection,
+ ContentSwhidConnection,
+ TargetContentNode,
+)
from .directory import DirectoryNode, RevisionDirectoryNode, TargetDirectoryNode
from .directory_entry import DirectoryEntryConnection, DirectoryEntryNode
from .origin import OriginConnection, OriginNode, TargetOriginNode
@@ -55,8 +60,8 @@
"release-content": TargetContentNode,
"directory": DirectoryNode,
"directory-entry": DirectoryEntryNode,
- "content": ContentNode,
- "content-by-hash": HashContentNode,
+ # "content": ContentNode,
+ "content-by-hashes": ContentbyHashesNode,
"dir-entry-content": TargetContentNode,
"dir-entry-directory": TargetDirectoryNode,
"dir-entry-revision": TargetRevisionNode,
@@ -92,6 +97,8 @@
"revision-log": LogRevisionConnection,
"directory-entries": DirectoryEntryConnection,
"resolve-swhid": ResolveSwhidConnection,
+ "contents-swhid": ContentSwhidConnection,
+ "contents-hashes": ContentHashConnection,
"search": SearchConnection,
}
diff --git a/swh/graphql/resolvers/resolvers.py b/swh/graphql/resolvers/resolvers.py
--- a/swh/graphql/resolvers/resolvers.py
+++ b/swh/graphql/resolvers/resolvers.py
@@ -173,13 +173,6 @@
return NodeObjectFactory.create(f"dir-entry-{obj.targetType}", obj, info, **kw)
-@query.field("content")
-def content_resolver(
- obj: None, info: GraphQLResolveInfo, **kw
-) -> rs.content.ContentNode:
- return NodeObjectFactory.create("content", obj, info, **kw)
-
-
@search_result.field("target")
def search_result_target_resolver(
obj: rs.search.SearchResultNode, info: GraphQLResolveInfo, **kw
@@ -199,10 +192,10 @@
@query.field("contentByHashes")
-def content_by_hash_resolver(
+def content_by_hashes_resolver(
obj: None, info: GraphQLResolveInfo, **kw
-) -> rs.content.ContentNode:
- return NodeObjectFactory.create("content-by-hash", obj, info, **kw)
+) -> rs.content.ContentbyHashesNode:
+ return NodeObjectFactory.create("content-by-hashes", obj, info, **kw)
# Connection resolvers
@@ -272,6 +265,20 @@
return ConnectionObjectFactory.create("resolve-swhid", obj, info, **kw)
+@query.field("contentsBySWHID")
+def contnets_by_swhid_resolver(
+ obj: None, info: GraphQLResolveInfo, **kw
+) -> rs.content.ContentSwhidConnection:
+ return ConnectionObjectFactory.create("contents-swhid", obj, info, **kw)
+
+
+@query.field("contentsByHashes")
+def contnets_by_hashes_resolver(
+ obj: None, info: GraphQLResolveInfo, **kw
+) -> rs.content.ContentHashConnection:
+ return ConnectionObjectFactory.create("contents-hashes", obj, info, **kw)
+
+
@query.field("search")
def search_resolver(
obj: None, info: GraphQLResolveInfo, **kw
diff --git a/swh/graphql/schema/schema.graphql b/swh/graphql/schema/schema.graphql
--- a/swh/graphql/schema/schema.graphql
+++ b/swh/graphql/schema/schema.graphql
@@ -835,6 +835,43 @@
): DirectoryEntryConnection
}
+type ContentConnection {
+ """
+ List of content edges
+ """
+ edges: [ContentEdge]
+
+ """
+ List of content objects
+ """
+ nodes: [Content]
+
+ """
+ Information for pagination
+ """
+ pageInfo: PageInfo!
+
+ """
+ Total number of content objects in the connection
+ """
+ totalCount: Int
+}
+
+"""
+Edge in content connection
+"""
+type ContentEdge {
+ """
+ Cursor to request the next page after the item
+ """
+ cursor: String!
+
+ """
+ Content object
+ """
+ node: Content
+}
+
"""
An object with different content hashes
"""
@@ -1105,24 +1142,30 @@
): DirectoryEntry
"""
- Get the content with a SWHID
+ Get contents for the given SWHID
"""
- content(
+ contentsBySWHID (
"""
- SWHID of the content object
+ SWHID of the content
"""
swhid: SWHID!
- ): Content
- """
- Get a content that match all the given hashes.
- This entrypoint can be used to uniquely identify a content
- in the event of hash conflicts. Use multiple hashes to
- get an accurate result.
+ """
+ Returns the first _n_ elements from the list
+ """
+ first: Int
+ """
+ Returns the page after the cursor
+ """
+ after: String
+ ): ContentConnection
+
+ """
+ Get contents matching the given hashes.
At least one of the four hashes must be provided.
"""
- contentByHashes(
+ contentsByHashes(
sha1: String
sha256: String
@@ -1130,6 +1173,32 @@
sha1_git: String
blake2s256: String
+
+ """
+ Returns the first _n_ elements from the list
+ """
+ first: Int
+
+ """
+ Returns the page after the cursor
+ """
+ after: String
+ ): ContentConnection
+
+ """
+ Get a content that matches all the given hashes.
+ All four hashes must be provided.
+ This entrypoint can be used to uniquely identify a content
+ in the event of hash conflicts.
+ """
+ contentByHashes(
+ sha1: String!
+
+ sha256: String!
+
+ sha1_git: String!
+
+ blake2s256: String!
): Content
"""
diff --git a/swh/graphql/tests/functional/test_content.py b/swh/graphql/tests/functional/test_content.py
--- a/swh/graphql/tests/functional/test_content.py
+++ b/swh/graphql/tests/functional/test_content.py
@@ -10,10 +10,12 @@
@pytest.mark.parametrize("content", get_contents())
-def test_get_content_with_swhid(client, content):
+def test_get_content_with_hashes(client, content):
query_str = """
- query getContent($swhid: SWHID!) {
- content(swhid: $swhid) {
+ query getContentByHashes($sha1: String!, $sha256: String!,
+ $sha1_git: String!, $blake2s256: String!) {
+ contentByHashes(sha1: $sha1, sha256: $sha256, sha1_git: $sha1_git,
+ blake2s256: $blake2s256) {
swhid
id
hashes {
@@ -39,7 +41,14 @@
}
}
"""
- data, _ = utils.get_query_response(client, query_str, swhid=str(content.swhid()))
+ data, _ = utils.get_query_response(
+ client,
+ query_str,
+ blake2s256=content.blake2s256.hex(),
+ sha1=content.sha1.hex(),
+ sha1_git=content.sha1_git.hex(),
+ sha256=content.sha256.hex(),
+ )
archive_url = "https://archive.softwareheritage.org/api/1/"
response = {
"swhid": str(content.swhid()),
@@ -59,14 +68,31 @@
"language": None,
"license": None,
}
- assert data["content"] == response
+ assert data["contentByHashes"] == response
-def test_get_content_with_invalid_swhid(client):
+@pytest.mark.parametrize("content", get_contents())
+def test_get_contents_with_swhid(client, content):
query_str = """
- query getContent($swhid: SWHID!) {
- content(swhid: $swhid) {
- swhid
+ query getContents($swhid: SWHID!) {
+ contentsBySWHID(swhid: $swhid) {
+ nodes {
+ swhid
+ }
+ }
+ }
+ """
+ data, _ = utils.get_query_response(client, query_str, swhid=str(content.swhid()))
+ assert data["contentsBySWHID"]["nodes"] == [{"swhid": str(content.swhid())}]
+
+
+def test_get_contents_with_invalid_swhid(client):
+ query_str = """
+ query getContents($swhid: SWHID!) {
+ contentsBySWHID(swhid: $swhid) {
+ nodes {
+ swhid
+ }
}
}
"""
@@ -76,13 +102,33 @@
assert "Input error: Invalid SWHID" in errors[0]["message"]
+def test_get_contents_with_missing_swhid(client):
+ missing_sha1 = "1" * 40
+ query_str = """
+ query getContents($swhid: SWHID!) {
+ contentsBySWHID(swhid: $swhid) {
+ nodes {
+ swhid
+ }
+ }
+ }
+ """
+ data, _ = utils.get_query_response(
+ client, query_str, swhid=f"swh:1:cnt:{missing_sha1}"
+ )
+ assert data["contentsBySWHID"]["nodes"] == []
+
+
@pytest.mark.parametrize("content", get_contents())
-def test_get_content_with_hash(client, content):
+def test_get_contents_with_hashes(client, content):
query_str = """
- query getContent($sha1: String, $sha1_git: String, $sha256: String, $blake2s256: String) {
- contentByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
+ query getContents($sha1: String, $sha1_git: String, $sha256: String,
+ $blake2s256: String) {
+ contentsByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
blake2s256: $blake2s256) {
- swhid
+ nodes {
+ swhid
+ }
}
}
"""
@@ -94,16 +140,19 @@
sha256=content.sha256.hex(),
blake2s256=content.blake2s256.hex(),
)
- assert data["contentByHashes"] == {"swhid": str(content.swhid())}
+ assert data["contentsByHashes"]["nodes"] == [{"swhid": str(content.swhid())}]
@pytest.mark.parametrize("content", get_contents())
def test_get_content_with_single_hash(client, content):
query_str = """
- query getContent($sha1: String, $sha1_git: String, $sha256: String, $blake2s256: String) {
- contentByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
+ query getContents($sha1: String, $sha1_git: String, $sha256: String,
+ $blake2s256: String) {
+ contentsByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
blake2s256: $blake2s256) {
- swhid
+ nodes {
+ swhid
+ }
}
}
"""
@@ -112,35 +161,48 @@
query_str,
sha1=content.sha1.hex(),
)
- assert data["contentByHashes"] == {"swhid": str(content.swhid())}
+ assert data["contentsByHashes"]["nodes"] == [{"swhid": str(content.swhid())}]
+
+ data, _ = utils.get_query_response(
+ client,
+ query_str,
+ blake2s256=content.blake2s256.hex(),
+ )
+ assert data["contentsByHashes"]["nodes"] == [{"swhid": str(content.swhid())}]
@pytest.mark.parametrize("content", get_contents())
-def test_get_content_with_one_non_matching_hash(client, content):
+def test_get_contents_with_one_non_matching_hash(client, content):
query_str = """
- query getContent($sha1: String, $sha1_git: String, $sha256: String, $blake2s256: String) {
- contentByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
+ query getContents($sha1: String, $sha1_git: String, $sha256: String, $blake2s256: String) {
+ contentsByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
blake2s256: $blake2s256) {
- swhid
+ nodes {
+ swhid
+ }
}
}
"""
- utils.assert_missing_object(
+ data, _ = utils.get_query_response(
client,
query_str,
- obj_type="contentByHashes",
+ obj_type="contentsByHashes",
sha1=content.sha1.hex(),
sha1_git="a" * 20, # hash is valid, but not matching the object
)
+ assert data["contentsByHashes"]["nodes"] == []
def test_get_content_with_invalid_hashes(client):
content = get_contents()[0]
query_str = """
- query getContent($sha1: String, $sha1_git: String, $sha256: String, $blake2s256: String) {
- contentByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
- blake2s256: $blake2s256) {
- swhid
+ query getContents($sha1: String, $sha1_git: String, $sha256: String,
+ $blake2s256: String) {
+ contentsByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
+ blake2s256: $blake2s256) {
+ nodes {
+ swhid
+ }
}
}
"""
@@ -158,10 +220,12 @@
def test_get_content_with_no_hashes(client):
query_str = """
- query getContent($sha1: String, $sha1_git: String, $sha256: String, $blake2s256: String) {
- contentByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
+ query getContents($sha1: String, $sha1_git: String, $sha256: String, $blake2s256: String) {
+ contentsByHashes(sha1: $sha1, sha1_git: $sha1_git, sha256: $sha256,
blake2s256: $blake2s256) {
- swhid
+ nodes {
+ swhid
+ }
}
}
"""
@@ -203,20 +267,3 @@
"length": 4,
"swhid": "swh:1:cnt:86bc6b377e9d25f9d26777a4a28d08e63e7c5779",
}
-
-
-def test_get_content_with_unknown_swhid(client):
- unknown_sha1 = "1" * 40
- query_str = """
- query getDirectory($swhid: SWHID!) {
- content(swhid: $swhid) {
- swhid
- }
- }
- """
- utils.assert_missing_object(
- client,
- query_str,
- obj_type="content",
- swhid=f"swh:1:cnt:{unknown_sha1}",
- )
diff --git a/swh/graphql/tests/unit/resolvers/test_resolvers.py b/swh/graphql/tests/unit/resolvers/test_resolvers.py
--- a/swh/graphql/tests/unit/resolvers/test_resolvers.py
+++ b/swh/graphql/tests/unit/resolvers/test_resolvers.py
@@ -31,7 +31,7 @@
(rs.revision_directory_resolver, resolvers.directory.RevisionDirectoryNode),
(rs.release_resolver, resolvers.release.ReleaseNode),
(rs.directory_resolver, resolvers.directory.DirectoryNode),
- (rs.content_resolver, resolvers.content.ContentNode),
+ (rs.content_by_hashes_resolver, resolvers.content.ContentbyHashesNode),
],
)
def test_node_resolver(self, mocker, dummy_node, resolver_func, node_cls):
@@ -58,6 +58,8 @@
rs.directory_entries_resolver,
resolvers.directory_entry.DirectoryEntryConnection,
),
+ (rs.contnets_by_swhid_resolver, resolvers.content.ContentSwhidConnection),
+ (rs.contnets_by_hashes_resolver, resolvers.content.ContentHashConnection),
],
)
def test_connection_resolver(self, resolver_func, connection_cls):

File Metadata

Mime Type
text/plain
Expires
Thu, Jan 30, 3:14 PM (58 m, 51 s)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3227392

Event Timeline