diff --git a/swh/graphql/app.py b/swh/graphql/app.py index 7d0b11d..34cb7c0 100644 --- a/swh/graphql/app.py +++ b/swh/graphql/app.py @@ -1,42 +1,43 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information # import pkg_resources import os from pathlib import Path from ariadne import gql, load_schema_from_path, make_executable_schema from .resolvers import resolvers, scalars type_defs = gql( # pkg_resources.resource_string("swh.graphql", "schem/schema.graphql").decode() load_schema_from_path( os.path.join(Path(__file__).parent.resolve(), "schema", "schema.graphql") ) ) schema = make_executable_schema( type_defs, resolvers.query, resolvers.origin, resolvers.visit, resolvers.visit_status, resolvers.snapshot, resolvers.snapshot_branch, resolvers.revision, resolvers.release, resolvers.directory, resolvers.directory_entry, resolvers.search_result, resolvers.branch_target, resolvers.release_target, resolvers.directory_entry_target, resolvers.search_result_target, resolvers.binary_string, scalars.id_scalar, scalars.datetime_scalar, scalars.swhid_scalar, + scalars.content_hash_scalar, ) diff --git a/swh/graphql/backends/archive.py b/swh/graphql/backends/archive.py index d49460b..cf1a4cb 100644 --- a/swh/graphql/backends/archive.py +++ b/swh/graphql/backends/archive.py @@ -1,84 +1,86 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.graphql import server from swh.model.swhids import ObjectType class Archive: def __init__(self): self.storage = server.get_storage() def get_origin(self, url): return self.storage.origin_get([url])[0] def get_origins(self, after=None, first=50, url_pattern=None): # STORAGE-TODO # Make them a single function in the backend if url_pattern is None: return self.storage.origin_list(page_token=after, limit=first) return self.storage.origin_search( url_pattern=url_pattern, page_token=after, limit=first ) def get_origin_visits(self, origin_url, after=None, first=50): return self.storage.origin_visit_get(origin_url, page_token=after, limit=first) def get_origin_visit(self, origin_url, visit_id): return self.storage.origin_visit_get_by(origin_url, visit_id) def get_origin_latest_visit(self, origin_url): return self.storage.origin_visit_get_latest(origin_url) def get_visit_status(self, origin_url, visit_id, after=None, first=50): return self.storage.origin_visit_status_get( origin_url, visit_id, page_token=after, limit=first ) def get_latest_visit_status(self, origin_url, visit_id): return self.storage.origin_visit_status_get_latest(origin_url, visit_id) def get_origin_snapshots(self, origin_url): return self.storage.origin_snapshot_get_all(origin_url) def get_snapshot_branches( self, snapshot, after=b"", first=50, target_types=[], name_include=None ): return self.storage.snapshot_get_branches( snapshot, branches_from=after, branches_count=first, target_types=target_types, branch_name_include_substring=name_include, ) def get_revisions(self, revision_ids): return self.storage.revision_get(revision_ids=revision_ids) def get_revision_log(self, revision_ids, after=None, first=50): return self.storage.revision_log(revisions=revision_ids, limit=first) def get_releases(self, release_ids): return self.storage.release_get(releases=release_ids) def get_directory_entries(self, directory_id, after=None, first=50): return self.storage.directory_get_entries( directory_id, limit=first, page_token=after ) - def get_content(self, content_id): - # FIXME, only for tests - return self.storage.content_find({"sha1_git": content_id}) - def is_object_available(self, object_id: str, object_type: ObjectType) -> bool: mapping = { ObjectType.CONTENT: self.storage.content_missing_per_sha1_git, ObjectType.DIRECTORY: self.storage.directory_missing, ObjectType.RELEASE: self.storage.release_missing, ObjectType.REVISION: self.storage.revision_missing, ObjectType.SNAPSHOT: self.storage.snapshot_missing, } return not list(mapping[object_type]([object_id])) + + def get_contents(self, checksums: dict): + return self.storage.content_find(checksums) + + def get_content_data(self, content_sha1): + return self.storage.content_get_data(content_sha1) diff --git a/swh/graphql/resolvers/content.py b/swh/graphql/resolvers/content.py index 12a86af..c7d6782 100644 --- a/swh/graphql/resolvers/content.py +++ b/swh/graphql/resolvers/content.py @@ -1,59 +1,95 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from typing import Union from swh.graphql.backends import archive from .base_node import BaseSWHNode from .directory_entry import DirectoryEntryNode from .release import BaseReleaseNode from .snapshot_branch import SnapshotBranchNode class BaseContentNode(BaseSWHNode): """ Base resolver for all the content nodes """ - def _get_content_by_id(self, content_id): - content = archive.Archive().get_content(content_id) + def _get_content_by_hash(self, checksums: dict): + content = archive.Archive().get_contents(checksums) + # in case of a conflict, return the first element return content[0] if content else None @property def checksum(self): - # FIXME, return a Node object + # FIXME, use a Node instead return {k: v.hex() for (k, v) in self._node.hashes().items()} @property def id(self): return self._node.sha1_git + @property + def data(self): + # FIXME, return a Node object + # FIXME, add more ways to retrieve data like binary string + archive_url = "https://archive.softwareheritage.org/api/1/" + content_sha1 = self._node.hashes()["sha1"] + return { + "url": f"{archive_url}content/sha1:{content_sha1.hex()}/raw/", + } + + @property + def fileType(self): + # FIXME, fetch data from the indexers + return None + + @property + def language(self): + # FIXME, fetch data from the indexers + return None + + @property + def license(self): + # FIXME, fetch data from the indexers + return None + def is_type_of(self): # is_type_of is required only when resolving a UNION type # This is for ariadne to return the right type return "Content" class ContentNode(BaseContentNode): """ Node resolver for a content requested directly with its SWHID """ def _get_node_data(self): - return self._get_content_by_id(self.kwargs.get("swhid").object_id) + checksums = {"sha1_git": self.kwargs.get("swhid").object_id} + return self._get_content_by_hash(checksums) + + +class HashContentNode(BaseContentNode): + """ + Node resolver for a content requested with one or more checksums + """ + + def _get_node_data(self): + checksums = dict(self.kwargs.get("checksums")) + return self._get_content_by_hash(checksums) class TargetContentNode(BaseContentNode): """ - Node resolver for a content requested from a - directory entry or from a release target + Node resolver for a content requested as a target + This request could be from directory entry, release or a branch """ obj: Union[DirectoryEntryNode, BaseReleaseNode, SnapshotBranchNode] def _get_node_data(self): - content_id = self.obj.target_hash - return self._get_content_by_id(content_id) + return self._get_content_by_hash(checksums={"sha1_git": self.obj.target_hash}) diff --git a/swh/graphql/resolvers/resolver_factory.py b/swh/graphql/resolvers/resolver_factory.py index 8f42e9e..ec5c566 100644 --- a/swh/graphql/resolvers/resolver_factory.py +++ b/swh/graphql/resolvers/resolver_factory.py @@ -1,80 +1,81 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from .content import ContentNode, TargetContentNode +from .content import ContentNode, HashContentNode, TargetContentNode from .directory import DirectoryNode, RevisionDirectoryNode, TargetDirectoryNode from .directory_entry import DirectoryEntryConnection from .origin import OriginConnection, OriginNode from .release import ReleaseNode, TargetReleaseNode from .revision import ( LogRevisionConnection, ParentRevisionConnection, RevisionNode, TargetRevisionNode, ) from .search import ResolveSwhidConnection from .snapshot import ( OriginSnapshotConnection, SnapshotNode, TargetSnapshotNode, VisitSnapshotNode, ) from .snapshot_branch import SnapshotBranchConnection from .visit import LatestVisitNode, OriginVisitConnection, OriginVisitNode from .visit_status import LatestVisitStatusNode, VisitStatusConnection def get_node_resolver(resolver_type): # FIXME, replace with a proper factory method mapping = { "origin": OriginNode, "visit": OriginVisitNode, "latest-visit": LatestVisitNode, "latest-status": LatestVisitStatusNode, "visit-snapshot": VisitSnapshotNode, "snapshot": SnapshotNode, "branch-revision": TargetRevisionNode, "branch-release": TargetReleaseNode, "branch-directory": TargetDirectoryNode, "branch-content": TargetContentNode, "branch-snapshot": TargetSnapshotNode, "revision": RevisionNode, "revision-directory": RevisionDirectoryNode, "release": ReleaseNode, "release-revision": TargetRevisionNode, "release-release": TargetReleaseNode, "release-directory": TargetDirectoryNode, "release-content": TargetContentNode, "directory": DirectoryNode, "content": ContentNode, + "content-by-hash": HashContentNode, "dir-entry-dir": TargetDirectoryNode, "dir-entry-file": TargetContentNode, "search-result-snapshot": TargetSnapshotNode, "search-result-revision": TargetRevisionNode, "search-result-release": TargetReleaseNode, "search-result-directory": TargetDirectoryNode, "search-result-content": TargetContentNode, } if resolver_type not in mapping: raise AttributeError(f"Invalid node type: {resolver_type}") return mapping[resolver_type] def get_connection_resolver(resolver_type): # FIXME, replace with a proper factory method mapping = { "origins": OriginConnection, "origin-visits": OriginVisitConnection, "origin-snapshots": OriginSnapshotConnection, "visit-status": VisitStatusConnection, "snapshot-branches": SnapshotBranchConnection, "revision-parents": ParentRevisionConnection, "revision-log": LogRevisionConnection, "directory-entries": DirectoryEntryConnection, "resolve-swhid": ResolveSwhidConnection, } if resolver_type not in mapping: raise AttributeError(f"Invalid connection type: {resolver_type}") return mapping[resolver_type] diff --git a/swh/graphql/resolvers/resolvers.py b/swh/graphql/resolvers/resolvers.py index ac0e315..cc0e795 100644 --- a/swh/graphql/resolvers/resolvers.py +++ b/swh/graphql/resolvers/resolvers.py @@ -1,282 +1,290 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """ High level resolvers """ # Any schema attribute can be resolved by any of the following ways # and in the following priority order # - In this module using a decorator (eg: @visitstatus.field("snapshot") # Every object (type) is expected to resolve this way as they can accept arguments # eg: origin.visits takes arguments to paginate # - As a property in the Node object (eg: resolvers.visit.BaseVisitNode.id) # Every scalar is expected to resolve this way # - As an attribute/item in the object/dict returned by a backend (eg: Origin.url) from ariadne import ObjectType, UnionType from graphql.type import GraphQLResolveInfo from swh.graphql import resolvers as rs from swh.graphql.utils import utils from .resolver_factory import get_connection_resolver, get_node_resolver query: ObjectType = ObjectType("Query") origin: ObjectType = ObjectType("Origin") visit: ObjectType = ObjectType("Visit") visit_status: ObjectType = ObjectType("VisitStatus") snapshot: ObjectType = ObjectType("Snapshot") snapshot_branch: ObjectType = ObjectType("Branch") revision: ObjectType = ObjectType("Revision") release: ObjectType = ObjectType("Release") directory: ObjectType = ObjectType("Directory") directory_entry: ObjectType = ObjectType("DirectoryEntry") search_result: ObjectType = ObjectType("SearchResult") binary_string: ObjectType = ObjectType("BinaryString") branch_target: UnionType = UnionType("BranchTarget") release_target: UnionType = UnionType("ReleaseTarget") directory_entry_target: UnionType = UnionType("DirectoryEntryTarget") search_result_target: UnionType = UnionType("SearchResultTarget") # Node resolvers # A node resolver should return an instance of BaseNode @query.field("origin") def origin_resolver(obj: None, info: GraphQLResolveInfo, **kw) -> rs.origin.OriginNode: """ """ resolver = get_node_resolver("origin") return resolver(obj, info, **kw) @origin.field("latestVisit") def latest_visit_resolver( obj: rs.origin.OriginNode, info: GraphQLResolveInfo, **kw ) -> rs.visit.LatestVisitNode: """ """ resolver = get_node_resolver("latest-visit") return resolver(obj, info, **kw) @query.field("visit") def visit_resolver( obj: None, info: GraphQLResolveInfo, **kw ) -> rs.visit.OriginVisitNode: """ """ resolver = get_node_resolver("visit") return resolver(obj, info, **kw) @visit.field("latestStatus") def latest_visit_status_resolver( obj, info: GraphQLResolveInfo, **kw ) -> rs.visit_status.LatestVisitStatusNode: """ """ resolver = get_node_resolver("latest-status") return resolver(obj, info, **kw) @query.field("snapshot") def snapshot_resolver( obj: None, info: GraphQLResolveInfo, **kw ) -> rs.snapshot.SnapshotNode: """ """ resolver = get_node_resolver("snapshot") return resolver(obj, info, **kw) @visit_status.field("snapshot") def visit_snapshot_resolver( obj, info: GraphQLResolveInfo, **kw ) -> rs.snapshot.VisitSnapshotNode: resolver = get_node_resolver("visit-snapshot") return resolver(obj, info, **kw) @snapshot_branch.field("target") def snapshot_branch_target_resolver( obj: rs.snapshot_branch.SnapshotBranchNode, info: GraphQLResolveInfo, **kw ): """ Snapshot branch target can be a revision or a release """ resolver_type = f"branch-{obj.type}" resolver = get_node_resolver(resolver_type) return resolver(obj, info, **kw) @query.field("revision") def revision_resolver( obj: None, info: GraphQLResolveInfo, **kw ) -> rs.revision.RevisionNode: resolver = get_node_resolver("revision") return resolver(obj, info, **kw) @revision.field("directory") def revision_directory_resolver( obj, info: GraphQLResolveInfo, **kw ) -> rs.directory.RevisionDirectoryNode: resolver = get_node_resolver("revision-directory") return resolver(obj, info, **kw) @query.field("release") def release_resolver( obj: None, info: GraphQLResolveInfo, **kw ) -> rs.release.ReleaseNode: resolver = get_node_resolver("release") return resolver(obj, info, **kw) @release.field("target") def release_target_resolver(obj, info: GraphQLResolveInfo, **kw): """ release target can be a release, revision, directory or content obj is release here, target type is obj.target_type """ resolver_type = f"release-{obj.target_type.value}" resolver = get_node_resolver(resolver_type) return resolver(obj, info, **kw) @query.field("directory") def directory_resolver( obj: None, info: GraphQLResolveInfo, **kw ) -> rs.directory.DirectoryNode: resolver = get_node_resolver("directory") return resolver(obj, info, **kw) @directory_entry.field("target") def directory_entry_target_resolver( obj: rs.directory_entry.DirectoryEntryNode, info: GraphQLResolveInfo, **kw ): """ directory entry target can be a directory or a content """ resolver_type = f"dir-entry-{obj.type}" resolver = get_node_resolver(resolver_type) return resolver(obj, info, **kw) @query.field("content") def content_resolver( obj: None, info: GraphQLResolveInfo, **kw ) -> rs.content.ContentNode: resolver = get_node_resolver("content") return resolver(obj, info, **kw) @search_result.field("target") def search_result_target_resolver( obj: rs.search.SearchResultNode, info: GraphQLResolveInfo, **kw ): resolver_type = f"search-result-{obj.type}" resolver = get_node_resolver(resolver_type) return resolver(obj, info, **kw) +@query.field("contentByHash") +def content_by_hash_resolver( + obj: None, info: GraphQLResolveInfo, **kw +) -> rs.content.ContentNode: + resolver = get_node_resolver("content-by-hash") + return resolver(obj, info, **kw) + + # Connection resolvers # A connection resolver should return an instance of BaseConnection @query.field("origins") def origins_resolver( obj: None, info: GraphQLResolveInfo, **kw ) -> rs.origin.OriginConnection: resolver = get_connection_resolver("origins") return resolver(obj, info, **kw) @origin.field("visits") def visits_resolver( obj: rs.origin.OriginNode, info: GraphQLResolveInfo, **kw ) -> rs.visit.OriginVisitConnection: resolver = get_connection_resolver("origin-visits") return resolver(obj, info, **kw) @origin.field("snapshots") def origin_snapshots_resolver( obj: rs.origin.OriginNode, info: GraphQLResolveInfo, **kw ) -> rs.snapshot.OriginSnapshotConnection: """ """ resolver = get_connection_resolver("origin-snapshots") return resolver(obj, info, **kw) @visit.field("status") def visitstatus_resolver( obj, info: GraphQLResolveInfo, **kw ) -> rs.visit_status.VisitStatusConnection: resolver = get_connection_resolver("visit-status") return resolver(obj, info, **kw) @snapshot.field("branches") def snapshot_branches_resolver( obj, info: GraphQLResolveInfo, **kw ) -> rs.snapshot_branch.SnapshotBranchConnection: resolver = get_connection_resolver("snapshot-branches") return resolver(obj, info, **kw) @revision.field("parents") def revision_parents_resolver( obj, info: GraphQLResolveInfo, **kw ) -> rs.revision.ParentRevisionConnection: resolver = get_connection_resolver("revision-parents") return resolver(obj, info, **kw) # @revision.field("revisionLog") # def revision_log_resolver(obj, info, **kw): # resolver = get_connection_resolver("revision-log") # return resolver(obj, info, **kw) @directory.field("entries") def directory_entry_resolver( obj, info: GraphQLResolveInfo, **kw ) -> rs.directory_entry.DirectoryEntryConnection: resolver = get_connection_resolver("directory-entries") return resolver(obj, info, **kw) @query.field("resolveSwhid") def search_swhid_resolver( obj, info: GraphQLResolveInfo, **kw ) -> rs.search.ResolveSwhidConnection: resolver = get_connection_resolver("resolve-swhid") return resolver(obj, info, **kw) # Any other type of resolver @release_target.type_resolver @directory_entry_target.type_resolver @branch_target.type_resolver @search_result_target.type_resolver def union_resolver(obj, *_) -> str: """ Generic resolver for all the union types """ return obj.is_type_of() @binary_string.field("text") def binary_string_text_resolver(obj, *args, **kw): return obj.decode(utils.ENCODING, "replace") @binary_string.field("base64") def binary_string_base64_resolver(obj, *args, **kw): return utils.get_b64_string(obj) diff --git a/swh/graphql/resolvers/scalars.py b/swh/graphql/resolvers/scalars.py index e083470..f832055 100644 --- a/swh/graphql/resolvers/scalars.py +++ b/swh/graphql/resolvers/scalars.py @@ -1,43 +1,60 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from datetime import datetime from ariadne import ScalarType from swh.graphql.utils import utils +from swh.model import hashutil from swh.model.model import TimestampWithTimezone from swh.model.swhids import CoreSWHID datetime_scalar = ScalarType("DateTime") swhid_scalar = ScalarType("SWHID") id_scalar = ScalarType("ID") +content_hash_scalar = ScalarType("ContentHash") @id_scalar.serializer def serialize_id(value): if type(value) is bytes: return value.hex() return value @datetime_scalar.serializer def serialize_datetime(value): # FIXME, handle error and return None if type(value) == TimestampWithTimezone: value = value.to_datetime() if type(value) == datetime: return utils.get_formatted_date(value) return None @swhid_scalar.value_parser def validate_swhid(value): return CoreSWHID.from_string(value) @swhid_scalar.serializer def serialize_swhid(value): return str(value) + + +@content_hash_scalar.value_parser +def validate_content_hash(value): + try: + hash_type, hash_string = value.split(":") + hash_value = hashutil.hash_to_bytes(hash_string) + except ValueError as e: + # FIXME, log this error + raise AttributeError("Invalid content checksum", e) + except Exception as e: + # FIXME, log this error + raise AttributeError("Invalid content checksum", e) + # FIXME, add validation for the hash_type + return hash_type, hash_value diff --git a/swh/graphql/schema/schema.graphql b/swh/graphql/schema/schema.graphql index df5383a..1843186 100644 --- a/swh/graphql/schema/schema.graphql +++ b/swh/graphql/schema/schema.graphql @@ -1,982 +1,1043 @@ """ SoftWare Heritage persistent Identifier """ scalar SWHID """ ISO-8601 encoded date string """ scalar DateTime +""" +Content identifier in the form hash-type:hash-value +""" +scalar ContentHash + """ Object with an id """ interface Node { """ Id of the object. This is for caching purpose and should not be used outside the GraphQL API """ id: ID! } """ SWH merkle node object with a SWHID """ interface MerkleNode { """ SWHID of the object """ swhid: SWHID! } """ Information about pagination """ type PageInfo { """ Cursor to request the next page in the connection """ endCursor: String """ Are there more pages in the connection? """ hasNextPage: Boolean! } """ Binary strings; different encodings """ type BinaryString { """ Utf-8 encoded value, any non Unicode character will be replaced """ text: String """ base64 encoded value """ base64: String } """ Connection to origins """ type OriginConnection { """ List of origin edges """ edges: [OriginEdge] """ List of origin objects """ nodes: [Origin] """ Information for pagination """ pageInfo: PageInfo! """ Total number of origin objects in the connection """ totalCount: Int } """ Edge in origin connection """ type OriginEdge { """ Cursor to request the next page after the item """ cursor: String! """ Origin object """ node: Origin } """ A software origin object """ type Origin implements Node { """ Unique identifier """ id: ID! """ Origin URL """ url: String! """ Connection to all the visit objects for the origin """ visits( """ Returns the first _n_ elements from the list """ first: Int! """ Returns the page after this cursor """ after: String ): VisitConnection! """ Latest visit object for the origin """ latestVisit: Visit """ Connection to all the snapshots for the origin """ snapshots( """ Returns the first _n_ elements from the list """ first: Int! """ Returns the page after this cursor """ after: String ): SnapshotConnection } """ Connection to origin visits """ type VisitConnection { """ List of visit edges """ edges: [VisitEdge] """ List of visit objects """ nodes: [Visit] """ Information for pagination """ pageInfo: PageInfo! """ Total number of visit objects in the connection """ totalCount: Int } """ Edge in origin visit connection """ type VisitEdge { """ Cursor to request the next page after the item """ cursor: String! """ Visit object """ node: Visit } """ An origin visit object """ type Visit implements Node { """ Unique identifier """ id: ID! """ Visit number for the origin """ visitId: Int """ Visit date ISO-8601 encoded """ date: DateTime! """ Type of the origin visited. Eg: git/hg/svn/tar/deb """ type: String """ Connection to all the status objects for the visit """ status( """ Returns the first _n_ elements from the list """ first: Int """ Returns the page after this cursor """ after: String ): VisitStatusConnection """ Latest status object for the Visit """ latestStatus: VisitStatus } """ Connection to visit status """ type VisitStatusConnection { """ List of visit status edges """ edges: [VisitStatusEdge] """ List of visit status objects """ nodes: [VisitStatus] """ Information for pagination """ pageInfo: PageInfo! """ Total number of visit status objects in the connection """ totalCount: Int } """ Edge in visit status connection """ type VisitStatusEdge { """ Cursor to request the next page after the item """ cursor: String! """ Visit status object """ node: VisitStatus } """ A visit status object """ type VisitStatus { """ Status string of the visit (either full, partial or ongoing) """ status: String! """ ISO-8601 encoded date string """ date: DateTime! """ Snapshot object """ snapshot: Snapshot """ Type of the origin visited. Eg: git/hg/svn/tar/deb """ type: String } """ Connection to snapshots """ type SnapshotConnection { """ List of snapshot edges """ edges: [SnapshotEdge] """ List of snapshot objects """ nodes: [Snapshot] """ Information for pagination """ pageInfo: PageInfo! """ Total number of snapshot objects in the connection """ totalCount: Int } """ Edge in snapshot connection """ type SnapshotEdge { """ Cursor to request the next page after the item """ cursor: String! """ Snapshot object """ node: Snapshot } """ A snapshot object """ type Snapshot implements MerkleNode & Node { """ Unique identifier """ id: ID! """ SWHID of the snapshot object """ swhid: SWHID! """ Connection to all the snapshot branches """ branches( """ Returns the first _n_ elements from the list """ first: Int! """ Returns the page after this cursor """ after: String """ Filter by branch target types """ types: [BranchTargetType] """ Filter by branch name """ nameInclude: String ): BranchConnection } """ Connection to snapshot branches """ type BranchConnection { """ List of branch edges """ edges: [BranchConnectionEdge] """ List of branch objects """ nodes: [Branch] """ Information for pagination """ pageInfo: PageInfo! """ Total number of branch objects in the connection """ totalCount: Int } """ Edge in snapshot branch connection """ type BranchConnectionEdge { """ Cursor to request the next page after the item """ cursor: String! """ Branch object """ node: Branch } """ A user object """ type Person { """ User's email address """ email: BinaryString """ User's name """ name: BinaryString """ User's full name """ fullname: BinaryString } """ Possible branch target objects """ union BranchTarget = Revision | Release | Branch | Content | Directory | Snapshot """ Possible Branch target types """ enum BranchTargetType { revision release alias content directory snapshot } """ A snapshot branch object """ type Branch { """ Branch name """ name: BinaryString """ Type of Branch target """ type: BranchTargetType """ Branch target object """ target: BranchTarget } """ Connection to revisions """ type RevisionConnection { """ List of revision edges """ edges: [RevisionEdge] """ List of revision objects """ nodes: [Revision] """ Information for pagination """ pageInfo: PageInfo! """ Total number of revision objects in the connection """ totalCount: Int } """ Edge in revision connection """ type RevisionEdge { """ Cursor to request the next page after the item """ cursor: String! """ Revision object """ node: Revision } """ A revision object """ type Revision implements MerkleNode & Node { """ Unique identifier """ id: ID! """ SWHID of the revision object """ swhid: SWHID! """ Message associated to the revision """ message: BinaryString """ """ author: Person """ """ committer: Person """ Revision date ISO-8601 encoded """ date: DateTime """ Type of the revision, eg: git/hg """ type: String """ The unique directory object that revision points to """ directory: Directory """ Connection to all the parents of the revision """ parents( """ Returns the first _n_ elements from the list """ first: Int """ Returns the page after this cursor """ after: String ): RevisionConnection """ Connection to all the revisions heading to this one aka the commit log """ revisionLog( """ Returns the first _n_ elements from the list """ first: Int! """ Returns the page after the cursor """ after: String ): RevisionConnection } """ Possible release target objects """ union ReleaseTarget = Release | Revision | Directory | Content """ Possible release target types """ enum ReleaseTargetType { release revision content directory } """ A release object """ type Release implements MerkleNode & Node { """ Unique identifier """ id: ID! """ SWHID of the release object """ swhid: SWHID! """ The name of the release """ name: BinaryString """ The message associated to the release """ message: BinaryString """ """ author: Person """ Release date ISO-8601 encoded """ date: DateTime """ Type of release target """ targetType: ReleaseTargetType """ Release target object """ target: ReleaseTarget } """ Connection to directory entries """ type DirectoryEntryConnection { """ List of directory entry edges """ edges: [DirectoryEntryEdge] """ List of directory entry objects """ nodes: [DirectoryEntry] """ Information for pagination """ pageInfo: PageInfo! """ Total number of directory entry objects in the connection """ totalCount: Int } """ Edge in directory entry connection """ type DirectoryEntryEdge { """ Cursor to request the next page after the item """ cursor: String! """ Directory entry object """ node: DirectoryEntry } """ Possible directory entry target objects """ union DirectoryEntryTarget = Directory | Content """ Possible directory entry types """ enum DirectoryEntryType { dir file rev } """ A directory entry object """ type DirectoryEntry { """ The directory entry name """ name: BinaryString """ Directory entry object type; can be file, dir or rev """ type: DirectoryEntryType """ Directory entry target object """ target: DirectoryEntryTarget } """ A directory object """ type Directory implements MerkleNode & Node { """ Unique identifier """ id: ID! """ SWHID of the directory object """ swhid: SWHID! """ Connection to the directory entries """ entries( """ Returns the first _n_ elements from the list """ first: Int """ Returns the page after this cursor """ after: String ): DirectoryEntryConnection } """ -An object with different checksums +An object with different content checksums """ type ContentChecksum { + blake2s256: String + sha1: String + sha1_git: String + sha256: String +} + +""" +Object with different content data representations +""" +type ContentData { """ + URL to download the file data """ - blake2s256: String + url: String +} +type ContentFileType { """ + Detected content encoding """ - sha1: String + encoding: String """ + Detected MIME type of the content """ - sha1_git: String + mimetype: String +} +type ContentLanguage { """ + Detected programming language if any """ - sha256: String + lang: String +} + +type ContentLicense { + """ + Array of strings containing the detected license names + """ + licenses: [String] } """ A content object """ type Content implements MerkleNode & Node { """ Unique identifier """ id: ID! """ SWHID of the content object """ swhid: SWHID! """ Checksums for the content """ checksum: ContentChecksum """ Length of the content in bytes """ length: Int """ Content status, visible or hidden """ status: String + + """ + File content + """ + data: ContentData + + """ + Information about the content MIME type + """ + fileType: ContentFileType + + """ + Information about the programming language used in the content + """ + language: ContentLanguage + + """ + Information about the license of the content + """ + license: ContentLicense } """ Connection to SearchResults """ type SearchResultConnection { """ List of SearchResult edges """ edges: [SearchResultEdge] """ List of SearchResult objects """ nodes: [SearchResult] """ Information for pagination """ pageInfo: PageInfo! """ Total number of result objects in the connection """ totalCount: Int } """ Edge in SearchResult connection """ type SearchResultEdge { """ Cursor to request the next page after the item """ cursor: String! """ SearchResult object """ node: SearchResult } union SearchResultTarget = Origin | Revision | Release | Content | Directory | Snapshot enum SearchResultTargetType { origin revision release content directory snapshot } """ A SearchResult object """ type SearchResult { """ Result target type """ type: SearchResultTargetType """ Result target object """ target: SearchResultTarget } """ The query root of the GraphQL interface. """ type Query { """ Get an origin with its url """ origin( """ URL of the Origin """ url: String! ): Origin """ Get a Connection to all the origins """ origins( """ Returns the first _n_ elements from the list """ first: Int! """ Returns the page after the cursor """ after: String """ Filter origins with a URL pattern """ urlPattern: String ): OriginConnection """ Get the visit object with an origin URL and a visit id """ visit( """ URL of the origin """ originUrl: String! """ Visit id to get """ visitId: Int! ): Visit """ Get the snapshot with a SWHID """ snapshot( """ SWHID of the snapshot object """ swhid: SWHID! ): Snapshot """ Get the revision with a SWHID """ revision( """ SWHID of the revision object """ swhid: SWHID! ): Revision """ Get the release with a SWHID """ release( """ SWHID of the release object """ swhid: SWHID! ): Release """ Get the directory with a SWHID """ directory( """ SWHID of the directory object """ swhid: SWHID! ): Directory """ Get the content with a SWHID """ content( """ SWHID of the content object """ swhid: SWHID! ): Content + """ + Get the content by one or more hashes + Use multiple hashes for an accurate result + """ + contentByHash( + """ + List of hashType:hashValue strings + """ + checksums: [ContentHash]! + ): Content + """ Resolve the given SWHID to an object """ resolveSwhid( """ SWHID to look for """ swhid: SWHID! ): SearchResultConnection! } diff --git a/swh/graphql/tests/functional/test_content.py b/swh/graphql/tests/functional/test_content.py new file mode 100644 index 0000000..901b15e --- /dev/null +++ b/swh/graphql/tests/functional/test_content.py @@ -0,0 +1,149 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import pytest + +from . import utils +from ..data import get_contents + + +@pytest.mark.parametrize("content", get_contents()) +def test_get_content_with_swhid(client, content): + query_str = """ + { + content(swhid: "%s") { + swhid + checksum { + blake2s256 + sha1 + sha1_git + sha256 + } + length + status + data { + url + } + fileType { + encoding + } + language { + lang + } + license { + licenses + } + } + } + """ + data, _ = utils.get_query_response(client, query_str % content.swhid()) + archive_url = "https://archive.softwareheritage.org/api/1/" + response = { + "swhid": str(content.swhid()), + "checksum": { + "blake2s256": content.blake2s256.hex(), + "sha1": content.sha1.hex(), + "sha1_git": content.sha1_git.hex(), + "sha256": content.sha256.hex(), + }, + "length": content.length, + "status": content.status, + "data": { + "url": f"{archive_url}content/sha1:{content.sha1.hex()}/raw/", + }, + "fileType": None, + "language": None, + "license": None, + } + assert data["content"] == response + + +@pytest.mark.parametrize("content", get_contents()) +def test_get_content_with_hash(client, content): + query_str = """ + { + contentByHash(checksums: ["blake2s256:%s", "sha1:%s", "sha1_git:%s", "sha256:%s"]) { + swhid + } + } + """ + data, _ = utils.get_query_response( + client, + query_str + % ( + content.blake2s256.hex(), + content.sha1.hex(), + content.sha1_git.hex(), + content.sha256.hex(), + ), + ) + assert data["contentByHash"] == {"swhid": str(content.swhid())} + + +def test_get_content_with_invalid_swhid(client): + query_str = """ + { + content(swhid: "swh:1:cnt:invalid") { + swhid + } + } + """ + errors = utils.get_error_response(client, query_str) + # API will throw an error in case of an invalid SWHID + assert len(errors) == 1 + assert "Invalid SWHID: invalid syntax" in errors[0]["message"] + + +def test_get_content_with_invalid_hashes(client): + content = get_contents()[0] + query_str = """ + { + contentByHash(checksums: ["blake2s256:%s", "sha1:%s", "sha1_git:%s", "sha256:%s"]) { + swhid + } + } + """ + errors = utils.get_error_response( + client, + query_str + % ( + "invalid", # Only one hash is invalid + content.sha1.hex(), + content.sha1_git.hex(), + content.sha256.hex(), + ), + ) + # API will throw an error in case of an invalid content hash + assert len(errors) == 1 + assert "Invalid content checksum" in errors[0]["message"] + + +def test_get_content_as_target(client): + # SWHID of a test dir with a file entry + directory_swhid = "swh:1:dir:87b339104f7dc2a8163dec988445e3987995545f" + query_str = """ + { + directory(swhid: "%s") { + swhid + entries(first: 2) { + nodes { + type + target { + ...on Content { + swhid + length + } + } + } + } + } + } + """ + data, _ = utils.get_query_response(client, query_str % directory_swhid) + content_obj = data["directory"]["entries"]["nodes"][1]["target"] + assert content_obj == { + "length": 4, + "swhid": "swh:1:cnt:86bc6b377e9d25f9d26777a4a28d08e63e7c5779", + }