from pathlib import Path

from ariadne import gql, load_schema_from_path, make_executable_schema

from .resolvers import resolvers, scalars

# Resolve the schema file relative to this module instead of the process's
# current working directory, so the app can be imported from anywhere.
_SCHEMA_PATH = Path(__file__).parent / "schema" / "schema.graphql"

# gql() validates the schema text at import time
type_defs = gql(load_schema_from_path(str(_SCHEMA_PATH)))

# Every ObjectType/UnionType/scalar declared in resolvers must be listed here,
# otherwise its field resolvers are never bound to the executable schema.
schema = make_executable_schema(
    type_defs,
    resolvers.query,
    resolvers.origin,
    resolvers.visit,
    resolvers.visit_status,
    resolvers.snapshot,
    resolvers.snapshot_branch,
    resolvers.directory,
    resolvers.branch_target,
    scalars.datetime_scalar,
    scalars.swhid_scalar,
    scalars.sha1_scalar,
    scalars.binary_text_scalar,
    scalars.datetimezone_scalar,
)
class BaseConnection(ABC):
    """
    Base class for all the connection resolvers

    A connection wraps one page of backend results and exposes it through
    the attributes the schema expects (edges, nodes, pageInfo, totalCount).
    Subclasses must implement _get_paged_result and may set _node_class
    to have every raw result wrapped in a node object.
    """

    _node_class: Any = None
    _page_size = 50  # default page size

    def __init__(self, obj, info, paged_data=None, **kwargs):
        """
        obj: parent object handed over by the GraphQL executor
        info: GraphQL resolve info
        paged_data: optional pre-fetched page, skips the backend call
        kwargs: field arguments (eg: first, after)
        """
        self.obj = obj
        self.info = info
        self.kwargs = kwargs
        self._paged_data = paged_data

    def __call__(self, *args, **kw):
        # Resolvers call the instance; the connection resolves to itself
        return self

    @property
    def edges(self):
        return self._get_edges()

    @property
    def nodes(self):
        """
        Override if needed

        return a list of objects
        If a node class is set, return a list of its (Node) instances
        else a list of raw results
        """
        results = self.get_paged_data().results
        if self._node_class is None:
            return results
        return [
            self._node_class(self.obj, self.info, node_data=result, **self.kwargs)
            for result in results
        ]

    @property
    def pageInfo(self):  # To support the schema naming convention
        # FIXME Replace with a dataclass
        # FIXME, add more details like startCursor
        next_page_token = self.get_paged_data().next_page_token
        return {
            "hasNextPage": bool(next_page_token),
            "endCursor": utils.get_encoded_cursor(next_page_token),
        }

    @property
    def totalCount(self):  # To support the schema naming convention
        return self._get_total_count()

    def _get_total_count(self):
        """
        Will be None for most of the connections
        override if needed/possible
        """
        return None

    def get_paged_data(self):
        """
        Cache to avoid multiple calls to
        the backend (_get_paged_result)

        return a PagedResult object
        """
        if self._paged_data is None:
            # FIXME, make this call async (not for v1)
            self._paged_data = self._get_paged_result()
        return self._paged_data

    @abstractmethod
    def _get_paged_result(self):
        """
        Override for desired behaviour

        return a PagedResult object
        """
        # FIXME, make this call async (not for v1)
        return None

    def _get_edges(self):
        # FIXME, make cursor work per item
        # Cursor can't be None here
        return [{"cursor": "dummy", "node": node} for node in self.nodes]

    def _get_after_arg(self):
        """
        Return the decoded next page token
        override to use a specific token
        """
        return utils.get_decoded_cursor(self.kwargs.get("after"))

    def _get_first_arg(self):
        """
        Return the requested page size

        Falls back to the default page size (50) when 'first' is missing
        or explicitly null; dict.get's default would not cover an
        explicit None sent by the client.
        """
        first = self.kwargs.get("first")
        return self._page_size if first is None else first
class DirectoryEntryNode(BaseNode):
    """
    Node resolver for a single directory entry
    """

    @property
    def targetId(self):  # To support the schema naming convention
        entry = self._node
        return entry.target


class DirectoryEntryConnection(BaseConnection):
    """
    Connection resolver for the entries of a directory
    """

    _node_class = DirectoryEntryNode

    def _get_paged_result(self):
        """
        When entries are requested from a directory;
        self.obj.id is used as the directory id here.

        The entries are fetched in one call and paginated
        locally with utils.paginated (dummy pagination).
        """
        # FIXME, using dummy(local) pagination, move to backend
        # To remove local pagination, just drop the paginated call
        backend = archive.Archive()
        all_entries = backend.get_directory_entries(self.obj.id)
        page_size = self._get_first_arg()
        offset = self._get_after_arg()
        return utils.paginated(all_entries, page_size, offset)
def _lookup_resolver(mapping, resolver_type):
    """
    Return the resolver class registered under resolver_type

    Raises AttributeError for an unknown resolver type
    """
    try:
        return mapping[resolver_type]
    except KeyError:
        # Keep the exception type and message the callers already get
        raise AttributeError(f"Invalid type request {resolver_type}") from None


def get_node_resolver(resolver_type):
    """
    Return the node resolver class for the given type name

    Raises AttributeError if resolver_type is not registered
    """
    # FIXME, replace with a proper factory method
    mapping = {
        "origin": OriginNode,
        "visit": OriginVisitNode,
        "visit-snapshot": VisitSnapshotNode,
        "snapshot": SnapshotNode,
        "branch-revision": BranchRevisionNode,
        "branch-release": BranchReleaseNode,
        "revision": RevisionNode,
        "release": ReleaseNode,
        "directory": DirectoryNode,
        "content": ContentNode,
    }
    return _lookup_resolver(mapping, resolver_type)


def get_connection_resolver(resolver_type):
    """
    Return the connection resolver class for the given type name

    Raises AttributeError if resolver_type is not registered
    """
    # FIXME, replace with a proper factory method
    mapping = {
        "origins": OriginConnection,
        "origin-visits": OriginVisitConnection,
        "visit-status": VisitStatusConnection,
        "snapshot-branches": SnapshotBranchConnection,
        "directory-entries": DirectoryEntryConnection,
    }
    return _lookup_resolver(mapping, resolver_type)
(eg: @visitstatus.field("snapshot")) - As a property in the Node object (eg: resolvers.visit.OriginVisitNode.id) - As an attribute/item in the object/dict returned by the backend (eg: Origin.url) """ from ariadne import ObjectType, UnionType from .resolver_factory import get_connection_resolver, get_node_resolver query = ObjectType("Query") origin = ObjectType("Origin") visit = ObjectType("Visit") visit_status = ObjectType("VisitStatus") snapshot = ObjectType("Snapshot") snapshot_branch = ObjectType("Branch") +directory = ObjectType("Directory") branch_target = UnionType("BranchTarget") # Node resolvers # A node resolver can return a node object or a data structure @query.field("origin") def origin_resolver(obj, info, **kw): """ """ resolver = get_node_resolver("origin") return resolver(obj, info, **kw)() @query.field("visit") def visit_resolver(obj, info, **kw): """ """ resolver = get_node_resolver("visit") return resolver(obj, info, **kw)() @query.field("snapshot") def snapshot_resolver(obj, info, **kw): """ """ resolver = get_node_resolver("snapshot") return resolver(obj, info, **kw)() @visit_status.field("snapshot") def visit_snapshot_resolver(obj, info, **kw): resolver = get_node_resolver("visit-snapshot") return resolver(obj, info, **kw)() @snapshot_branch.field("target") def snapshot_branch_target_resolver(obj, info, **kw): """ Snapshot branch target can be a revision or a release """ resolver_type = f"branch-{obj.type}" resolver = get_node_resolver(resolver_type) return resolver(obj, info, **kw)() @query.field("revision") def revision_resolver(obj, info, **kw): resolver = get_node_resolver("revision") return resolver(obj, info, **kw)() @query.field("release") def release_resolver(obj, info, **kw): resolver = get_node_resolver("release") return resolver(obj, info, **kw)() @query.field("directory") def directory_resolver(obj, info, **kw): resolver = get_node_resolver("directory") return resolver(obj, info, **kw)() @query.field("content") def 
content_resolver(obj, info, **kw): resolver = get_node_resolver("content") return resolver(obj, info, **kw)() # Connection resolvers # A connection resolver will return a sub class of BaseConnection @query.field("origins") def origins_resolver(obj, info, **kw): resolver = get_connection_resolver("origins") return resolver(obj, info, **kw)() @origin.field("visits") def visits_resolver(obj, info, **kw): resolver = get_connection_resolver("origin-visits") return resolver(obj, info, **kw)() @visit.field("status") def visitstatus_resolver(obj, info, **kw): resolver = get_connection_resolver("visit-status") return resolver(obj, info, **kw)() @snapshot.field("branches") def snapshot_branches_resolver(obj, info, **kw): resolver = get_connection_resolver("snapshot-branches") return resolver(obj, info, **kw)() +@directory.field("entries") +def directory_entry_resolver(obj, info, **kw): + resolver = get_connection_resolver("directory-entries") + return resolver(obj, info, **kw)() + + # Any other type of resolver @branch_target.type_resolver def union_resolver(obj, *_): """ Generic resolver for all the union types """ return obj.is_type_of() diff --git a/swh/graphql/schema/schema.graphql b/swh/graphql/schema/schema.graphql index 0845167..0a9a0d5 100644 --- a/swh/graphql/schema/schema.graphql +++ b/swh/graphql/schema/schema.graphql @@ -1,301 +1,305 @@ scalar SWHID scalar Sha1 scalar DateTime scalar DateTimeZone scalar BinaryText interface Node { id: ID! } interface SWHIDNode { id: SWHID! } interface SWHNode { id: Sha1! } type PageInfo { endCursor: String hasNextPage: Boolean! } type OriginConnection { edges: [OriginEdge] nodes: [Origin] pageInfo: PageInfo! totalCount: Int } type OriginEdge { cursor: String! node: Origin } type Origin implements SWHNode { id: Sha1! url: String! visits( first: Int after: String ): VisitConnection! } type VisitConnection { edges: [VisitEdge] nodes: [Visit] pageInfo: PageInfo! totalCount: Int } type VisitEdge { cursor: String! 
# An edge carries exactly one node, like every other *Edge type in this
# schema; a list type here makes the executor try to iterate a single Branch.
type BranchConnectionEdge {
  cursor: String!
  node: Branch
}
node: DirectoryEntry } union DirectoryTarget = Directory | Content type DirectoryEntry { name: BinaryText type: String # FIXME, replace with enum + targetId: Sha1 target: DirectoryTarget } type Directory implements SWHNode { id: Sha1! - entries: DirectoryEntryConnection + entries( + first: Int + after: String + ): DirectoryEntryConnection } type ContentChecksum { test: String } type ContentType { test: String } type ContentLanguage { test: String } type ContentLicense { test: String } type Content implements SWHIDNode { id: SWHID! checksum: ContentChecksum # data: filetype: ContentType language: ContentLanguage length: Int license: ContentLicense status: String } type Query { """ Get an origin with its url """ # FIXME, find some unique id to help cache # maybe base64 encode the URL origin( url: String! ): Origin """ Get a list of origins matching the given filters Can also be used to search for an origin """ # FIMXE, use Input types to make this cleaner origins( first: Int after: String ): OriginConnection """ Get a visit object with its id and/or origin and visit id """ # FIXME, find some unique id to help cache visit( originUrl: String! id: String! ): Visit """ Get a snapshot with SWHID """ snapshot( Sha1: String! ): Snapshot # """ # Get all the snapshot for the given origin # """ # originSnapshot( # originUrl: String! # first: Int # after: String # ): SnapshotConnection """ Get the revision with the given swhid """ revision( Sha1: String! ): Revision """ Get the release with the given swhid """ release( Sha1: String! ): Release """ Get the directory with the given swhid """ directory( Sha1: String! ): Directory """ Get the content with the given swhid """ content( SWHID: String! 
def paginated(source, first, after=0):
    """
    Pagination at the GraphQL level
    This is a temporary fix and inefficient.
    Should eventually be moved to the
    backend (storage) level

    source: any iterable (list, tuple, generator, ...)
    first: maximum number of items in the page (non negative)
    after: offset of the first item, as an int or a numeric
           string; None is treated as 0

    Returns a PagedResult; next_page_token is the offset of the
    next page as a string, or None when no item is left.

    Raises ValueError for a non numeric or negative offset
    and for a negative page size.
    """
    from itertools import islice

    try:
        start = 0 if after is None else int(after)
    except ValueError as err:
        raise ValueError(f"Invalid pagination cursor: {after!r}") from err
    if start < 0:
        raise ValueError(f"Pagination cursor can't be negative: {start}")
    if first < 0:
        raise ValueError(f"Page size can't be negative: {first}")

    end_cursor = start + first
    # Read one item past the page: its presence tells whether another page
    # exists without needing len() or slicing support on the source.
    window = list(islice(source, start, end_cursor + 1))
    results = window[:first]
    next_page_token = str(end_cursor) if len(window) > first else None
    return PagedResult(results=results, next_page_token=next_page_token)