diff --git a/config/dev.yml b/config/dev.yml index 38cabbf..2e858f7 100644 --- a/config/dev.yml +++ b/config/dev.yml @@ -1,7 +1,11 @@ storage: cls: remote url: http://moma.internal.softwareheritage.org:5002 +search: + cls: remote + url: http://moma.internal.softwareheritage.org:5010 + debug: yes server-type: asgi diff --git a/config/staging.yml b/config/staging.yml index 10a0133..03e7c28 100644 --- a/config/staging.yml +++ b/config/staging.yml @@ -1,7 +1,11 @@ storage: cls: remote url: http://webapp.internal.staging.swh.network:5002 +search: + cls: remote + url: http://webapp.internal.staging.swh.network:5010 + debug: yes server-type: wsgi diff --git a/requirements-swh.txt b/requirements-swh.txt index d9cbc76..a771d3c 100644 --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,4 +1,5 @@ # Add here internal Software Heritage dependencies, one per line. swh.core[http] >= 0.3 # [http] is required by swh.core.pytest_plugin swh.storage swh.model +swh.search diff --git a/swh/graphql/backends/archive.py b/swh/graphql/backends/archive.py index cf1a4cb..bfd64e7 100644 --- a/swh/graphql/backends/archive.py +++ b/swh/graphql/backends/archive.py @@ -1,86 +1,79 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.graphql import server from swh.model.swhids import ObjectType class Archive: def __init__(self): self.storage = server.get_storage() def get_origin(self, url): return self.storage.origin_get([url])[0] - def get_origins(self, after=None, first=50, url_pattern=None): - # STORAGE-TODO - # Make them a single function in the backend - if url_pattern is None: - return self.storage.origin_list(page_token=after, limit=first) - - return self.storage.origin_search( - url_pattern=url_pattern, page_token=after, limit=first - ) + def get_origins(self, after=None, first=50): + return self.storage.origin_list(page_token=after, limit=first) def get_origin_visits(self, origin_url, after=None, first=50): return self.storage.origin_visit_get(origin_url, page_token=after, limit=first) def get_origin_visit(self, origin_url, visit_id): return self.storage.origin_visit_get_by(origin_url, visit_id) def get_origin_latest_visit(self, origin_url): return self.storage.origin_visit_get_latest(origin_url) def get_visit_status(self, origin_url, visit_id, after=None, first=50): return self.storage.origin_visit_status_get( origin_url, visit_id, page_token=after, limit=first ) def get_latest_visit_status(self, origin_url, visit_id): return self.storage.origin_visit_status_get_latest(origin_url, visit_id) def get_origin_snapshots(self, origin_url): return self.storage.origin_snapshot_get_all(origin_url) def get_snapshot_branches( self, snapshot, after=b"", first=50, target_types=[], name_include=None ): return self.storage.snapshot_get_branches( snapshot, branches_from=after, branches_count=first, target_types=target_types, branch_name_include_substring=name_include, ) def get_revisions(self, revision_ids): return self.storage.revision_get(revision_ids=revision_ids) def get_revision_log(self, revision_ids, after=None, first=50): return self.storage.revision_log(revisions=revision_ids, limit=first) def get_releases(self, release_ids): return self.storage.release_get(releases=release_ids) def get_directory_entries(self, directory_id, after=None, first=50): return self.storage.directory_get_entries( directory_id, limit=first, page_token=after ) def is_object_available(self, object_id: str, object_type: ObjectType) -> bool: mapping = { ObjectType.CONTENT: self.storage.content_missing_per_sha1_git, ObjectType.DIRECTORY: self.storage.directory_missing, ObjectType.RELEASE: self.storage.release_missing, ObjectType.REVISION: self.storage.revision_missing, ObjectType.SNAPSHOT: self.storage.snapshot_missing, } return not list(mapping[object_type]([object_id])) def get_contents(self, checksums: dict): return self.storage.content_find(checksums) def get_content_data(self, content_sha1): return self.storage.content_get_data(content_sha1) diff --git a/swh/graphql/backends/search.py b/swh/graphql/backends/search.py new file mode 100644 index 0000000..1f4e172 --- /dev/null +++ b/swh/graphql/backends/search.py @@ -0,0 +1,18 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from swh.graphql import server + + +class Search: + def __init__(self): + self.search = server.get_search() + + def get_origins(self, query: str, after=None, first=50): + return self.search.origin_search( + url_pattern=query, + page_token=after, + limit=first, + ) diff --git a/swh/graphql/resolvers/origin.py b/swh/graphql/resolvers/origin.py index 9ab2f27..bfbff96 100644 --- a/swh/graphql/resolvers/origin.py +++ b/swh/graphql/resolvers/origin.py @@ -1,34 +1,65 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from swh.graphql.backends import archive +from swh.graphql.backends import archive, search +from swh.model.model import Origin from swh.storage.interface import PagedResult from .base_connection import BaseConnection from .base_node import BaseSWHNode +from .search import SearchResultNode -class OriginNode(BaseSWHNode): +class BaseOriginNode(BaseSWHNode): + def is_type_of(self): + # is_type_of is required only when resolving a UNION type + # This is for ariadne to return the right type + return "Origin" + + +class OriginNode(BaseOriginNode): """ Node resolver for an origin requested directly with its URL """ def _get_node_data(self): return archive.Archive().get_origin(self.kwargs.get("url")) +class TargetOriginNode(BaseOriginNode): + """ + Node resolver for an origin requested as a target + """ + + obj: SearchResultNode + + def _get_node_data(self): + # The target origin URL is guaranteed to exist in the archive + # Hence returning the origin object without any explicit check in the archive + # This assumes that the search index and archive are in sync + return Origin(self.obj.target_url) + + class OriginConnection(BaseConnection): """ Connection resolver for the origins """ - _node_class = OriginNode + _node_class = BaseOriginNode def _get_paged_result(self) -> PagedResult: + # Use the search backend if a urlPattern is given + if self.kwargs.get("urlPattern"): + origins = search.Search().get_origins( + query=self.kwargs.get("urlPattern"), + after=self._get_after_arg(), + first=self._get_first_arg(), + ) + results = [Origin(ori["url"]) for ori in origins.results] + return PagedResult(results=results, next_page_token=origins.next_page_token) + # Use the archive backend by default return archive.Archive().get_origins( - after=self._get_after_arg(), - first=self._get_first_arg(), - url_pattern=self.kwargs.get("urlPattern"), + after=self._get_after_arg(), first=self._get_first_arg() ) diff --git a/swh/graphql/resolvers/resolver_factory.py b/swh/graphql/resolvers/resolver_factory.py index ec5c566..77b376c 100644 --- a/swh/graphql/resolvers/resolver_factory.py +++ b/swh/graphql/resolvers/resolver_factory.py @@ -1,81 +1,83 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from .content import ContentNode, HashContentNode, TargetContentNode from .directory import DirectoryNode, RevisionDirectoryNode, TargetDirectoryNode from .directory_entry import DirectoryEntryConnection -from .origin import OriginConnection, OriginNode +from .origin import OriginConnection, OriginNode, TargetOriginNode from .release import ReleaseNode, TargetReleaseNode from .revision import ( LogRevisionConnection, ParentRevisionConnection, RevisionNode, TargetRevisionNode, ) -from .search import ResolveSwhidConnection +from .search import ResolveSwhidConnection, SearchConnection from .snapshot import ( OriginSnapshotConnection, SnapshotNode, TargetSnapshotNode, VisitSnapshotNode, ) from .snapshot_branch import SnapshotBranchConnection from .visit import LatestVisitNode, OriginVisitConnection, OriginVisitNode from .visit_status import LatestVisitStatusNode, VisitStatusConnection def get_node_resolver(resolver_type): # FIXME, replace with a proper factory method mapping = { "origin": OriginNode, "visit": OriginVisitNode, "latest-visit": LatestVisitNode, "latest-status": LatestVisitStatusNode, "visit-snapshot": VisitSnapshotNode, "snapshot": SnapshotNode, "branch-revision": TargetRevisionNode, "branch-release": TargetReleaseNode, "branch-directory": TargetDirectoryNode, "branch-content": TargetContentNode, "branch-snapshot": TargetSnapshotNode, "revision": RevisionNode, "revision-directory": RevisionDirectoryNode, "release": ReleaseNode, "release-revision": TargetRevisionNode, "release-release": TargetReleaseNode, "release-directory": TargetDirectoryNode, "release-content": TargetContentNode, "directory": DirectoryNode, "content": ContentNode, "content-by-hash": HashContentNode, "dir-entry-dir": TargetDirectoryNode, "dir-entry-file": TargetContentNode, + "search-result-origin": TargetOriginNode, "search-result-snapshot": TargetSnapshotNode, "search-result-revision": TargetRevisionNode, "search-result-release": TargetReleaseNode, "search-result-directory": TargetDirectoryNode, "search-result-content": TargetContentNode, } if resolver_type not in mapping: raise AttributeError(f"Invalid node type: {resolver_type}") return mapping[resolver_type] def get_connection_resolver(resolver_type): # FIXME, replace with a proper factory method mapping = { "origins": OriginConnection, "origin-visits": OriginVisitConnection, "origin-snapshots": OriginSnapshotConnection, "visit-status": VisitStatusConnection, "snapshot-branches": SnapshotBranchConnection, "revision-parents": ParentRevisionConnection, "revision-log": LogRevisionConnection, "directory-entries": DirectoryEntryConnection, "resolve-swhid": ResolveSwhidConnection, + "search": SearchConnection, } if resolver_type not in mapping: raise AttributeError(f"Invalid connection type: {resolver_type}") return mapping[resolver_type] diff --git a/swh/graphql/resolvers/resolvers.py b/swh/graphql/resolvers/resolvers.py index cc0e795..054b94c 100644 --- a/swh/graphql/resolvers/resolvers.py +++ b/swh/graphql/resolvers/resolvers.py @@ -1,290 +1,298 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """ High level resolvers """ # Any schema attribute can be resolved by any of the following ways # and in the following priority order # - In this module using a decorator (eg: @visitstatus.field("snapshot") # Every object (type) is expected to resolve this way as they can accept arguments # eg: origin.visits takes arguments to paginate # - As a property in the Node object (eg: resolvers.visit.BaseVisitNode.id) # Every scalar is expected to resolve this way # - As an attribute/item in the object/dict returned by a backend (eg: Origin.url) from ariadne import ObjectType, UnionType from graphql.type import GraphQLResolveInfo from swh.graphql import resolvers as rs from swh.graphql.utils import utils from .resolver_factory import get_connection_resolver, get_node_resolver query: ObjectType = ObjectType("Query") origin: ObjectType = ObjectType("Origin") visit: ObjectType = ObjectType("Visit") visit_status: ObjectType = ObjectType("VisitStatus") snapshot: ObjectType = ObjectType("Snapshot") snapshot_branch: ObjectType = ObjectType("Branch") revision: ObjectType = ObjectType("Revision") release: ObjectType = ObjectType("Release") directory: ObjectType = ObjectType("Directory") directory_entry: ObjectType = ObjectType("DirectoryEntry") search_result: ObjectType = ObjectType("SearchResult") binary_string: ObjectType = ObjectType("BinaryString") branch_target: UnionType = UnionType("BranchTarget") release_target: UnionType = UnionType("ReleaseTarget") directory_entry_target: UnionType = UnionType("DirectoryEntryTarget") search_result_target: UnionType = UnionType("SearchResultTarget") # Node resolvers # A node resolver should return an instance of BaseNode @query.field("origin") def origin_resolver(obj: None, info: GraphQLResolveInfo, **kw) -> rs.origin.OriginNode: """ """ resolver = get_node_resolver("origin") return resolver(obj, info, **kw) @origin.field("latestVisit") def latest_visit_resolver( obj: rs.origin.OriginNode, info: GraphQLResolveInfo, **kw ) -> rs.visit.LatestVisitNode: """ """ resolver = get_node_resolver("latest-visit") return resolver(obj, info, **kw) @query.field("visit") def visit_resolver( obj: None, info: GraphQLResolveInfo, **kw ) -> rs.visit.OriginVisitNode: """ """ resolver = get_node_resolver("visit") return resolver(obj, info, **kw) @visit.field("latestStatus") def latest_visit_status_resolver( obj, info: GraphQLResolveInfo, **kw ) -> rs.visit_status.LatestVisitStatusNode: """ """ resolver = get_node_resolver("latest-status") return resolver(obj, info, **kw) @query.field("snapshot") def snapshot_resolver( obj: None, info: GraphQLResolveInfo, **kw ) -> rs.snapshot.SnapshotNode: """ """ resolver = get_node_resolver("snapshot") return resolver(obj, info, **kw) @visit_status.field("snapshot") def visit_snapshot_resolver( obj, info: GraphQLResolveInfo, **kw ) -> rs.snapshot.VisitSnapshotNode: resolver = get_node_resolver("visit-snapshot") return resolver(obj, info, **kw) @snapshot_branch.field("target") def snapshot_branch_target_resolver( obj: rs.snapshot_branch.SnapshotBranchNode, info: GraphQLResolveInfo, **kw ): """ Snapshot branch target can be a revision or a release """ resolver_type = f"branch-{obj.type}" resolver = get_node_resolver(resolver_type) return resolver(obj, info, **kw) @query.field("revision") def revision_resolver( obj: None, info: GraphQLResolveInfo, **kw ) -> rs.revision.RevisionNode: resolver = get_node_resolver("revision") return resolver(obj, info, **kw) @revision.field("directory") def revision_directory_resolver( obj, info: GraphQLResolveInfo, **kw ) -> rs.directory.RevisionDirectoryNode: resolver = get_node_resolver("revision-directory") return resolver(obj, info, **kw) @query.field("release") def release_resolver( obj: None, info: GraphQLResolveInfo, **kw ) -> rs.release.ReleaseNode: resolver = get_node_resolver("release") return resolver(obj, info, **kw) @release.field("target") def release_target_resolver(obj, info: GraphQLResolveInfo, **kw): """ release target can be a release, revision, directory or content obj is release here, target type is obj.target_type """ resolver_type = f"release-{obj.target_type.value}" resolver = get_node_resolver(resolver_type) return resolver(obj, info, **kw) @query.field("directory") def directory_resolver( obj: None, info: GraphQLResolveInfo, **kw ) -> rs.directory.DirectoryNode: resolver = get_node_resolver("directory") return resolver(obj, info, **kw) @directory_entry.field("target") def directory_entry_target_resolver( obj: rs.directory_entry.DirectoryEntryNode, info: GraphQLResolveInfo, **kw ): """ directory entry target can be a directory or a content """ resolver_type = f"dir-entry-{obj.type}" resolver = get_node_resolver(resolver_type) return resolver(obj, info, **kw) @query.field("content") def content_resolver( obj: None, info: GraphQLResolveInfo, **kw ) -> rs.content.ContentNode: resolver = get_node_resolver("content") return resolver(obj, info, **kw) @search_result.field("target") def search_result_target_resolver( obj: rs.search.SearchResultNode, info: GraphQLResolveInfo, **kw ): resolver_type = f"search-result-{obj.type}" resolver = get_node_resolver(resolver_type) return resolver(obj, info, **kw) @query.field("contentByHash") def content_by_hash_resolver( obj: None, info: GraphQLResolveInfo, **kw ) -> rs.content.ContentNode: resolver = get_node_resolver("content-by-hash") return resolver(obj, info, **kw) # Connection resolvers # A connection resolver should return an instance of BaseConnection @query.field("origins") def origins_resolver( obj: None, info: GraphQLResolveInfo, **kw ) -> rs.origin.OriginConnection: resolver = get_connection_resolver("origins") return resolver(obj, info, **kw) @origin.field("visits") def visits_resolver( obj: rs.origin.OriginNode, info: GraphQLResolveInfo, **kw ) -> rs.visit.OriginVisitConnection: resolver = get_connection_resolver("origin-visits") return resolver(obj, info, **kw) @origin.field("snapshots") def origin_snapshots_resolver( obj: rs.origin.OriginNode, info: GraphQLResolveInfo, **kw ) -> rs.snapshot.OriginSnapshotConnection: """ """ resolver = get_connection_resolver("origin-snapshots") return resolver(obj, info, **kw) @visit.field("status") def visitstatus_resolver( obj, info: GraphQLResolveInfo, **kw ) -> rs.visit_status.VisitStatusConnection: resolver = get_connection_resolver("visit-status") return resolver(obj, info, **kw) @snapshot.field("branches") def snapshot_branches_resolver( obj, info: GraphQLResolveInfo, **kw ) -> rs.snapshot_branch.SnapshotBranchConnection: resolver = get_connection_resolver("snapshot-branches") return resolver(obj, info, **kw) @revision.field("parents") def revision_parents_resolver( obj, info: GraphQLResolveInfo, **kw ) -> rs.revision.ParentRevisionConnection: resolver = get_connection_resolver("revision-parents") return resolver(obj, info, **kw) # @revision.field("revisionLog") # def revision_log_resolver(obj, info, **kw): # resolver = get_connection_resolver("revision-log") # return resolver(obj, info, **kw) @directory.field("entries") def directory_entry_resolver( obj, info: GraphQLResolveInfo, **kw ) -> rs.directory_entry.DirectoryEntryConnection: resolver = get_connection_resolver("directory-entries") return resolver(obj, info, **kw) @query.field("resolveSwhid") def search_swhid_resolver( obj, info: GraphQLResolveInfo, **kw ) -> rs.search.ResolveSwhidConnection: resolver = get_connection_resolver("resolve-swhid") return resolver(obj, info, **kw) +@query.field("search") +def search_resolver( + obj, info: GraphQLResolveInfo, **kw +) -> rs.search.ResolveSwhidConnection: + resolver = get_connection_resolver("search") + return resolver(obj, info, **kw) + + # Any other type of resolver @release_target.type_resolver @directory_entry_target.type_resolver @branch_target.type_resolver @search_result_target.type_resolver def union_resolver(obj, *_) -> str: """ Generic resolver for all the union types """ return obj.is_type_of() @binary_string.field("text") def binary_string_text_resolver(obj, *args, **kw): return obj.decode(utils.ENCODING, "replace") @binary_string.field("base64") def binary_string_base64_resolver(obj, *args, **kw): return utils.get_b64_string(obj) diff --git a/swh/graphql/resolvers/search.py b/swh/graphql/resolvers/search.py index ed7092b..2eed876 100644 --- a/swh/graphql/resolvers/search.py +++ b/swh/graphql/resolvers/search.py @@ -1,31 +1,49 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -from swh.graphql.backends import archive +from swh.graphql.backends import archive, search from swh.storage.interface import PagedResult from .base_connection import BaseConnection from .base_node import BaseNode class SearchResultNode(BaseNode): """ """ class ResolveSwhidConnection(BaseConnection): _node_class = SearchResultNode def _get_paged_result(self) -> PagedResult: swhid = self.kwargs.get("swhid") results = [] if archive.Archive().is_object_available(swhid.object_id, swhid.object_type): results = [ { "target_hash": swhid.object_id, "type": swhid.object_type.name.lower(), } ] return PagedResult(results=results) + + +class SearchConnection(BaseConnection): + + _node_class = SearchResultNode + + def _get_paged_result(self) -> PagedResult: + origins = search.Search().get_origins( + query=self.kwargs.get("query"), + after=self._get_after_arg(), + first=self._get_first_arg(), + ) + + # FIXME hard coding type to origin for now, as it is the only searchable object + results = [ + {"target_url": ori["url"], "type": "origin"} for ori in origins.results + ] + return PagedResult(results=results, next_page_token=origins.next_page_token) diff --git a/swh/graphql/schema/schema.graphql b/swh/graphql/schema/schema.graphql index 1843186..9696926 100644 --- a/swh/graphql/schema/schema.graphql +++ b/swh/graphql/schema/schema.graphql @@ -1,1043 +1,1063 @@ """ SoftWare Heritage persistent Identifier """ scalar SWHID """ ISO-8601 encoded date string """ scalar DateTime """ Content identifier in the form hash-type:hash-value """ scalar ContentHash """ Object with an id """ interface Node { """ Id of the object. This is for caching purpose and should not be used outside the GraphQL API """ id: ID! } """ SWH merkle node object with a SWHID """ interface MerkleNode { """ SWHID of the object """ swhid: SWHID! } """ Information about pagination """ type PageInfo { """ Cursor to request the next page in the connection """ endCursor: String """ Are there more pages in the connection? """ hasNextPage: Boolean! } """ Binary strings; different encodings """ type BinaryString { """ Utf-8 encoded value, any non Unicode character will be replaced """ text: String """ base64 encoded value """ base64: String } """ Connection to origins """ type OriginConnection { """ List of origin edges """ edges: [OriginEdge] """ List of origin objects """ nodes: [Origin] """ Information for pagination """ pageInfo: PageInfo! """ Total number of origin objects in the connection """ totalCount: Int } """ Edge in origin connection """ type OriginEdge { """ Cursor to request the next page after the item """ cursor: String! """ Origin object """ node: Origin } """ A software origin object """ type Origin implements Node { """ Unique identifier """ id: ID! """ Origin URL """ url: String! """ Connection to all the visit objects for the origin """ visits( """ Returns the first _n_ elements from the list """ first: Int! """ Returns the page after this cursor """ after: String ): VisitConnection! """ Latest visit object for the origin """ latestVisit: Visit """ Connection to all the snapshots for the origin """ snapshots( """ Returns the first _n_ elements from the list """ first: Int! """ Returns the page after this cursor """ after: String ): SnapshotConnection } """ Connection to origin visits """ type VisitConnection { """ List of visit edges """ edges: [VisitEdge] """ List of visit objects """ nodes: [Visit] """ Information for pagination """ pageInfo: PageInfo! """ Total number of visit objects in the connection """ totalCount: Int } """ Edge in origin visit connection """ type VisitEdge { """ Cursor to request the next page after the item """ cursor: String! """ Visit object """ node: Visit } """ An origin visit object """ type Visit implements Node { """ Unique identifier """ id: ID! """ Visit number for the origin """ visitId: Int """ Visit date ISO-8601 encoded """ date: DateTime! """ Type of the origin visited. Eg: git/hg/svn/tar/deb """ type: String """ Connection to all the status objects for the visit """ status( """ Returns the first _n_ elements from the list """ first: Int """ Returns the page after this cursor """ after: String ): VisitStatusConnection """ Latest status object for the Visit """ latestStatus: VisitStatus } """ Connection to visit status """ type VisitStatusConnection { """ List of visit status edges """ edges: [VisitStatusEdge] """ List of visit status objects """ nodes: [VisitStatus] """ Information for pagination """ pageInfo: PageInfo! """ Total number of visit status objects in the connection """ totalCount: Int } """ Edge in visit status connection """ type VisitStatusEdge { """ Cursor to request the next page after the item """ cursor: String! """ Visit status object """ node: VisitStatus } """ A visit status object """ type VisitStatus { """ Status string of the visit (either full, partial or ongoing) """ status: String! """ ISO-8601 encoded date string """ date: DateTime! """ Snapshot object """ snapshot: Snapshot """ Type of the origin visited. Eg: git/hg/svn/tar/deb """ type: String } """ Connection to snapshots """ type SnapshotConnection { """ List of snapshot edges """ edges: [SnapshotEdge] """ List of snapshot objects """ nodes: [Snapshot] """ Information for pagination """ pageInfo: PageInfo! """ Total number of snapshot objects in the connection """ totalCount: Int } """ Edge in snapshot connection """ type SnapshotEdge { """ Cursor to request the next page after the item """ cursor: String! """ Snapshot object """ node: Snapshot } """ A snapshot object """ type Snapshot implements MerkleNode & Node { """ Unique identifier """ id: ID! """ SWHID of the snapshot object """ swhid: SWHID! """ Connection to all the snapshot branches """ branches( """ Returns the first _n_ elements from the list """ first: Int! """ Returns the page after this cursor """ after: String """ Filter by branch target types """ types: [BranchTargetType] """ Filter by branch name """ nameInclude: String ): BranchConnection } """ Connection to snapshot branches """ type BranchConnection { """ List of branch edges """ edges: [BranchConnectionEdge] """ List of branch objects """ nodes: [Branch] """ Information for pagination """ pageInfo: PageInfo! """ Total number of branch objects in the connection """ totalCount: Int } """ Edge in snapshot branch connection """ type BranchConnectionEdge { """ Cursor to request the next page after the item """ cursor: String! """ Branch object """ node: Branch } """ A user object """ type Person { """ User's email address """ email: BinaryString """ User's name """ name: BinaryString """ User's full name """ fullname: BinaryString } """ Possible branch target objects """ union BranchTarget = Revision | Release | Branch | Content | Directory | Snapshot """ Possible Branch target types """ enum BranchTargetType { revision release alias content directory snapshot } """ A snapshot branch object """ type Branch { """ Branch name """ name: BinaryString """ Type of Branch target """ type: BranchTargetType """ Branch target object """ target: BranchTarget } """ Connection to revisions """ type RevisionConnection { """ List of revision edges """ edges: [RevisionEdge] """ List of revision objects """ nodes: [Revision] """ Information for pagination """ pageInfo: PageInfo! """ Total number of revision objects in the connection """ totalCount: Int } """ Edge in revision connection """ type RevisionEdge { """ Cursor to request the next page after the item """ cursor: String! """ Revision object """ node: Revision } """ A revision object """ type Revision implements MerkleNode & Node { """ Unique identifier """ id: ID! """ SWHID of the revision object """ swhid: SWHID! """ Message associated to the revision """ message: BinaryString """ """ author: Person """ """ committer: Person """ Revision date ISO-8601 encoded """ date: DateTime """ Type of the revision, eg: git/hg """ type: String """ The unique directory object that revision points to """ directory: Directory """ Connection to all the parents of the revision """ parents( """ Returns the first _n_ elements from the list """ first: Int """ Returns the page after this cursor """ after: String ): RevisionConnection """ Connection to all the revisions heading to this one aka the commit log """ revisionLog( """ Returns the first _n_ elements from the list """ first: Int! """ Returns the page after the cursor """ after: String ): RevisionConnection } """ Possible release target objects """ union ReleaseTarget = Release | Revision | Directory | Content """ Possible release target types """ enum ReleaseTargetType { release revision content directory } """ A release object """ type Release implements MerkleNode & Node { """ Unique identifier """ id: ID! """ SWHID of the release object """ swhid: SWHID! """ The name of the release """ name: BinaryString """ The message associated to the release """ message: BinaryString """ """ author: Person """ Release date ISO-8601 encoded """ date: DateTime """ Type of release target """ targetType: ReleaseTargetType """ Release target object """ target: ReleaseTarget } """ Connection to directory entries """ type DirectoryEntryConnection { """ List of directory entry edges """ edges: [DirectoryEntryEdge] """ List of directory entry objects """ nodes: [DirectoryEntry] """ Information for pagination """ pageInfo: PageInfo! """ Total number of directory entry objects in the connection """ totalCount: Int } """ Edge in directory entry connection """ type DirectoryEntryEdge { """ Cursor to request the next page after the item """ cursor: String! """ Directory entry object """ node: DirectoryEntry } """ Possible directory entry target objects """ union DirectoryEntryTarget = Directory | Content """ Possible directory entry types """ enum DirectoryEntryType { dir file rev } """ A directory entry object """ type DirectoryEntry { """ The directory entry name """ name: BinaryString """ Directory entry object type; can be file, dir or rev """ type: DirectoryEntryType """ Directory entry target object """ target: DirectoryEntryTarget } """ A directory object """ type Directory implements MerkleNode & Node { """ Unique identifier """ id: ID! """ SWHID of the directory object """ swhid: SWHID! """ Connection to the directory entries """ entries( """ Returns the first _n_ elements from the list """ first: Int """ Returns the page after this cursor """ after: String ): DirectoryEntryConnection } """ An object with different content checksums """ type ContentChecksum { blake2s256: String sha1: String sha1_git: String sha256: String } """ Object with different content data representations """ type ContentData { """ URL to download the file data """ url: String } type ContentFileType { """ Detected content encoding """ encoding: String """ Detected MIME type of the content """ mimetype: String } type ContentLanguage { """ Detected programming language if any """ lang: String } type ContentLicense { """ Array of strings containing the detected license names """ licenses: [String] } """ A content object """ type Content implements MerkleNode & Node { """ Unique identifier """ id: ID! """ SWHID of the content object """ swhid: SWHID! """ Checksums for the content """ checksum: ContentChecksum """ Length of the content in bytes """ length: Int """ Content status, visible or hidden """ status: String """ File content """ data: ContentData """ Information about the content MIME type """ fileType: ContentFileType """ Information about the programming language used in the content """ language: ContentLanguage """ Information about the license of the content """ license: ContentLicense } """ Connection to SearchResults """ type SearchResultConnection { """ List of SearchResult edges """ edges: [SearchResultEdge] """ List of SearchResult objects """ nodes: [SearchResult] """ Information for pagination """ pageInfo: PageInfo! """ Total number of result objects in the connection """ totalCount: Int } """ Edge in SearchResult connection """ type SearchResultEdge { """ Cursor to request the next page after the item """ cursor: String! """ SearchResult object """ node: SearchResult } union SearchResultTarget = Origin | Revision | Release | Content | Directory | Snapshot enum SearchResultTargetType { origin revision release content directory snapshot } """ A SearchResult object """ type SearchResult { """ Result target type """ type: SearchResultTargetType """ Result target object """ target: SearchResultTarget } """ The query root of the GraphQL interface. """ type Query { """ Get an origin with its url """ origin( """ URL of the Origin """ url: String! ): Origin """ Get a Connection to all the origins """ origins( """ Returns the first _n_ elements from the list """ first: Int! """ Returns the page after the cursor """ after: String """ Filter origins with a URL pattern """ urlPattern: String ): OriginConnection """ Get the visit object with an origin URL and a visit id """ visit( """ URL of the origin """ originUrl: String! """ Visit id to get """ visitId: Int! ): Visit """ Get the snapshot with a SWHID """ snapshot( """ SWHID of the snapshot object """ swhid: SWHID! ): Snapshot """ Get the revision with a SWHID """ revision( """ SWHID of the revision object """ swhid: SWHID! ): Revision """ Get the release with a SWHID """ release( """ SWHID of the release object """ swhid: SWHID! ): Release """ Get the directory with a SWHID """ directory( """ SWHID of the directory object """ swhid: SWHID! ): Directory """ Get the content with a SWHID """ content( """ SWHID of the content object """ swhid: SWHID! ): Content """ Get the content by one or more hashes Use multiple hashes for an accurate result """ contentByHash( """ List of hashType:hashValue strings """ checksums: [ContentHash]! ): Content """ Resolve the given SWHID to an object """ resolveSwhid( """ SWHID to look for """ swhid: SWHID! ): SearchResultConnection! + + """ + Search in SWH + """ + search( + """ + String to search for + """ + query: String! + + """ + Returns the first _n_ elements from the list + """ + first: Int! + + """ + Returns the page after the cursor + """ + after: String + ): SearchResultConnection! } diff --git a/swh/graphql/server.py b/swh/graphql/server.py index 771da19..3d3bfea 100644 --- a/swh/graphql/server.py +++ b/swh/graphql/server.py @@ -1,75 +1,84 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os from typing import Any, Dict, Optional from swh.core import config -from swh.storage import get_storage as get_swhstorage +from swh.search import get_search as get_swh_search +from swh.storage import get_storage as get_swh_storage graphql_cfg = None storage = None +search = None def get_storage(): global storage if not storage: - storage = get_swhstorage(**graphql_cfg["storage"]) + storage = get_swh_storage(**graphql_cfg["storage"]) return storage +def get_search(): + global search + if not search: + search = get_swh_search(**graphql_cfg["search"]) + return search + + def load_and_check_config(config_path: Optional[str]) -> Dict[str, Any]: """Check the minimal configuration is set to run the api or raise an error explanation. Args: config_path: Path to the configuration file to load Raises: Error if the setup is not as expected Returns: configuration as a dict """ if not config_path: raise EnvironmentError("Configuration file must be defined") if not os.path.exists(config_path): raise FileNotFoundError(f"Configuration file {config_path} does not exist") cfg = config.read(config_path) if "storage" not in cfg: raise KeyError("Missing 'storage' configuration") return cfg def make_app_from_configfile(): """Loading the configuration from a configuration file. SWH_CONFIG_FILENAME environment variable defines the configuration path to load. """ from .app import schema global graphql_cfg if not graphql_cfg: config_path = os.environ.get("SWH_CONFIG_FILENAME") graphql_cfg = load_and_check_config(config_path) server_type = graphql_cfg.get("server-type") if server_type == "asgi": from ariadne.asgi import GraphQL application = GraphQL(schema) else: from ariadne.wsgi import GraphQL application = GraphQL(schema) return application diff --git a/swh/graphql/tests/conftest.py b/swh/graphql/tests/conftest.py index da619ef..ef045de 100644 --- a/swh/graphql/tests/conftest.py +++ b/swh/graphql/tests/conftest.py @@ -1,48 +1,60 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from ariadne import graphql_sync from flask import Flask, jsonify, request import pytest from swh.graphql import server as app_server from swh.graphql.app import schema -from swh.storage import get_storage as get_swhstorage +from swh.search import get_search as get_swh_search +from swh.storage import get_storage as get_swh_storage -from .data import populate_dummy_data +from .data import populate_dummy_data, populate_search_data @pytest.fixture def storage(): - storage = get_swhstorage(cls="memory") + storage = get_swh_storage(cls="memory") # set the global var to use the in-memory storage app_server.storage = storage # populate the in-memory storage populate_dummy_data(storage) return storage @pytest.fixture -def test_app(storage): +def search(): + search = get_swh_search("memory") + # set the global var to use the in-memory search + app_server.search = search + search.initialize() + # populate the in-memory search + populate_search_data(search) + return search + + +@pytest.fixture +def test_app(storage, search): app = Flask(__name__) @app.route("/", methods=["POST"]) def graphql_server(): # GraphQL queries are always sent as POST data = request.get_json() success, result = graphql_sync( schema, data, context_value=request, debug=app.debug ) status_code = 200 if success else 400 return jsonify(result), status_code yield app @pytest.fixture def client(test_app): with test_app.test_client() as client: yield client diff --git a/swh/graphql/tests/data.py b/swh/graphql/tests/data.py index 8b32edc..468618f 100644 --- a/swh/graphql/tests/data.py +++ b/swh/graphql/tests/data.py @@ -1,36 +1,40 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.model.tests import swh_model_data def populate_dummy_data(storage): for object_type, objects in swh_model_data.TEST_OBJECTS.items(): method = getattr(storage, object_type + "_add") method(objects) +def populate_search_data(search): + search.origin_update({"url": origin.url} for origin in get_origins()) + + def get_origins(): return swh_model_data.ORIGINS def get_snapshots(): return swh_model_data.SNAPSHOTS def get_releases(): return swh_model_data.RELEASES def get_revisions(): return swh_model_data.REVISIONS def get_contents(): return swh_model_data.CONTENTS def get_directories(): return swh_model_data.DIRECTORIES diff --git a/swh/graphql/tests/functional/test_search.py b/swh/graphql/tests/functional/test_search.py new file mode 100644 index 0000000..97fd560 --- /dev/null +++ b/swh/graphql/tests/functional/test_search.py @@ -0,0 +1,64 @@ +# Copyright (C) 2022 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +from . import utils + + +def test_search_origins(client): + query_str = """ + { + search(query: "fox", first: 1) { + nodes { + type + target { + ...on Origin { + url + latestVisit { + date + } + } + } + } + pageInfo { + hasNextPage + endCursor + } + } + } + """ + data, _ = utils.get_query_response(client, query_str) + assert len(data["search"]["nodes"]) == 1 + assert data == { + "search": { + "nodes": [ + { + "target": { + "url": "https://somewhere.org/den/fox", + "latestVisit": {"date": "2018-11-27T17:20:39+00:00"}, + }, + "type": "origin", + } + ], + "pageInfo": {"endCursor": "MQ==", "hasNextPage": True}, + } + } + + +def test_search_missing_url(client): + query_str = """ + { + search(query: "missing-fox", first: 1) { + nodes { + type + } + pageInfo { + hasNextPage + endCursor + } + } + } + """ + data, _ = utils.get_query_response(client, query_str) + assert len(data["search"]["nodes"]) == 0