diff --git a/swh/graphql/backends/archive.py b/swh/graphql/backends/archive.py index 2fe04cc..d1bf0d9 100644 --- a/swh/graphql/backends/archive.py +++ b/swh/graphql/backends/archive.py @@ -1,136 +1,147 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os from typing import Any, Dict, Iterable, List, Optional from swh.graphql import server from swh.model.model import ( Content, DirectoryEntry, Origin, OriginVisit, OriginVisitStatus, Release, Revision, Sha1, Sha1Git, ) from swh.model.swhids import ObjectType from swh.storage.interface import PagedResult, PartialBranches, StorageInterface class Archive: def __init__(self) -> None: self.storage: StorageInterface = server.get_storage() def get_origin(self, url: str) -> Optional[Origin]: return list(self.storage.origin_get(origins=[url]))[0] def get_origins( self, after: Optional[str] = None, first: int = 50 ) -> PagedResult[Origin]: return self.storage.origin_list(page_token=after, limit=first) def get_origin_visits( self, origin_url: str, after: Optional[str] = None, first: int = 50 ) -> PagedResult[OriginVisit]: return self.storage.origin_visit_get( origin=origin_url, page_token=after, limit=first ) def get_origin_visit(self, origin_url: str, visit_id: int) -> Optional[OriginVisit]: return self.storage.origin_visit_get_by(origin=origin_url, visit=visit_id) - def get_origin_latest_visit(self, origin_url: str) -> Optional[OriginVisit]: - return self.storage.origin_visit_get_latest(origin=origin_url) + def get_origin_latest_visit( + self, + origin_url: str, + visit_type: Optional[str] = None, + allowed_statuses: Optional[List[str]] = None, + require_snapshot: bool = False, + ) -> Optional[OriginVisit]: + return self.storage.origin_visit_get_latest( + origin=origin_url, + type=visit_type, + allowed_statuses=allowed_statuses, + require_snapshot=require_snapshot, + ) def get_visit_status( self, origin_url: str, visit_id: int, after: Optional[str] = None, first: int = 50, ) -> PagedResult[OriginVisitStatus]: return self.storage.origin_visit_status_get( origin=origin_url, visit=visit_id, page_token=after, limit=first ) def get_latest_visit_status( self, origin_url: str, visit_id: int, allowed_statuses: Optional[List[str]] = None, require_snapshot: bool = False, ) -> Optional[OriginVisitStatus]: return self.storage.origin_visit_status_get_latest( origin_url=origin_url, visit=visit_id, allowed_statuses=allowed_statuses, require_snapshot=require_snapshot, ) def get_origin_snapshots(self, origin_url: str) -> List[Sha1Git]: return self.storage.origin_snapshot_get_all(origin_url=origin_url) def get_snapshot_branches( self, snapshot: Sha1Git, after: bytes = b"", first: int = 50, target_types: Optional[List[str]] = None, name_include: Optional[bytes] = None, name_exclude_prefix: Optional[bytes] = None, ) -> Optional[PartialBranches]: return self.storage.snapshot_get_branches( snapshot_id=snapshot, branches_from=after, branches_count=first, target_types=target_types, branch_name_include_substring=name_include, branch_name_exclude_prefix=name_exclude_prefix, ) def get_revisions(self, revision_ids: List[Sha1Git]) -> List[Optional[Revision]]: return self.storage.revision_get(revision_ids=revision_ids) def get_revision_log( self, revision_ids: List[Sha1Git], first: int = 50 ) -> Iterable[Optional[Dict[str, Any]]]: return self.storage.revision_log(revisions=revision_ids, limit=first) def get_releases(self, release_ids: List[Sha1Git]) -> List[Optional[Release]]: return self.storage.release_get(releases=release_ids) def get_directory_entry_by_path( self, directory_id: Sha1Git, path: str ) -> Optional[Dict[str, Any]]: paths = [x.encode() for x in path.strip(os.path.sep).split(os.path.sep)] return self.storage.directory_entry_get_by_path( directory=directory_id, paths=paths ) def get_directory_entries( self, directory_id: Sha1Git, after: Optional[bytes] = None, first: int = 50 ) -> Optional[PagedResult[DirectoryEntry]]: return self.storage.directory_get_entries( directory_id=directory_id, limit=first, page_token=after ) def is_object_available(self, object_id: bytes, object_type: ObjectType) -> bool: mapping = { ObjectType.CONTENT: self.storage.content_missing_per_sha1_git, ObjectType.DIRECTORY: self.storage.directory_missing, ObjectType.RELEASE: self.storage.release_missing, ObjectType.REVISION: self.storage.revision_missing, ObjectType.SNAPSHOT: self.storage.snapshot_missing, } return not list(mapping[object_type]([object_id])) def get_contents(self, checksums: Dict[str, Any]) -> List[Content]: return self.storage.content_find(content=checksums) def get_content_data(self, content_sha1: Sha1) -> Optional[bytes]: return self.storage.content_get_data(content=content_sha1) diff --git a/swh/graphql/resolvers/visit.py b/swh/graphql/resolvers/visit.py index 27b30fd..bebdb64 100644 --- a/swh/graphql/resolvers/visit.py +++ b/swh/graphql/resolvers/visit.py @@ -1,67 +1,72 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from swh.graphql.utils import utils from swh.storage.interface import PagedResult from .base_connection import BaseConnection from .base_node import BaseNode from .origin import OriginNode class BaseVisitNode(BaseNode): """ Base resolver for all the visit nodes """ @property def id(self): # FIXME, use a better id return utils.get_b64_string(f"{self.origin}-{str(self.visit)}") @property def visitId(self): # To support the schema naming convention return self._node.visit class OriginVisitNode(BaseVisitNode): """ Node resolver for a visit requested directly with an origin URL and a visit ID """ def _get_node_data(self): return self.archive.get_origin_visit( self.kwargs.get("originUrl"), int(self.kwargs.get("visitId")) ) class LatestVisitNode(BaseVisitNode): """ Node resolver for the latest visit in an origin """ _can_be_null = True obj: OriginNode def _get_node_data(self): # self.obj.url is the origin URL - return self.archive.get_origin_latest_visit(self.obj.url) + return self.archive.get_origin_latest_visit( + origin_url=self.obj.url, + visit_type=self.kwargs.get("visitType"), + allowed_statuses=self.kwargs.get("allowedStatuses"), + require_snapshot=self.kwargs.get("requireSnapshot"), + ) class OriginVisitConnection(BaseConnection): """ Connection resolver for the visit objects in an origin """ obj: OriginNode _node_class = BaseVisitNode def _get_paged_result(self) -> PagedResult: # self.obj.url is the origin URL return self.archive.get_origin_visits( self.obj.url, after=self._get_after_arg(), first=self._get_first_arg() ) diff --git a/swh/graphql/schema/schema.graphql b/swh/graphql/schema/schema.graphql index de3f501..c9535e4 100644 --- a/swh/graphql/schema/schema.graphql +++ b/swh/graphql/schema/schema.graphql @@ -1,1110 +1,1125 @@ """ SoftWare Heritage persistent Identifier """ scalar SWHID """ ISO-8601 encoded date string """ scalar DateTime """ Content identifier in the form hash-type:hash-value """ scalar ContentHash """ Object with an id """ interface Node { """ Id of the object. This is for caching purpose and should not be used outside the GraphQL API """ id: ID! } """ SWH merkle node object with a SWHID """ interface MerkleNode { """ SWHID of the object """ swhid: SWHID! } """ Information about pagination """ type PageInfo { """ Cursor to request the next page in the connection """ endCursor: String """ Are there more pages in the connection? """ hasNextPage: Boolean! } """ Binary strings; different encodings """ type BinaryString { """ Utf-8 encoded value, any non Unicode character will be replaced """ text: String """ base64 encoded value """ base64: String } """ Connection to origins """ type OriginConnection { """ List of origin edges """ edges: [OriginEdge] """ List of origin objects """ nodes: [Origin] """ Information for pagination """ pageInfo: PageInfo! """ Total number of origin objects in the connection """ totalCount: Int } """ Edge in origin connection """ type OriginEdge { """ Cursor to request the next page after the item """ cursor: String! """ Origin object """ node: Origin } """ A software origin object """ type Origin implements Node { """ Unique identifier """ id: ID! """ Origin URL """ url: String! """ Connection to all the visit objects for the origin """ visits( """ Returns the first _n_ elements from the list """ first: Int! """ Returns the page after this cursor """ after: String ): VisitConnection! """ Latest visit object for the origin """ - latestVisit: Visit + latestVisit( + """ + Return the latest visit with the given visit type + """ + visitType: String + + """ + Return the latest visit with any of the given statuses + """ + allowedStatuses: [VisitStatusState] + + """ + If True, the latest visit with a snapshot will be returned + """ + requireSnapshot: Boolean + ): Visit """ Connection to all the snapshots for the origin """ snapshots( """ Returns the first _n_ elements from the list """ first: Int! """ Returns the page after this cursor """ after: String ): SnapshotConnection } """ Connection to origin visits """ type VisitConnection { """ List of visit edges """ edges: [VisitEdge] """ List of visit objects """ nodes: [Visit] """ Information for pagination """ pageInfo: PageInfo! """ Total number of visit objects in the connection """ totalCount: Int } """ Edge in origin visit connection """ type VisitEdge { """ Cursor to request the next page after the item """ cursor: String! """ Visit object """ node: Visit } """ Possible visit status states """ enum VisitStatusState { created ongoing partial full not_found failed } """ An origin visit object """ type Visit implements Node { """ Unique identifier """ id: ID! """ Visit number for the origin """ visitId: Int """ Visit date ISO-8601 encoded """ date: DateTime! """ Type of the origin visited. Eg: git/hg/svn/tar/deb """ type: String """ Connection to all the status objects for the visit """ statuses( """ Returns the first _n_ elements from the list """ first: Int """ Returns the page after this cursor """ after: String ): VisitStatusConnection """ Latest status object for the Visit """ latestStatus( """ Filter by status state """ allowedStatuses: [VisitStatusState] """ Filter by the availability of a snapshot in the status """ requireSnapshot: Boolean ): VisitStatus } """ Connection to visit status """ type VisitStatusConnection { """ List of visit status edges """ edges: [VisitStatusEdge] """ List of visit status objects """ nodes: [VisitStatus] """ Information for pagination """ pageInfo: PageInfo! """ Total number of visit status objects in the connection """ totalCount: Int } """ Edge in visit status connection """ type VisitStatusEdge { """ Cursor to request the next page after the item """ cursor: String! """ Visit status object """ node: VisitStatus } """ A visit status object """ type VisitStatus { """ Status string of the visit (either full, partial or ongoing) """ status: VisitStatusState! """ ISO-8601 encoded date string """ date: DateTime! """ Snapshot object """ snapshot: Snapshot """ Type of the origin visited. Eg: git/hg/svn/tar/deb """ type: String } """ Connection to snapshots """ type SnapshotConnection { """ List of snapshot edges """ edges: [SnapshotEdge] """ List of snapshot objects """ nodes: [Snapshot] """ Information for pagination """ pageInfo: PageInfo! """ Total number of snapshot objects in the connection """ totalCount: Int } """ Edge in snapshot connection """ type SnapshotEdge { """ Cursor to request the next page after the item """ cursor: String! """ Snapshot object """ node: Snapshot } """ A snapshot object """ type Snapshot implements MerkleNode & Node { """ Unique identifier """ id: ID! """ SWHID of the snapshot object """ swhid: SWHID! """ Connection to all the snapshot branches """ branches( """ Returns the first _n_ elements from the list """ first: Int! """ Returns the page after this cursor """ after: String """ Filter by branch target types """ types: [BranchTargetType] """ Return branches whose name contains the given substring """ nameInclude: String """ Do not return branches whose name contains the given prefix """ nameExcludePrefix: String ): BranchConnection } """ Connection to snapshot branches """ type BranchConnection { """ List of branch edges """ edges: [BranchConnectionEdge] """ List of branch objects """ nodes: [Branch] """ Information for pagination """ pageInfo: PageInfo! """ Total number of branch objects in the connection """ totalCount: Int } """ Edge in snapshot branch connection """ type BranchConnectionEdge { """ Cursor to request the next page after the item """ cursor: String! """ Branch object """ node: Branch } """ A user object """ type Person { """ User's email address """ email: BinaryString """ User's name """ name: BinaryString """ User's full name """ fullname: BinaryString } """ Possible branch target objects """ union BranchTarget = Revision | Release | Branch | Content | Directory | Snapshot """ Possible Branch target types """ enum BranchTargetType { revision release alias content directory snapshot } """ A snapshot branch object """ type Branch { """ Branch name """ name: BinaryString """ Type of Branch target """ targetType: BranchTargetType """ Branch target object """ target: BranchTarget } """ Connection to revisions """ type RevisionConnection { """ List of revision edges """ edges: [RevisionEdge] """ List of revision objects """ nodes: [Revision] """ Information for pagination """ pageInfo: PageInfo! """ Total number of revision objects in the connection """ totalCount: Int } """ Edge in revision connection """ type RevisionEdge { """ Cursor to request the next page after the item """ cursor: String! """ Revision object """ node: Revision } """ A revision object """ type Revision implements MerkleNode & Node { """ Unique identifier """ id: ID! """ SWHID of the revision object """ swhid: SWHID! """ Message associated to the revision """ message: BinaryString """ """ author: Person """ """ committer: Person """ Revision date ISO-8601 encoded """ date: DateTime """ Type of the revision, eg: git/hg """ type: String """ The unique directory object that revision points to """ directory: Directory """ Connection to all the parents of the revision """ parents( """ Returns the first _n_ elements from the list """ first: Int """ Returns the page after this cursor """ after: String ): RevisionConnection """ Connection to all the revisions heading to this one aka the commit log """ revisionLog( """ Returns the first _n_ elements from the list """ first: Int! """ Returns the page after the cursor """ after: String ): RevisionConnection } """ Possible release target objects """ union ReleaseTarget = Release | Revision | Directory | Content """ Possible release target types """ enum ReleaseTargetType { release revision content directory } """ A release object """ type Release implements MerkleNode & Node { """ Unique identifier """ id: ID! """ SWHID of the release object """ swhid: SWHID! """ The name of the release """ name: BinaryString """ The message associated to the release """ message: BinaryString """ """ author: Person """ Release date ISO-8601 encoded """ date: DateTime """ Type of release target """ targetType: ReleaseTargetType """ Release target object """ target: ReleaseTarget } """ Connection to directory entries """ type DirectoryEntryConnection { """ List of directory entry edges """ edges: [DirectoryEntryEdge] """ List of directory entry objects """ nodes: [DirectoryEntry] """ Information for pagination """ pageInfo: PageInfo! """ Total number of directory entry objects in the connection """ totalCount: Int } """ Edge in directory entry connection """ type DirectoryEntryEdge { """ Cursor to request the next page after the item """ cursor: String! """ Directory entry object """ node: DirectoryEntry } """ Possible directory entry target objects """ union DirectoryEntryTarget = Directory | Content | Revision """ Possible directory entry types """ enum DirectoryEntryTargetType { directory content revision } """ A directory entry object """ type DirectoryEntry { """ The directory entry name """ name: BinaryString """ Directory entry object type; can be file, dir or rev """ targetType: DirectoryEntryTargetType """ Directory entry target object """ target: DirectoryEntryTarget } """ A directory object """ type Directory implements MerkleNode & Node { """ Unique identifier """ id: ID! """ SWHID of the directory object """ swhid: SWHID! """ Connection to the directory entries """ entries( """ Returns the first _n_ elements from the list """ first: Int """ Returns the page after this cursor """ after: String """ Filter by entry name """ nameInclude: String ): DirectoryEntryConnection } """ An object with different content checksums """ type ContentChecksum { blake2s256: String sha1: String sha1_git: String sha256: String } """ Object with different content data representations """ type ContentData { """ URL to download the file data """ url: String } type ContentFileType { """ Detected content encoding """ encoding: String """ Detected MIME type of the content """ mimetype: String } type ContentLanguage { """ Detected programming language if any """ lang: String } type ContentLicense { """ Array of strings containing the detected license names """ licenses: [String] } """ A content object """ type Content implements MerkleNode & Node { """ Unique identifier """ id: ID! """ SWHID of the content object """ swhid: SWHID! """ Checksums for the content """ checksum: ContentChecksum """ Length of the content in bytes """ length: Int """ Content status, visible or hidden """ status: String """ File content """ data: ContentData """ Information about the content MIME type """ fileType: ContentFileType """ Information about the programming language used in the content """ language: ContentLanguage """ Information about the license of the content """ license: ContentLicense } """ Connection to SearchResults """ type SearchResultConnection { """ List of SearchResult edges """ edges: [SearchResultEdge] """ List of SearchResult objects """ nodes: [SearchResult] """ Information for pagination """ pageInfo: PageInfo! """ Total number of result objects in the connection """ totalCount: Int } """ Edge in SearchResult connection """ type SearchResultEdge { """ Cursor to request the next page after the item """ cursor: String! """ SearchResult object """ node: SearchResult } union SearchResultTarget = Origin | Revision | Release | Content | Directory | Snapshot enum SearchResultTargetType { origin revision release content directory snapshot } """ A SearchResult object """ type SearchResult { """ Result target type """ targetType: SearchResultTargetType """ Result target object """ target: SearchResultTarget } """ The query root of the GraphQL interface. """ type Query { """ Get an origin with its url """ origin( """ URL of the Origin """ url: String! ): Origin """ Get a Connection to all the origins """ origins( """ Returns the first _n_ elements from the list """ first: Int! """ Returns the page after the cursor """ after: String """ Filter origins with a URL pattern """ urlPattern: String ): OriginConnection """ Get the visit object with an origin URL and a visit id """ visit( """ URL of the origin """ originUrl: String! """ Visit id to get """ visitId: Int! ): Visit """ Get the snapshot with a SWHID """ snapshot( """ SWHID of the snapshot object """ swhid: SWHID! ): Snapshot """ Get the revision with a SWHID """ revision( """ SWHID of the revision object """ swhid: SWHID! ): Revision """ Get the release with a SWHID """ release( """ SWHID of the release object """ swhid: SWHID! ): Release """ Get the directory with a SWHID """ directory( """ SWHID of the directory object """ swhid: SWHID! ): Directory """ Get a directory entry with directory SWHID and a path """ directoryEntry( """ SWHID of the directory object """ directorySwhid: SWHID! """ Relative path to the requested object """ path: String! ): DirectoryEntry """ Get the content with a SWHID """ content( """ SWHID of the content object """ swhid: SWHID! ): Content """ Get the content by one or more hashes Use multiple hashes for an accurate result """ contentByHash( """ List of hashType:hashValue strings """ checksums: [ContentHash]! ): Content """ Resolve the given SWHID to an object """ resolveSwhid( """ SWHID to look for """ swhid: SWHID! ): SearchResultConnection! """ Search in SWH """ search( """ String to search for """ query: String! """ Returns the first _n_ elements from the list """ first: Int! """ Returns the page after the cursor """ after: String ): SearchResultConnection! } diff --git a/swh/graphql/tests/functional/test_origin_node.py b/swh/graphql/tests/functional/test_origin_node.py index 1f5a04a..792dee3 100644 --- a/swh/graphql/tests/functional/test_origin_node.py +++ b/swh/graphql/tests/functional/test_origin_node.py @@ -1,59 +1,111 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import pytest from ..data import get_origins from .utils import assert_missing_object, get_query_response def test_invalid_get(client): query_str = """ { origin(url: "http://example.com/non-existing") { url } } """ assert_missing_object(client, query_str, "origin") @pytest.mark.parametrize("origin", get_origins()) def test_get(client, storage, origin): query_str = ( """ { origin(url: "%s") { url id visits(first: 10) { nodes { id } } latestVisit { visitId } snapshots(first: 2) { nodes { id } } } } """ % origin.url ) response, _ = get_query_response(client, query_str) data_origin = response["origin"] storage_origin = storage.origin_get([origin.url])[0] visits_and_statuses = storage.origin_visit_get_with_statuses(origin.url).results assert data_origin["url"] == storage_origin.url assert data_origin["id"] == storage_origin.id.hex() assert len(data_origin["visits"]["nodes"]) == len(visits_and_statuses) assert data_origin["latestVisit"]["visitId"] == visits_and_statuses[-1].visit.visit snapshots = storage.origin_snapshot_get_all(origin.url) assert len(data_origin["snapshots"]["nodes"]) == len(snapshots) + + +def test_latest_visit_type_filter(client): + query_str = """ + { + origin(url: "%s") { + latestVisit(visitType: "%s") { + visitId + } + } + } + """ + data, _ = get_query_response(client, query_str % (get_origins()[0].url, "git")) + assert data["origin"] == {"latestVisit": {"visitId": 3}} + + data, _ = get_query_response(client, query_str % (get_origins()[0].url, "hg")) + assert data["origin"] == {"latestVisit": None} + + +def test_latest_visit_require_snapshot_filter(client): + query_str = """ + { + origin(url: "%s") { + latestVisit(requireSnapshot: %s) { + visitId + } + } + } + """ + data, _ = get_query_response(client, query_str % (get_origins()[1].url, "true")) + assert data["origin"] == {"latestVisit": {"visitId": 2}} + + +def test_latest_visit_allowed_statuses_filter(client): + query_str = """ + { + origin(url: "%s") { + latestVisit(allowedStatuses: [partial]) { + visitId + statuses { + nodes { + status + } + } + } + } + } + """ + data, _ = get_query_response(client, query_str % (get_origins()[1].url)) + assert data["origin"] == { + "latestVisit": {"statuses": {"nodes": [{"status": "partial"}]}, "visitId": 2} + }