commit 4f000e884c3010c2bb04bbb516aaf0ba234ccf4f
Author: Valentin Lorentz
Date:   Fri Jun 28 14:57:57 2019 +0200

    [WIP] graphql API

diff --git a/requirements.txt b/requirements.txt
index 8767b06b..4802cbb3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -18,6 +18,7 @@ python-dateutil
 pyyaml
 requests
 python-memcached
+graphene
 
 # Doc dependencies
 sphinx
diff --git a/swh/web/api/utils.py b/swh/web/api/utils.py
index d4f2d49b..c337ebee 100644
--- a/swh/web/api/utils.py
+++ b/swh/web/api/utils.py
@@ -159,6 +159,13 @@ def enrich_content(content, top_url=False, query_string=None):
     return content
 
 
+REVISION_FIELDS = [
+    'author', 'date', 'committer', 'committer_date',
+    'directory', 'id', 'merge', 'message', 'parents',
+    'type',
+]
+
+
 def enrich_revision(revision):
     """Enrich revision with links where it makes sense (directory, parents).
     Keep track of the navigation breadcrumbs if they are specified.
diff --git a/swh/web/api/views/revision.py b/swh/web/api/views/revision.py
index a36199ee..8f786722 100644
--- a/swh/web/api/views/revision.py
+++ b/swh/web/api/views/revision.py
@@ -11,7 +11,7 @@
 from swh.web.api import utils
 from swh.web.api.apidoc import api_doc, format_docstring
 from swh.web.api.apiurls import api_route
-from swh.web.api.views.utils import api_lookup
+from swh.web.api.views.utils import api_lookup, graph_api_lookup
 
 
 DOC_RETURN_REVISION = '''
@@ -258,8 +258,22 @@ def api_revision_with_origin(request, origin_id,
         :swh_web_api:`revision/origin/13706355/branch/refs/heads/2.7/`
     """ # noqa
-    return api_lookup(
-        service.lookup_revision_by, int(origin_id), branch_name, ts,
+    query = '''
+        query {
+          origins(ids: [$origin_id]) {
+            latest_visit(require_snapshot: true) {
+              snapshot {
+                snapshots {
+                  branch(name: $branch_name) {
+                    target {
+                      revision {
+                        %s
+                      }}}}}}}}''' % ' '.join(utils.REVISION_FIELDS)
+    variables = {'origin_id': origin_id, 'branch_name': branch_name}
+    return graph_api_lookup(
+        query, variables, [
+            'origins', 0, 'latest_visit', 'snapshot', 'snapshots', 0,
+            'branch', 'target', 'revision'],
         notfound_msg=('Revision with (origin_id: {}, branch_name: {}'
                       ', ts: {}) not found.'.format(origin_id, branch_name,
                                                     ts)),
         enrich_fn=utils.enrich_revision)
diff --git a/swh/web/api/views/utils.py b/swh/web/api/views/utils.py
index acba8ae2..51c2047a 100644
--- a/swh/web/api/views/utils.py
+++ b/swh/web/api/views/utils.py
@@ -50,6 +50,17 @@ def api_lookup(lookup_fn, *args,
     return enrich_fn(res)
 
 
+def graph_api_lookup(query, variables, path, *, notfound_msg, enrich_fn):
+    from swh.web.common.graphql import execute
+    result = execute(query, variables).data
+    for part in path:
+        try:
+            result = result[part]
+        except (KeyError, IndexError, TypeError):
+            raise NotFoundExc(notfound_msg)
+    return enrich_fn(result)
+
+
 @api_view(['GET', 'HEAD'])
 def api_home(request):
     return Response({}, template_name='api/api.html')
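
To make the lookup path above concrete, here is a small sketch of how graph_api_lookup walks the nested GraphQL response produced by the query built in api_revision_with_origin. The response data below is invented for illustration only; only its shape matters.

# Illustration only: a fake response shaped like the query above.
response_data = {'origins': [{
    'latest_visit': {'snapshot': {'snapshots': [{
        'branch': {'target': {'revision': {
            'id': 'f00d' * 10,
            'message': 'Initial commit',
        }}},
    }]}},
}]}

path = ['origins', 0, 'latest_visit', 'snapshot', 'snapshots', 0,
        'branch', 'target', 'revision']
result = response_data
for part in path:
    result = result[part]  # a missing key or index here becomes a NotFoundExc (404)
print(result['message'])  # -> Initial commit
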
diff --git a/swh/web/common/graphql.py b/swh/web/common/graphql.py
new file mode 100644
index 00000000..781d5846
--- /dev/null
+++ b/swh/web/common/graphql.py
@@ -0,0 +1,385 @@
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU Affero General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from graphql.language import ast
+from collections import namedtuple
+
+from graphene import (
+    # root types
+    Schema, Field, Argument, Scalar, ObjectType, Interface,
+    # collections
+    List, Enum,
+    # scalars
+    String, Int, Boolean, JSONString, DateTime
+)
+from graphene import NonNull as NN
+
+from swh.core.utils import encode_with_unescape, decode_with_escape
+from swh.model.hashutil import hash_to_hex, hash_to_bytes
+from swh.model.identifiers import PersistentId, parse_persistent_identifier
+from swh.storage.algos.snapshot import snapshot_get_all_branches
+
+from swh.web import config
+
+
+Context = namedtuple('Context', 'storage')
+
+
+class Bytestring(Scalar):
+    @staticmethod
+    def serialize(bytes_):
+        return hash_to_hex(bytes_)
+
+    @staticmethod
+    def parse_literal(node):
+        if isinstance(node, ast.StringValue):
+            return hash_to_bytes(node.value)
+
+    @staticmethod
+    def parse_value(str_):
+        return hash_to_bytes(str_)
+
+
+class AlmostUtf8Bytestring(Scalar):
+    """A byte string that we allow ourselves to format as ascii data with
+    backslash escapes to be more human-readable"""
+
+    @staticmethod
+    def serialize(bytes_):
+        return decode_with_escape(bytes_)
+
+    @staticmethod
+    def parse_literal(node):
+        if isinstance(node, ast.StringValue):
+            return encode_with_unescape(node.value)
+
+    @staticmethod
+    def parse_value(str_):
+        return encode_with_unescape(str_)
+
+
+class HashId(ObjectType):
+    sha1_git = Field(NN(Bytestring))
+
+
+class Person(ObjectType):
+    fullname = Field(Bytestring)
+
+
+class Timestamp(ObjectType):
+    seconds = Field(NN(Int))
+    microseconds = Field(NN(Int))
+
+
+class TimestampWithTimezone(ObjectType):
+    timestamp = Field(NN(Timestamp))
+    offset = Field(NN(Int))
+    negative_utc = Field(NN(Boolean))
+
+
+class HashedObject(Interface):
+    id = Field(NN(HashId))
+
+
+class SwhObjectType(Enum):
+    SNAPSHOT = 'snapshot'
+    RELEASE = 'release'
+    REVISION = 'revision'
+    DIRECTORY = 'directory'
+    CONTENT = 'content'
+
+
+class SwhId(ObjectType):
+    swh_pid = Field(NN(String))
+
+    namespace = Field(NN(String), default_value='swh')
+    scheme_version = Field(NN(String), default_value='1')
+    object_type = Field(NN(SwhObjectType))
+    object_id = Field(NN(Bytestring))
+
+    objects = Field(NN(List(HashedObject)))
+
+    contents = Field(NN(List(lambda: Content)))
+    directories = Field(NN(List(lambda: Directory)))
+    revisions = Field(NN(List(lambda: Revision)))
+    releases = Field(NN(List(lambda: Release)))
+    snapshot = Field(NN(List(lambda: Snapshot)))
+
+    def resolve_swh_pid(parent, info):
+        assert isinstance(parent.object_type.value, str), parent.object_type
+        pid = PersistentId(
+            parent.namespace, parent.scheme_version,
+            parent.object_type.value, parent.object_id)
+        return str(pid)
+
+    def resolve_objects(parent, info):
+        if parent.object_type == SwhObjectType.SNAPSHOT:
+            return parent.resolve_snapshots(parent, info)
+
+    def resolve_revisions(parent, info):
+        revision = info.context.storage.revision_get(parent.object_id)
+        return [Revision(**revision)]
+
+    def resolve_snapshots(parent, info):
+        snapshot = snapshot_get_all_branches(
+            info.context.storage, parent.object_id)
+        branches = [
+            SnapshotBranch(
+                name=branch_name,
+                target=SwhId(
+                    object_type=SwhObjectType.get(branch['target_type']),
+                    object_id=branch['target']))
+            for (branch_name, branch) in snapshot['branches'].items()]
+        return [Snapshot(id=parent.object_id, branches=branches)]
+
+
+class ContentStatus(Enum):
+    VISIBLE = 'visible'
+    ABSENT = 'absent'
+    HIDDEN = 'hidden'
+
+
+class Content(ObjectType):
+    class Meta:
+        interfaces = (HashedObject,)
+
+    sha1 = Field(Bytestring)
+    sha256 = Field(Bytestring)
+    blake2s256 = Field(Bytestring)
+
+    length = Field(NN(Int))
+    status = Field(NN(ContentStatus))
+    reason = Field(String)
+    data = Field(Bytestring)
+
+
+class DirectoryEntry(ObjectType):
+    name = Field(NN(Bytestring))
+    target = Field(NN(SwhId))
+    perms = Field(NN(Int))
+
+
+class Directory(ObjectType):
+    class Meta:
+        interfaces = (HashedObject,)
+
+    entries = Field(NN(List(NN(DirectoryEntry))))
+
+    entry = Field(
+        SwhId,
+        path=NN(AlmostUtf8Bytestring),
+    )
+
+    def resolve_entry(parent, info, path):
+        # assumes 'path' is a b'/'-separated byte string below this directory
+        entry = info.context.storage.directory_entry_get_by_path(
+            parent.id, path.split(b'/'))
+        if entry:
+            # TODO: cache the other items of 'entry'
+            object_type = entry['type']
+            if object_type == 'file':
+                object_type = 'content'
+            return SwhId(object_type=object_type, object_id=entry['target'])
+
+
+class RevisionType(Enum):
+    GIT = 'git'
+    TAR = 'tar'
+    DSC = 'dsc'
+    SUBVERSION = 'svn'
+    MERCURIAL = 'hg'
+
+
+class Revision(ObjectType):
+    """A revision, i.e. a commit in a version control system."""
+    class Meta:
+        interfaces = (HashedObject,)
+
+    message = Field(NN(AlmostUtf8Bytestring))
+    author = Field(NN(Person))
+    committer = Field(NN(Person))
+    author_date = Field(NN(TimestampWithTimezone))
+    committer_date = Field(NN(TimestampWithTimezone))
+    type = Field(NN(RevisionType))
+    directory = Field(NN(SwhId))
+    synthetic = Field(NN(Boolean))
+    metadata = Field(JSONString)
+    parents = Field(NN(List(NN(SwhId))))
+
+    log = Field(
+        NN(List(NN(SwhId))),
+        first=Argument(Int))
+
+    def resolve_log(parent, info, first=10):
+        """Walk the revision log starting from this revision."""
+        shortlog = info.context.storage.revision_shortlog(
+            [parent.id.sha1], limit=first)
+        return [
+            SwhId(object_type='revision', object_id=rev_id)
+            for (rev_id, parents) in shortlog]
+
+    merge = Field(NN(Boolean))
+
+    def resolve_merge(parent, info):
+        return len(parent.parents) > 1
+
+
+class Release(ObjectType):
+    class Meta:
+        interfaces = (HashedObject,)
+
+    name = Field(NN(Bytestring))
+    message = Field(NN(Bytestring))
+    target = Field(HashedObject)
+    synthetic = Field(NN(Boolean))
+    author = Field(Person)
+    date = Field(TimestampWithTimezone)
+
+
+class SnapshotBranch(ObjectType):
+    name = Field(NN(AlmostUtf8Bytestring))
+    # lambda to break recursion:
+    target = Field(NN(SwhId))
+
+
+class Snapshot(ObjectType):
+    class Meta:
+        interfaces = (HashedObject,)
+
+    branches = Field(
+        NN(List(NN(SnapshotBranch))),
+        names=List(NN(AlmostUtf8Bytestring)))
+
+    def resolve_branches(parent, info, names=None):
+        branches = parent.branches
+        if names:
+            branches = [
+                branch for branch in branches
+                if branch.name in names]
+        return branches
+
+    branch = Field(
+        SnapshotBranch,
+        name=NN(AlmostUtf8Bytestring))
+
+    def resolve_branch(parent, info, name):
+        branches = [
+            branch for branch in parent.branches
+            if branch.name == name]
+        if branches:
+            return branches[0]
+
+
+class VisitStatus(Enum):
+    ONGOING = 'ongoing'
+    FULL = 'full'
+    PARTIAL = 'partial'
+
+
+class OriginVisit(ObjectType):
+    id = Field(NN(Int))
+    type = Field(NN(String))
+    origin = Field(NN(lambda: Origin))
+    date = Field(NN(DateTime))
+    status = Field(VisitStatus)
+    metadata = Field(JSONString)
+    snapshot = Field(SwhId)
+
+    @classmethod
+    def from_dict(cls, visit):
+        visit['id'] = visit.pop('visit')
+        if visit.get('snapshot'):
+            visit['snapshot'] = SwhId(
+                object_type=SwhObjectType.SNAPSHOT,
+                object_id=visit.pop('snapshot'))
+        return cls(**visit)
+
+
+class Origin(ObjectType):
+    id = Field(Int)
+    url = Field(NN(String))
+    type = Field(NN(String))
+
+    visits = Field(
+        NN(List(NN(OriginVisit))),
+        after=Argument(DateTime),
+        first=Argument(Int),
+    )
+
+    def resolve_visits(parent, info, after=None, first=10):
+        # TODO: pagination
+        # TODO: parent.url instead of parent.id
+        visits = info.context.storage.origin_visit_get(
+            parent.id, after=after, limit=first)
+        return [OriginVisit.from_dict(visit) for visit in visits]
+
+    latest_visit = Field(
+        NN(List(NN(OriginVisit))),
+        require_snapshot=Argument(Boolean),
+        last=Argument(Int),
+    )
+
+    def resolve_latest_visit(parent, info, require_snapshot=False):
+        visit = info.context.storage.origin_visit_get_latest(
+            parent.url, require_snapshot=require_snapshot)
+        return OriginVisit.from_dict(visit)
+
+
+class RootQuery(ObjectType):
+    origins = Field(
+        NN(List(NN(Origin))),
+        urls=Argument(List(NN(String))),
+        ids=List(NN(Int)),
+    )
+
+    def resolve_origins(parent, info, urls=None, ids=None):
+        origins = []
+        if ids:
+            origins.extend(info.context.storage.origin_get(
+                [{'id': id_} for id_ in ids]))
+        if urls:
+            origins.extend(info.context.storage.origin_get(
+                [{'url': url} for url in urls]))
+
+        return [Origin(**origin) for origin in origins]
+
+    pids = Field(
+        NN(List(NN(SwhId))),
+        ids=NN(List(NN(String))),
+    )
+
+    def resolve_pids(parent, info, ids):
+        ids = [parse_persistent_identifier(id_) for id_ in ids]
+        if any(id_.namespace != 'swh' for id_ in ids):
+            raise ValueError(
+                'Only "swh:" namespace is supported in PIDs.')
+        if any(id_.scheme_version != '1' for id_ in ids):
+            raise ValueError(
+                'Only "1" scheme version is supported in SWH PIDs')
+        return [
+            SwhId(object_type=id_.object_type,
+                  object_id=id_.object_id)
+            for id_ in ids]
+
+    contents_by_hashes = Field(
+        NN(List(NN(SwhId))),
+        algo=NN(String),
+        hashes=NN(List(NN(Bytestring))),
+    )
+
+    def resolve_contents_by_hashes(parent, info, algo, hashes):
+        results = []
+        for hash_ in hashes:
+            res = info.context.storage.content_find({algo: hash_})
+            if res:
+                results.append(Content(**res))
+        return results
+
+
+schema = Schema(
+    query=RootQuery,
+    auto_camelcase=False,
+    types=[
+        Content, Directory, Revision, Release, Snapshot,
+        OriginVisit, Origin])
+
+
+def execute(query, variables={}):
+    context = Context(storage=config.storage())
+    return schema.execute(query, variables=variables, context=context)
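
Resolvers reach the archive through info.context, the Context namedtuple defined above, which execute() fills with config.storage(). A minimal sketch of querying the schema directly follows; it exercises the pids root field, which only parses persistent identifiers and so needs no working storage backend. The PID string is just an example, and the commented output is only the expected shape.

from swh.web.common.graphql import Context, schema

# No storage access is needed for 'pids', so an empty Context is enough here;
# a real setup would wrap an swh.storage instance, as test_query.py below does.
result = schema.execute(
    '''
    query {
      pids(ids: ["swh:1:rev:4f000e884c3010c2bb04bbb516aaf0ba234ccf4f"]) {
        object_type
        object_id
      }
    }''',
    context=Context(storage=None))
if result.errors:
    raise RuntimeError(result.errors)
print(result.data['pids'])
# e.g. [{'object_type': 'REVISION', 'object_id': '4f000e884c3010...'}]

Note that the schema is built with auto_camelcase=False, so field names keep their snake_case spelling in queries.
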
diff --git a/test_query.py b/test_query.py
new file mode 100644
index 00000000..8b8ab07f
--- /dev/null
+++ b/test_query.py
@@ -0,0 +1,36 @@
+import json
+
+from swh.storage import get_storage
+import swh.web.common.graphql
+
+context = swh.web.common.graphql.Context(
+    storage=get_storage('remote', {
+        'url': 'http://uffizi.internal.softwareheritage.org:5002/'}))
+res = swh.web.common.graphql.schema.execute(
+    '''
+    query {
+      origins(urls: ["https://github.com/SoftwareHeritage/swh-model"]) {
+        id
+        type
+        url
+        visits {
+          date
+          snapshot {
+            swh_pid
+            objects {
+              __typename
+              ... on Snapshot {
+                branches(names: ["HEAD", "refs/heads/master"]) {
+                  name
+                  target {
+                    swh_pid
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }''',
+    context=context)
+print(json.dumps(dict(res.to_dict()), indent=4))
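
For comparison, the same kind of query can also go through the execute() helper from swh.web.common.graphql, which builds its Context from the swh-web configuration (config.storage()) rather than a hand-built remote storage. A sketch, assuming swh-web is configured with a reachable storage backend:

import json

from swh.web.common.graphql import execute

# Same selection as test_query.py minus the snapshot drill-down; the storage
# handle comes from swh-web's own configuration instead of an explicit Context.
res = execute('''
    query {
      origins(urls: ["https://github.com/SoftwareHeritage/swh-model"]) {
        id
        type
        url
        visits {
          date
          snapshot {
            swh_pid
          }
        }
      }
    }''')
print(json.dumps(res.to_dict(), indent=4))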