Page MenuHomeSoftware Heritage
Paste P1136

old graphql draft
ActivePublic

Authored by vlorentz on Aug 30 2021, 10:05 AM.
commit 4f000e884c3010c2bb04bbb516aaf0ba234ccf4f
Author: Valentin Lorentz <vlorentz@softwareheritage.org>
Date: Fri Jun 28 14:57:57 2019 +0200
[WIP] graphql API
diff --git a/requirements.txt b/requirements.txt
index 8767b06b..4802cbb3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -18,6 +18,7 @@ python-dateutil
pyyaml
requests
python-memcached
+graphene
# Doc dependencies
sphinx
diff --git a/swh/web/api/utils.py b/swh/web/api/utils.py
index d4f2d49b..c337ebee 100644
--- a/swh/web/api/utils.py
+++ b/swh/web/api/utils.py
@@ -159,6 +159,13 @@ def enrich_content(content, top_url=False, query_string=None):
return content
# Keys of a revision dict exposed through the (draft) GraphQL API; used by
# callers to build the field selection of GraphQL queries over revisions.
REVISION_FIELDS = [
    'author', 'date', 'committer', 'committer_date',
    'directory', 'id', 'merge', 'message', 'parents',
    'type',
]
+
+
def enrich_revision(revision):
"""Enrich revision with links where it makes sense (directory, parents).
Keep track of the navigation breadcrumbs if they are specified.
diff --git a/swh/web/api/views/revision.py b/swh/web/api/views/revision.py
index a36199ee..8f786722 100644
--- a/swh/web/api/views/revision.py
+++ b/swh/web/api/views/revision.py
@@ -11,7 +11,7 @@
from swh.web.api import utils
from swh.web.api.apidoc import api_doc, format_docstring
from swh.web.api.apiurls import api_route
-from swh.web.api.views.utils import api_lookup
+from swh.web.api.views.utils import api_lookup, graph_api_lookup
DOC_RETURN_REVISION = '''
@@ -258,8 +258,22 @@ def api_revision_with_origin(request, origin_id,
:swh_web_api:`revision/origin/13706355/branch/refs/heads/2.7/`
""" # noqa
- return api_lookup(
- service.lookup_revision_by, int(origin_id), branch_name, ts,
+ query = '''
+ query {
+ origins(ids: [$origin_id]) {
+ latest_visit(require_snapshot: true) {
+ snapshot {
+ snapshots {
+ branch(name: $branch_name) {
+ target {
+ revision {
+ %s
+ }}}}}}}}''' % utils.REVISION_FIELDS
+ variables = {'origin_id': origin_id, 'branch_name': branch_name}
+ return graph_api_lookup(
+ query, variables, [
+ 'origins', 'latest_visit', 'snapshot', 'snapshots', 0, 'branch',
+ 'target', 'revision'],
notfound_msg=('Revision with (origin_id: {}, branch_name: {}'
', ts: {}) not found.'.format(origin_id,
branch_name, ts)),
diff --git a/swh/web/api/views/utils.py b/swh/web/api/views/utils.py
index acba8ae2..51c2047a 100644
--- a/swh/web/api/views/utils.py
+++ b/swh/web/api/views/utils.py
@@ -50,6 +50,17 @@ def api_lookup(lookup_fn, *args,
return enrich_fn(res)
def graph_api_lookup(query, variables, path, *, notfound_msg, enrich_fn):
    """Run a GraphQL query and drill down into the result.

    Args:
        query: GraphQL query string
        variables: mapping of GraphQL variable names to values
        path: sequence of dict keys / list indices leading to the wanted
            sub-object of the query result
        notfound_msg: message of the NotFoundExc raised when the path
            cannot be fully resolved
        enrich_fn: callable applied to the extracted object before return

    Raises:
        NotFoundExc: if any step of `path` is missing from the result
    """
    # local import to avoid a hard dependency at module load time
    from swh.web.common.graphql import execute
    # Fix: schema.execute() returns an ExecutionResult, not a plain dict;
    # the response payload lives in its .data attribute.
    result = execute(query, variables).data
    for part in path:
        try:
            result = result[part]
        # Fix: `path` may contain list indices (IndexError) and an absent
        # sub-object is None (TypeError), not only missing dict keys.
        except (KeyError, IndexError, TypeError):
            raise NotFoundExc(notfound_msg)
    return enrich_fn(result)
+
+
@api_view(['GET', 'HEAD'])
def api_home(request):
return Response({}, template_name='api/api.html')
diff --git a/swh/web/common/graphql.py b/swh/web/common/graphql.py
new file mode 100644
index 00000000..781d5846
--- /dev/null
+++ b/swh/web/common/graphql.py
@@ -0,0 +1,385 @@
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU Affero General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from graphql.language import ast
+from collections import namedtuple
+
+from graphene import (
+ # root types
+ Schema, Field, Argument, Scalar, ObjectType, Interface,
+ # collections
+ List, Enum,
+ # scalars
+ String, Int, Boolean, JSONString, DateTime
+)
+from graphene import NonNull as NN
+
+from swh.core.utils import encode_with_unescape, decode_with_escape
+from swh.model.hashutil import hash_to_hex, hash_to_bytes
+from swh.model.identifiers import PersistentId, parse_persistent_identifier
+from swh.storage.algos.snapshot import snapshot_get_all_branches
+
+from swh.web import config
+
+
# Per-request resolver context; gives resolvers access to the storage backend.
Context = namedtuple('Context', 'storage')
+
+
class Bytestring(Scalar):
    """GraphQL scalar mapping raw byte identifiers to/from hex strings."""

    @staticmethod
    def serialize(value):
        # outgoing: bytes -> hex string
        return hash_to_hex(value)

    @staticmethod
    def parse_literal(ast_node):
        # incoming, inlined in the query text: hex string -> bytes
        if isinstance(ast_node, ast.StringValue):
            return hash_to_bytes(ast_node.value)

    @staticmethod
    def parse_value(value):
        # incoming, passed as a query variable: hex string -> bytes
        return hash_to_bytes(value)
+
+
class AlmostUtf8Bytestring(Scalar):
    """A byte string that we allow ourselves to format as ascii data with
    backslash escapes to be more human-readable"""

    @staticmethod
    def serialize(value):
        # outgoing: bytes -> escaped, mostly-readable string
        return decode_with_escape(value)

    @staticmethod
    def parse_literal(ast_node):
        # incoming, inlined in the query text
        if isinstance(ast_node, ast.StringValue):
            return encode_with_unescape(ast_node.value)

    @staticmethod
    def parse_value(value):
        # incoming, passed as a query variable
        return encode_with_unescape(value)
+
+
class HashId(ObjectType):
    """Intrinsic hash(es) identifying an object; only sha1_git for now."""
    sha1_git = Field(NN(Bytestring))
+
+
class Person(ObjectType):
    """An author or committer; only the raw fullname bytes are exposed."""
    fullname = Field(Bytestring)
+
+
class Timestamp(ObjectType):
    """A UNIX timestamp with sub-second precision."""
    seconds = Field(NN(Int))
    microseconds = Field(NN(Int))
+
+
class TimestampWithTimezone(ObjectType):
    """A timestamp together with the UTC offset it was recorded with."""
    timestamp = Field(NN(Timestamp))
    # offset from UTC — presumably in minutes, as in the SWH data model;
    # TODO confirm
    offset = Field(NN(Int))
    # distinguishes a "-0000" offset from "+0000"
    negative_utc = Field(NN(Boolean))
+
+
class HashedObject(Interface):
    """Interface of all objects addressed by an intrinsic hash."""
    id = Field(NN(HashId))
+
+
class SwhObjectType(Enum):
    """The kinds of archive objects a SWH persistent id can point to."""
    SNAPSHOT = 'snapshot'
    RELEASE = 'release'
    REVISION = 'revision'
    DIRECTORY = 'directory'
    CONTENT = 'content'
+
+
class SwhId(ObjectType):
    """A persistent identifier (object type + object id) plus lazy
    accessors that resolve it to the identified object(s) through the
    storage backend."""
    # textual "swh:1:..." rendering, computed by resolve_swh_pid
    swh_pid = Field(NN(String))

    namespace = Field(NN(String), default_value='swh')
    scheme_version = Field(NN(String), default_value='1')
    object_type = Field(NN(SwhObjectType))
    object_id = Field(NN(Bytestring))

    # polymorphic accessor, dispatched on object_type
    objects = Field(NN(List(HashedObject)))

    # typed accessors; the lambdas defer evaluation of classes defined
    # later in this module
    contents = Field(NN(List(lambda: Content)))
    directories = Field(NN(List(lambda: Directory)))
    revisions = Field(NN(List(lambda: Revision)))
    releases = Field(NN(List(lambda: Release)))
    snapshot = Field(NN(List(lambda: Snapshot)))

    def resolve_swh_pid(parent, info):
        # Render the structured id as a "swh:1:..." string.
        assert isinstance(parent.object_type.value, str), parent.object_type
        pid = PersistentId(
            parent.namespace, parent.scheme_version,
            parent.object_type.value, parent.object_id)
        return str(pid)

    def resolve_objects(parent, info):
        # NOTE(review): only snapshots are dispatched here; other object
        # types silently resolve to null — presumably still TODO.
        if parent.object_type == SwhObjectType.SNAPSHOT:
            return parent.resolve_snapshots(parent, info)

    def resolve_revisions(parent, info):
        revision = info.context.storage.revision_get(parent.object_id)
        return [Revision(**revision)]

    def resolve_snapshots(parent, info):
        # Load the whole snapshot (all branches) and wrap each branch
        # target in a SwhId so it can in turn be resolved lazily.
        # NOTE(review): relies on SwhObjectType.get() accepting a value
        # string — verify against the graphene Enum API.
        snapshot = snapshot_get_all_branches(
            info.context.storage, parent.object_id)
        branches = [
            SnapshotBranch(
                name=branch_name,
                target=SwhId(
                    object_type=SwhObjectType.get(branch['target_type']),
                    object_id=branch['target']))
            for (branch_name, branch) in snapshot['branches'].items()]
        return [Snapshot(id=parent.object_id, branches=branches)]
+
+
class ContentStatus(Enum):
    """Archival status of a content."""
    VISIBLE = 'visible'
    ABSENT = 'absent'
    HIDDEN = 'hidden'
+
+
class Content(ObjectType):
    """A file content (blob), addressed by several checksums."""
    class Meta:
        interfaces = (HashedObject,)
    sha1 = Field(Bytestring)
    sha256 = Field(Bytestring)
    blake2s256 = Field(Bytestring)

    length = Field(NN(Int))
    status = Field(NN(ContentStatus))
    # presumably why the content is absent/hidden when status is not
    # 'visible' — TODO confirm
    reason = Field(String)
    data = Field(Bytestring)
+
+
class DirectoryEntry(ObjectType):
    """One named entry of a directory listing."""
    name = Field(NN(Bytestring))
    target = Field(NN(SwhId))
    # permission bits — presumably POSIX-style mode; TODO confirm
    perms = Field(NN(Int))
+
+
class Directory(ObjectType):
    """A directory: a list of named entries pointing to other objects."""
    class Meta:
        interfaces = (HashedObject,)
    entries = Field(NN(List(NN(DirectoryEntry))))

    # look up a single entry by its (possibly nested) path
    entry = Field(
        SwhId,
        path=NN(AlmostUtf8Bytestring),
    )

    def resolve_entry(parent, info, path):
        # Fix: the original dropped `path` entirely and called the storage
        # with only the directory id. directory_entry_get_by_path expects
        # the path split into its components.
        entry = info.context.storage.directory_entry_get_by_path(
            parent.id, path.split(b'/'))
        if entry:
            # TODO: cache the other items of 'entry'
            object_type = entry['type']
            # the storage calls blobs 'file'; this schema calls them
            # 'content'
            if object_type == 'file':
                object_type = 'content'
            return SwhId(object_type=object_type, object_id=entry['target'])
+
+
class RevisionType(Enum):
    """Kind of version-control system (or import source) of a revision."""
    GIT = 'git'
    TAR = 'tar'
    DSC = 'dsc'
    SUBVERSION = 'svn'
    MERCURIAL = 'hg'
+
+
class Revision(ObjectType):
    """A revision (e.g. a git commit): a point-in-time capture of a
    source tree, with authorship metadata and links to parent revisions."""
    class Meta:
        interfaces = (HashedObject,)
    message = Field(NN(AlmostUtf8Bytestring))
    author = Field(NN(Person))
    committer = Field(NN(Person))
    author_date = Field(NN(TimestampWithTimezone))
    committer_date = Field(NN(TimestampWithTimezone))
    type = Field(NN(RevisionType))
    directory = Field(NN(SwhId))
    synthetic = Field(NN(Boolean))
    metadata = Field(JSONString)
    parents = Field(NN(List(NN(SwhId))))

    log = Field(
        NN(List(NN(SwhId))),
        first=Argument(Int))

    def resolve_log(parent, info, first=10):
        """Return (up to `first`) ancestor revisions of this one, as
        SwhIds pointing at revisions."""
        # Fix: HashId defines 'sha1_git', not 'sha1'; the original
        # accessed a non-existent attribute.
        shortlog = info.context.storage.revision_shortlog(
            [parent.id.sha1_git], limit=first)
        return [
            SwhId(object_type='revision', object_id=rev_id)
            for (rev_id, parents) in shortlog]

    merge = Field(NN(Boolean))

    def resolve_merge(parent, info):
        # a merge is any revision with more than one parent
        return len(parent.parents) > 1
+
+
class Release(ObjectType):
    """A release (e.g. a git tag): a named pointer to another object."""
    class Meta:
        interfaces = (HashedObject,)
    name = Field(NN(Bytestring))
    message = Field(NN(Bytestring))
    target = Field(HashedObject)
    synthetic = Field(NN(Boolean))
    author = Field(Person)
    date = Field(TimestampWithTimezone)
+
+
class SnapshotBranch(ObjectType):
    """A named branch of a snapshot, pointing at some archive object."""
    name = Field(NN(AlmostUtf8Bytestring))
    # SwhId is already defined at this point, so no lambda indirection is
    # needed (the previous "lambda to break recursion" comment was stale)
    target = Field(NN(SwhId))
+
+
class Snapshot(ObjectType):
    """The state of all branches of an origin at one point in time."""
    class Meta:
        interfaces = (HashedObject,)

    branches = Field(
        NN(List(NN(SnapshotBranch))),
        names=List(NN(AlmostUtf8Bytestring)))

    def resolve_branches(parent, info, names=None):
        # With no name filter, expose every branch as-is.
        if not names:
            return parent.branches
        wanted = set(names)
        return [b for b in parent.branches if b.name in wanted]

    branch = Field(
        SnapshotBranch,
        name=NN(AlmostUtf8Bytestring))

    def resolve_branch(parent, info, name):
        # First branch with a matching name, or None when absent.
        return next(
            (b for b in parent.branches if b.name == name), None)
+
+
class VisitStatus(Enum):
    """Completion status of an origin visit."""
    ONGOING = 'ongoing'
    FULL = 'full'
    PARTIAL = 'partial'
+
+
class OriginVisit(ObjectType):
    """One crawl of an origin, possibly resulting in a snapshot."""
    id = Field(NN(Int))
    type = Field(NN(String))
    origin = Field(NN(lambda: Origin))
    date = Field(NN(DateTime))
    status = Field(VisitStatus)
    metadata = Field(JSONString)
    snapshot = Field(SwhId)

    @classmethod
    def from_dict(cls, visit):
        """Build an OriginVisit from a storage visit dict.

        Renames the storage key 'visit' to 'id' and wraps the snapshot
        id, if any, in a resolvable SwhId.
        """
        # Fix: work on a shallow copy; the original popped keys from the
        # caller's dict, mutating it as a side effect.
        visit = dict(visit)
        visit['id'] = visit.pop('visit')
        if visit.get('snapshot'):
            visit['snapshot'] = SwhId(
                object_type=SwhObjectType.SNAPSHOT,
                object_id=visit.pop('snapshot'))
        return cls(**visit)
+
+
class Origin(ObjectType):
    """A software origin: the place a project was crawled from."""
    id = Field(Int)
    url = Field(NN(String))
    type = Field(NN(String))

    visits = Field(
        NN(List(NN(OriginVisit))),
        after=Argument(DateTime),
        first=Argument(Int),
    )

    def resolve_visits(parent, info, after=None, first=10):
        """Return up to `first` visits of this origin, oldest first."""
        # TODO: pagination
        # TODO: parent.url instead of parent.id
        visits = info.context.storage.origin_visit_get(
            parent.id, after=after, limit=first)
        return [OriginVisit.from_dict(visit) for visit in visits]

    # Fix: this was declared as a non-null list, but the resolver returns
    # a single visit and callers query it as one object. The declared
    # (but unused) `last` argument is dropped too: the resolver did not
    # accept it, so passing it would have raised a TypeError.
    latest_visit = Field(
        OriginVisit,
        require_snapshot=Argument(Boolean),
    )

    def resolve_latest_visit(parent, info, require_snapshot=False):
        """Return the most recent visit (optionally only one that holds a
        snapshot), or None if the origin was never visited."""
        visit = info.context.storage.origin_visit_get_latest(
            parent.url, require_snapshot=require_snapshot)
        # Fix: the storage returns None when there is no matching visit;
        # the original crashed in from_dict in that case.
        if visit is None:
            return None
        return OriginVisit.from_dict(visit)
+
+
class RootQuery(ObjectType):
    """Entry points of the GraphQL schema."""
    origins = Field(
        NN(List(NN(Origin))),
        urls=Argument(List(NN(String))),
        ids=List(NN(Int)),
    )

    def resolve_origins(parent, info, urls=None, ids=None):
        """Look up origins by numeric id and/or by URL."""
        origins = []
        if ids:
            origins.extend(info.context.storage.origin_get(
                [{'id': id_} for id_ in ids]))
        if urls:
            origins.extend(info.context.storage.origin_get(
                [{'url': url} for url in urls]))

        # Fix: storage.origin_get yields None for unknown origins; the
        # original crashed on Origin(**None). Unknown origins are skipped.
        return [Origin(**origin) for origin in origins if origin]

    pids = Field(
        NN(List(NN(SwhId))),
        ids=NN(List(NN(String))),
    )

    def resolve_pids(parent, info, ids):
        """Parse textual SWH PIDs into structured, resolvable SwhIds.

        Raises:
            ValueError: on an unsupported namespace or scheme version.
        """
        ids = [parse_persistent_identifier(id_) for id_ in ids]
        if any(id_.namespace != 'swh' for id_ in ids):
            raise ValueError(
                'Only "swh:" namespace is supported in PIDs.')
        if any(id_.scheme_version != '1' for id_ in ids):
            raise ValueError(
                'Only "1" scheme version is supported in SWH PIDs')
        return [
            SwhId(object_type=id_.object_type,
                  object_id=id_.object_id)
            for id_ in ids]

    contents_by_hashes = Field(
        NN(List(NN(SwhId))),
        algo=NN(String),
        hashes=NN(List(NN(Bytestring))),
    )

    def resolve_contents_by_hashes(parent, info, algo, hashes):
        """Find contents whose `algo` checksum is one of `hashes`;
        unknown hashes are silently skipped."""
        results = []
        for hash_ in hashes:
            res = info.context.storage.content_find({algo: hash_})
            if res:
                results.append(Content(**res))
        return results
+
+
# The executable schema. auto_camelcase=False keeps snake_case field names,
# matching the rest of the swh-web API; `types` registers object types only
# reachable through the HashedObject interface.
schema = Schema(
    query=RootQuery,
    auto_camelcase=False,
    types=[
        Content, Directory, Revision, Release, Snapshot,
        OriginVisit, Origin])
+
+
def execute(query, variables=None):
    """Execute a GraphQL `query` against the configured storage backend.

    Args:
        query: the GraphQL query string
        variables: optional mapping of GraphQL variable names to values

    Returns:
        the ExecutionResult produced by schema.execute()
    """
    # Fix: the original used a mutable default argument (variables={}).
    if variables is None:
        variables = {}
    context = Context(storage=config.storage())
    return schema.execute(query, variables=variables, context=context)
diff --git a/test_query.py b/test_query.py
new file mode 100644
index 00000000..8b8ab07f
--- /dev/null
+++ b/test_query.py
@@ -0,0 +1,36 @@
# Ad-hoc manual test script: run a sample GraphQL query against the
# production storage and pretty-print the result. Not an automated test.
import json

from swh.storage import get_storage
import swh.web.common.graphql

# Talk directly to the (internal) production storage RPC server.
context = swh.web.common.graphql.Context(
    storage=get_storage('remote', {
        'url': 'http://uffizi.internal.softwareheritage.org:5002/'}))
# Exercise origin lookup, visit listing, snapshot resolution and the
# polymorphic `objects` accessor in a single query.
res = swh.web.common.graphql.schema.execute(
    '''
    query {
    origins(urls: ["https://github.com/SoftwareHeritage/swh-model"]) {
    id
    type
    url
    visits {
    date
    snapshot {
    swh_pid
    objects {
    __typename
    ... on Snapshot {
    branches(names: ["HEAD", "refs/heads/master"]) {
    name
    target {
    swh_pid
    }
    }
    }
    }
    }
    }
    }
    }''',
    context=context)
print(json.dumps(dict(res.to_dict()), indent=4))