commit 4f000e884c3010c2bb04bbb516aaf0ba234ccf4f
Author: Valentin Lorentz <vlorentz@softwareheritage.org>
Date:   Fri Jun 28 14:57:57 2019 +0200

    [WIP] graphql API

diff --git a/requirements.txt b/requirements.txt
index 8767b06b..4802cbb3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -18,6 +18,7 @@ python-dateutil
 pyyaml
 requests
 python-memcached
+graphene
 
 # Doc dependencies
 sphinx
diff --git a/swh/web/api/utils.py b/swh/web/api/utils.py
index d4f2d49b..c337ebee 100644
--- a/swh/web/api/utils.py
+++ b/swh/web/api/utils.py
@@ -159,6 +159,13 @@ def enrich_content(content, top_url=False, query_string=None):
     return content
 
 
+REVISION_FIELDS = [
+    'author', 'date', 'committer', 'committer_date',
+    'directory', 'id', 'merge', 'message', 'parents',
+    'type',
+]
+
+
 def enrich_revision(revision):
     """Enrich revision with links where it makes sense (directory, parents).
     Keep track of the navigation breadcrumbs if they are specified.
diff --git a/swh/web/api/views/revision.py b/swh/web/api/views/revision.py
index a36199ee..8f786722 100644
--- a/swh/web/api/views/revision.py
+++ b/swh/web/api/views/revision.py
@@ -11,7 +11,7 @@
 from swh.web.api import utils
 from swh.web.api.apidoc import api_doc, format_docstring
 from swh.web.api.apiurls import api_route
-from swh.web.api.views.utils import api_lookup
+from swh.web.api.views.utils import api_lookup, graph_api_lookup
 
 
 DOC_RETURN_REVISION = '''
@@ -258,8 +258,22 @@ def api_revision_with_origin(request, origin_id,
 
             :swh_web_api:`revision/origin/13706355/branch/refs/heads/2.7/`
     """ # noqa
-    return api_lookup(
-        service.lookup_revision_by, int(origin_id), branch_name, ts,
+    query = '''
+        query($origin_id: Int!, $branch_name: AlmostUtf8Bytestring!) {
+            origins(ids: [$origin_id]) {
+                latest_visit(require_snapshot: true) {
+                    snapshot {
+                        snapshots {
+                            branch(name: $branch_name) {
+                                target {
+                                    revisions {
+                                        %s
+        }}}}}}}}''' % ' '.join(utils.REVISION_FIELDS)
+    variables = {'origin_id': int(origin_id), 'branch_name': branch_name}
+    return graph_api_lookup(  # list-valued fields need an index step
+        query, variables, [
+            'origins', 0, 'latest_visit', 'snapshot', 'snapshots', 0,
+            'branch', 'target', 'revisions', 0],
         notfound_msg=('Revision with (origin_id: {}, branch_name: {}'
                       ', ts: {}) not found.'.format(origin_id,
                                                     branch_name, ts)),
diff --git a/swh/web/api/views/utils.py b/swh/web/api/views/utils.py
index acba8ae2..51c2047a 100644
--- a/swh/web/api/views/utils.py
+++ b/swh/web/api/views/utils.py
@@ -50,6 +50,17 @@ def api_lookup(lookup_fn, *args,
     return enrich_fn(res)
 
 
+def graph_api_lookup(query, variables, path, *, notfound_msg, enrich_fn):
+    from swh.web.common.graphql import execute
+    result = execute(query, variables).data  # ExecutionResult -> plain dict
+    for part in path:
+        try:
+            result = result[part]
+        except (KeyError, IndexError, TypeError):  # missing/empty/None step
+            raise NotFoundExc(notfound_msg)
+    return enrich_fn(result)
+
+
 @api_view(['GET', 'HEAD'])
 def api_home(request):
     return Response({}, template_name='api/api.html')
diff --git a/swh/web/common/graphql.py b/swh/web/common/graphql.py
new file mode 100644
index 00000000..781d5846
--- /dev/null
+++ b/swh/web/common/graphql.py
@@ -0,0 +1,385 @@
+# Copyright (C) 2019  The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU Affero General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from graphql.language import ast
+from collections import namedtuple
+
+from graphene import (
+    # root types
+    Schema, Field, Argument, Scalar, ObjectType, Interface,
+    # collections
+    List, Enum,
+    # scalars
+    String, Int, Boolean, JSONString, DateTime
+)
+from graphene import NonNull as NN
+
+from swh.core.utils import encode_with_unescape, decode_with_escape
+from swh.model.hashutil import hash_to_hex, hash_to_bytes
+from swh.model.identifiers import PersistentId, parse_persistent_identifier
+from swh.storage.algos.snapshot import snapshot_get_all_branches
+
+from swh.web import config
+
+
+Context = namedtuple('Context', 'storage')
+
+
+class Bytestring(Scalar):
+    @staticmethod
+    def serialize(bytes_):
+        return hash_to_hex(bytes_)
+
+    @staticmethod
+    def parse_literal(node):
+        if isinstance(node, ast.StringValue):
+            return hash_to_bytes(node.value)
+
+    @staticmethod
+    def parse_value(str_):
+        return hash_to_bytes(str_)
+
+
+class AlmostUtf8Bytestring(Scalar):
+    """A byte string that we allow ourselves to format as ascii data with
+    backslash escapes to be more human-readable"""
+
+    @staticmethod
+    def serialize(bytes_):
+        return decode_with_escape(bytes_)
+
+    @staticmethod
+    def parse_literal(node):
+        if isinstance(node, ast.StringValue):
+            return encode_with_unescape(node.value)
+
+    @staticmethod
+    def parse_value(str_):
+        return encode_with_unescape(str_)
+
+
+class HashId(ObjectType):
+    sha1_git = Field(NN(Bytestring))
+
+
+class Person(ObjectType):
+    fullname = Field(Bytestring)
+
+
+class Timestamp(ObjectType):
+    seconds = Field(NN(Int))
+    microseconds = Field(NN(Int))
+
+
+class TimestampWithTimezone(ObjectType):
+    timestamp = Field(NN(Timestamp))
+    offset = Field(NN(Int))
+    negative_utc = Field(NN(Boolean))
+
+
+class HashedObject(Interface):
+    id = Field(NN(HashId))
+
+
+class SwhObjectType(Enum):
+    SNAPSHOT = 'snapshot'
+    RELEASE = 'release'
+    REVISION = 'revision'
+    DIRECTORY = 'directory'
+    CONTENT = 'content'
+
+
+class SwhId(ObjectType):
+    swh_pid = Field(NN(String))
+    # Parts of the persistent identifier (rendered by resolve_swh_pid):
+    namespace = Field(NN(String), default_value='swh')
+    scheme_version = Field(NN(String), default_value='1')
+    object_type = Field(NN(SwhObjectType))
+    object_id = Field(NN(Bytestring))
+
+    objects = Field(NN(List(HashedObject)))
+    # The identified object(s), one list field per object type:
+    contents = Field(NN(List(lambda: Content)))
+    directories = Field(NN(List(lambda: Directory)))
+    revisions = Field(NN(List(lambda: Revision)))
+    releases = Field(NN(List(lambda: Release)))
+    snapshots = Field(NN(List(lambda: Snapshot)))  # see resolve_snapshots
+
+    def resolve_swh_pid(parent, info):
+        assert isinstance(parent.object_type.value, str), parent.object_type
+        pid = PersistentId(
+            parent.namespace, parent.scheme_version,
+            parent.object_type.value, parent.object_id)
+        return str(pid)
+
+    def resolve_objects(parent, info):
+        if parent.object_type == SwhObjectType.SNAPSHOT:
+            return SwhId.resolve_snapshots(parent, info)  # unbound call
+
+    def resolve_revisions(parent, info):
+        revision = info.context.storage.revision_get(parent.object_id)
+        return [Revision(**revision)]
+
+    def resolve_snapshots(parent, info):
+        snapshot = snapshot_get_all_branches(
+            info.context.storage, parent.object_id)
+        branches = [
+            SnapshotBranch(
+                name=branch_name,
+                target=SwhId(
+                    object_type=SwhObjectType.get(branch['target_type']),
+                    object_id=branch['target']))
+            for (branch_name, branch) in snapshot['branches'].items()]
+        return [Snapshot(id=parent.object_id, branches=branches)]
+
+
+class ContentStatus(Enum):
+    VISIBLE = 'visible'
+    ABSENT = 'absent'
+    HIDDEN = 'hidden'
+
+
+class Content(ObjectType):
+    class Meta:
+        interfaces = (HashedObject,)
+    sha1 = Field(Bytestring)
+    sha256 = Field(Bytestring)
+    blake2s256 = Field(Bytestring)
+
+    length = Field(NN(Int))
+    status = Field(NN(ContentStatus))
+    reason = Field(String)
+    data = Field(Bytestring)
+
+
+class DirectoryEntry(ObjectType):
+    name = Field(NN(Bytestring))
+    target = Field(NN(SwhId))
+    perms = Field(NN(Int))
+
+
+class Directory(ObjectType):
+    class Meta:
+        interfaces = (HashedObject,)
+    entries = Field(NN(List(NN(DirectoryEntry))))
+
+    entry = Field(
+        SwhId,
+        path=NN(AlmostUtf8Bytestring),
+    )
+
+    def resolve_entry(parent, info, path):
+        # NOTE(review): storage takes the path as byte components - confirm
+        entry = info.context.storage.directory_entry_get_by_path(
+            parent.id, path.split(b'/'))
+        if entry:
+            # TODO: cache the other items of 'entry'
+            object_type = {'file': 'content'}.get(entry['type'], entry['type'])
+            return SwhId(object_type=object_type, object_id=entry['target'])
+
+
+class RevisionType(Enum):
+    GIT = 'git'
+    TAR = 'tar'
+    DSC = 'dsc'
+    SUBVERSION = 'svn'
+    MERCURIAL = 'hg'
+
+
+class Revision(ObjectType):
+    """A revision (e.g. a VCS commit) and its metadata."""
+    class Meta:
+        interfaces = (HashedObject,)
+    message = Field(NN(AlmostUtf8Bytestring))
+    author = Field(NN(Person))
+    committer = Field(NN(Person))
+    author_date = Field(NN(TimestampWithTimezone))
+    committer_date = Field(NN(TimestampWithTimezone))
+    type = Field(NN(RevisionType))
+    directory = Field(NN(SwhId))
+    synthetic = Field(NN(Boolean))
+    metadata = Field(JSONString)
+    parents = Field(NN(List(NN(SwhId))))
+
+    log = Field(
+        NN(List(NN(SwhId))),
+        first=Argument(Int))
+
+    def resolve_log(parent, info, first=10):
+        """Return SwhIds of up to `first` revisions from this one's log."""
+        shortlog = info.context.storage.revision_shortlog(
+            [parent.id.sha1_git], limit=first)
+        return [
+            SwhId(object_type='revision', object_id=rev_id)
+            for (rev_id, _parents) in shortlog]
+
+    merge = Field(NN(Boolean))
+
+    def resolve_merge(parent, info):
+        return len(parent.parents) > 1
+
+
+class Release(ObjectType):
+    class Meta:
+        interfaces = (HashedObject,)
+    name = Field(NN(Bytestring))
+    message = Field(NN(Bytestring))
+    target = Field(HashedObject)
+    synthetic = Field(NN(Boolean))
+    author = Field(Person)
+    date = Field(TimestampWithTimezone)
+
+
+class SnapshotBranch(ObjectType):
+    name = Field(NN(AlmostUtf8Bytestring))
+    # SwhId is already defined above, so no lambda indirection is needed:
+    target = Field(NN(SwhId))
+
+
+class Snapshot(ObjectType):
+    class Meta:
+        interfaces = (HashedObject,)
+
+    branches = Field(
+        NN(List(NN(SnapshotBranch))),
+        names=List(NN(AlmostUtf8Bytestring)))
+
+    def resolve_branches(parent, info, names=None):
+        branches = parent.branches
+        if names:
+            branches = [
+                branch for branch in branches
+                if branch.name in names]
+        return branches
+
+    branch = Field(
+        SnapshotBranch,
+        name=NN(AlmostUtf8Bytestring))
+
+    def resolve_branch(parent, info, name):
+        branches = [
+            branch for branch in parent.branches
+            if branch.name == name]
+        if branches:
+            return branches[0]
+
+
+class VisitStatus(Enum):
+    ONGOING = 'ongoing'
+    FULL = 'full'
+    PARTIAL = 'partial'
+
+
+class OriginVisit(ObjectType):
+    id = Field(NN(Int))
+    type = Field(NN(String))
+    origin = Field(NN(lambda: Origin))
+    date = Field(NN(DateTime))
+    status = Field(VisitStatus)
+    metadata = Field(JSONString)
+    snapshot = Field(SwhId)
+
+    @classmethod
+    def from_dict(cls, visit):  # works on a copy; caller's dict is untouched
+        visit = {('id' if k == 'visit' else k): v for k, v in visit.items()}
+        if visit.get('snapshot'):
+            visit['snapshot'] = SwhId(
+                object_type=SwhObjectType.SNAPSHOT,
+                object_id=visit['snapshot'])
+        return cls(**visit)
+
+
+class Origin(ObjectType):
+    id = Field(Int)
+    url = Field(NN(String))
+    type = Field(NN(String))
+
+    visits = Field(
+        NN(List(NN(OriginVisit))),
+        after=Argument(DateTime),
+        first=Argument(Int),
+    )
+
+    def resolve_visits(parent, info, after=None, first=10):
+        # TODO: pagination
+        # TODO: parent.url instead of parent.id
+        visits = info.context.storage.origin_visit_get(
+            parent.id, after=after, limit=first)
+        return [OriginVisit.from_dict(visit) for visit in visits]
+
+    latest_visit = Field(
+        OriginVisit,  # single, nullable: the resolver returns one visit
+        require_snapshot=Argument(Boolean),
+        last=Argument(Int),  # accepted by the resolver but currently ignored
+    )
+
+    def resolve_latest_visit(parent, info, require_snapshot=False, last=None):
+        visit = info.context.storage.origin_visit_get_latest(
+            parent.url, require_snapshot=require_snapshot)
+        return OriginVisit.from_dict(visit) if visit else None  # no visit yet
+
+
+class RootQuery(ObjectType):
+    origins = Field(
+        NN(List(NN(Origin))),
+        urls=Argument(List(NN(String))),
+        ids=List(NN(Int)),
+    )
+
+    def resolve_origins(parent, info, urls=None, ids=None):
+        origins = []
+        if ids:
+            origins.extend(info.context.storage.origin_get(
+                [{'id': id_} for id_ in ids]))
+        if urls:
+            origins.extend(info.context.storage.origin_get(
+                [{'url': url} for url in urls]))
+
+        return [Origin(**origin) for origin in origins]
+
+    pids = Field(
+        NN(List(NN(SwhId))),
+        ids=NN(List(NN(String))),
+    )
+
+    def resolve_pids(parent, info, ids):
+        ids = [parse_persistent_identifier(id_) for id_ in ids]
+        if any(id_.namespace != 'swh' for id_ in ids):
+            raise ValueError(
+                'Only "swh:" namespace is supported in PIDs.')
+        if any(id_.scheme_version != '1' for id_ in ids):
+            raise ValueError(
+                'Only "1" scheme version is supported in SWH PIDs')
+        return [
+            SwhId(object_type=id_.object_type,
+                  object_id=id_.object_id)
+            for id_ in ids]
+
+    contents_by_hashes = Field(
+        NN(List(NN(SwhId))),
+        algo=NN(String),
+        hashes=NN(List(NN(Bytestring))),
+    )
+
+    def resolve_contents_by_hashes(parent, info, algo, hashes):
+        results = []
+        for hash_ in hashes:
+            res = info.context.storage.content_find({algo: hash_})
+            if res:
+                results.append(Content(**res))
+        return results
+
+
+schema = Schema(
+    query=RootQuery,
+    auto_camelcase=False,
+    types=[
+        Content, Directory, Revision, Release, Snapshot,
+        OriginVisit, Origin])
+
+
+def execute(query, variables=None):  # None instead of a shared mutable {}
+    context = Context(storage=config.storage())
+    return schema.execute(query, variables=variables or {}, context=context)
diff --git a/test_query.py b/test_query.py
new file mode 100644
index 00000000..8b8ab07f
--- /dev/null
+++ b/test_query.py
@@ -0,0 +1,36 @@
+import json
+
+from swh.storage import get_storage
+import swh.web.common.graphql
+
+context = swh.web.common.graphql.Context(
+    storage=get_storage('remote', {
+        'url': 'http://uffizi.internal.softwareheritage.org:5002/'}))
+res = swh.web.common.graphql.schema.execute(
+    '''
+    query {
+        origins(urls: ["https://github.com/SoftwareHeritage/swh-model"]) {
+            id
+            type
+            url
+            visits {
+                date
+                snapshot {
+                    swh_pid
+                    objects {
+                        __typename
+                        ... on Snapshot {
+                            branches(names: ["HEAD", "refs/heads/master"]) {
+                                name
+                                target {
+                                    swh_pid
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }''',
+    context=context)
+print(json.dumps(dict(res.to_dict()), indent=4))