diff --git a/swh/graphql/resolvers/scalars.py b/swh/graphql/resolvers/scalars.py index 0bf757e..4a427bf 100644 --- a/swh/graphql/resolvers/scalars.py +++ b/swh/graphql/resolvers/scalars.py @@ -1,58 +1,63 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from datetime import datetime from ariadne import ScalarType from swh.graphql.errors import InvalidInputError from swh.graphql.utils import utils from swh.model import hashutil +from swh.model.exceptions import ValidationError from swh.model.model import TimestampWithTimezone from swh.model.swhids import CoreSWHID datetime_scalar = ScalarType("DateTime") swhid_scalar = ScalarType("SWHID") id_scalar = ScalarType("ID") content_hash_scalar = ScalarType("ContentHash") @id_scalar.serializer def serialize_id(value): if type(value) is bytes: return value.hex() return value @datetime_scalar.serializer def serialize_datetime(value): # FIXME, handle error and return None if type(value) == TimestampWithTimezone: value = value.to_datetime() if type(value) == datetime: return utils.get_formatted_date(value) return None @swhid_scalar.value_parser def validate_swhid(value): - return CoreSWHID.from_string(value) + try: + swhid = CoreSWHID.from_string(value) + except ValidationError as e: + raise InvalidInputError("Invalid SWHID", e) + return swhid @swhid_scalar.serializer def serialize_swhid(value): return str(value) @content_hash_scalar.value_parser def validate_content_hash(value): try: hash_type, hash_string = value.split(":") hash_value = hashutil.hash_to_bytes(hash_string) except ValueError as e: raise InvalidInputError("Invalid content checksum", e) if hash_type not in hashutil.ALGORITHMS: raise InvalidInputError("Invalid hash algorithm") return hash_type, hash_value diff --git a/swh/graphql/server.py b/swh/graphql/server.py index 7a92919..2e01e39 100644 --- a/swh/graphql/server.py +++ b/swh/graphql/server.py @@ -1,86 +1,86 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os from typing import Any, Dict, Optional from swh.core import config from swh.search import get_search as get_swh_search from swh.storage import get_storage as get_swh_storage graphql_cfg = None storage = None search = None def get_storage(): global storage if not storage: storage = get_swh_storage(**graphql_cfg["storage"]) return storage def get_search(): global search if not search: search = get_swh_search(**graphql_cfg["search"]) return search def load_and_check_config(config_path: Optional[str]) -> Dict[str, Any]: """Check the minimal configuration is set to run the api or raise an error explanation. Args: config_path: Path to the configuration file to load Raises: Error if the setup is not as expected Returns: configuration as a dict """ if not config_path: raise EnvironmentError("Configuration file must be defined") if not os.path.exists(config_path): raise FileNotFoundError(f"Configuration file {config_path} does not exist") cfg = config.read(config_path) if "storage" not in cfg: raise KeyError("Missing 'storage' configuration") return cfg def make_app_from_configfile(): """Loading the configuration from a configuration file. SWH_CONFIG_FILENAME environment variable defines the configuration path to load. """ from starlette.middleware.cors import CORSMiddleware from .app import schema - from .errors.handlers import format_error + from .errors import format_error global graphql_cfg if not graphql_cfg: config_path = os.environ.get("SWH_CONFIG_FILENAME") graphql_cfg = load_and_check_config(config_path) server_type = graphql_cfg.get("server-type") if server_type == "asgi": from ariadne.asgi import GraphQL application = CORSMiddleware( GraphQL(schema, debug=graphql_cfg["debug"], error_formatter=format_error), # FIXME, restrict origins after deploying the JS client allow_origins=["*"], allow_methods=("GET", "POST", "OPTIONS"), ) return application diff --git a/swh/graphql/tests/conftest.py b/swh/graphql/tests/conftest.py index 03e319b..b459f32 100644 --- a/swh/graphql/tests/conftest.py +++ b/swh/graphql/tests/conftest.py @@ -1,60 +1,65 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from ariadne import graphql_sync from flask import Flask, jsonify, request import pytest from swh.graphql import server as app_server from swh.graphql.app import schema +from swh.graphql.errors import format_error from swh.search import get_search as get_swh_search from swh.storage import get_storage as get_swh_storage from .data import populate_dummy_data, populate_search_data @pytest.fixture(scope="session") def storage(): storage = get_swh_storage(cls="memory") # set the global var to use the in-memory storage app_server.storage = storage # populate the in-memory storage populate_dummy_data(storage) return storage @pytest.fixture(scope="session") def search(): search = get_swh_search("memory") # set the global var to use the in-memory search app_server.search = search search.initialize() # populate the in-memory search populate_search_data(search) return search @pytest.fixture(scope="session") def test_app(storage, search): app = Flask(__name__) @app.route("/", methods=["POST"]) def graphql_server(): # GraphQL queries are always sent as POST data = request.get_json() success, result = graphql_sync( - schema, data, context_value=request, debug=app.debug + schema, + data, + context_value=request, + debug=app.debug, + error_formatter=format_error, ) status_code = 200 if success else 400 return jsonify(result), status_code yield app @pytest.fixture(scope="session") def client(test_app): with test_app.test_client() as client: yield client diff --git a/swh/graphql/tests/functional/test_content.py b/swh/graphql/tests/functional/test_content.py index 1de014d..30c2b48 100644 --- a/swh/graphql/tests/functional/test_content.py +++ b/swh/graphql/tests/functional/test_content.py @@ -1,163 +1,163 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import pytest from . import utils from ..data import get_contents @pytest.mark.parametrize("content", get_contents()) def test_get_content_with_swhid(client, content): query_str = """ { content(swhid: "%s") { swhid checksum { blake2s256 sha1 sha1_git sha256 } length status data { url } fileType { encoding } language { lang } license { licenses } } } """ data, _ = utils.get_query_response(client, query_str % content.swhid()) archive_url = "https://archive.softwareheritage.org/api/1/" response = { "swhid": str(content.swhid()), "checksum": { "blake2s256": content.blake2s256.hex(), "sha1": content.sha1.hex(), "sha1_git": content.sha1_git.hex(), "sha256": content.sha256.hex(), }, "length": content.length, "status": content.status, "data": { "url": f"{archive_url}content/sha1:{content.sha1.hex()}/raw/", }, "fileType": None, "language": None, "license": None, } assert data["content"] == response @pytest.mark.parametrize("content", get_contents()) def test_get_content_with_hash(client, content): query_str = """ { contentByHash(checksums: ["blake2s256:%s", "sha1:%s", "sha1_git:%s", "sha256:%s"]) { swhid } } """ data, _ = utils.get_query_response( client, query_str % ( content.blake2s256.hex(), content.sha1.hex(), content.sha1_git.hex(), content.sha256.hex(), ), ) assert data["contentByHash"] == {"swhid": str(content.swhid())} def test_get_content_with_invalid_swhid(client): query_str = """ { content(swhid: "swh:1:cnt:invalid") { swhid } } """ errors = utils.get_error_response(client, query_str) # API will throw an error in case of an invalid SWHID assert len(errors) == 1 - assert "Invalid SWHID: invalid syntax" in errors[0]["message"] + assert "Input error: Invalid SWHID" in errors[0]["message"] def test_get_content_with_invalid_hashes(client): content = get_contents()[0] query_str = """ { contentByHash(checksums: ["blake2s256:%s", "sha1:%s", "sha1_git:%s", "sha256:%s"]) { swhid } } """ errors = utils.get_error_response( client, query_str % ( "invalid", # Only one hash is invalid content.sha1.hex(), content.sha1_git.hex(), content.sha256.hex(), ), ) # API will throw an error in case of an invalid content hash assert len(errors) == 1 assert "Input error: Invalid content checksum" in errors[0]["message"] def test_get_content_with_invalid_hash_algorithm(client): content = get_contents()[0] query_str = """ { contentByHash(checksums: ["test:%s"]) { swhid } } """ errors = utils.get_error_response(client, query_str % content.sha1.hex()) assert len(errors) == 1 assert "Input error: Invalid hash algorithm" in errors[0]["message"] def test_get_content_as_target(client): # SWHID of a test dir with a file entry directory_swhid = "swh:1:dir:87b339104f7dc2a8163dec988445e3987995545f" query_str = """ { directory(swhid: "%s") { swhid entries(first: 2) { nodes { type target { ...on Content { swhid length } } } } } } """ data, _ = utils.get_query_response(client, query_str % directory_swhid) content_obj = data["directory"]["entries"]["nodes"][1]["target"] assert content_obj == { "length": 4, "swhid": "swh:1:cnt:86bc6b377e9d25f9d26777a4a28d08e63e7c5779", } diff --git a/swh/graphql/tests/functional/test_directory.py b/swh/graphql/tests/functional/test_directory.py index 494d73b..a07481b 100644 --- a/swh/graphql/tests/functional/test_directory.py +++ b/swh/graphql/tests/functional/test_directory.py @@ -1,74 +1,74 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import pytest from . import utils from ..data import get_directories @pytest.mark.parametrize("directory", get_directories()) def test_get_directory(client, directory): query_str = """ { directory(swhid: "%s") { swhid } } """ data, _ = utils.get_query_response(client, query_str % directory.swhid()) assert data["directory"] == {"swhid": str(directory.swhid())} def test_get_directory_with_invalid_swhid(client): query_str = """ { directory(swhid: "swh:1:dir:invalid") { swhid } } """ errors = utils.get_error_response(client, query_str) # API will throw an error in case of an invalid SWHID assert len(errors) == 1 - assert "Invalid SWHID: invalid syntax" in errors[0]["message"] + assert "Input error: Invalid SWHID" in errors[0]["message"] def test_get_revision_directory(client): query_str = """ { revision(swhid: "swh:1:rev:66c7c1cd9673275037140f2abff7b7b11fc9439c") { swhid directory { swhid } } } """ data, _ = utils.get_query_response(client, query_str) assert data["revision"]["directory"] == { "swhid": "swh:1:dir:0101010101010101010101010101010101010101" } def test_get_target_directory(client): # TargetDirectoryNode is returned from snapshotbranch, release # and directory entry nodes. Release node is used for testing here query_str = """ { release(swhid: "swh:1:rel:ee4d20e80af850cc0f417d25dc5073792c5010d2") { swhid target { ...on Directory { swhid } } } } """ data, _ = utils.get_query_response(client, query_str) assert data["release"]["target"] == { "swhid": "swh:1:dir:0505050505050505050505050505050505050505" } diff --git a/swh/graphql/tests/functional/test_revision.py b/swh/graphql/tests/functional/test_revision.py index 1c72970..6ad3eba 100644 --- a/swh/graphql/tests/functional/test_revision.py +++ b/swh/graphql/tests/functional/test_revision.py @@ -1,160 +1,160 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import pytest from swh.model.swhids import CoreSWHID from . import utils from ..data import get_revisions, get_revisions_with_parents @pytest.mark.parametrize("revision", get_revisions()) def test_get_revision(client, revision): query_str = """ { revision(swhid: "%s") { swhid message { text } author { fullname { text } name { text } email { text } } committer { fullname { text } name { text } email { text } } date type directory { swhid } } } """ data, _ = utils.get_query_response(client, query_str % revision.swhid()) assert data["revision"] == { "swhid": str(revision.swhid()), "message": {"text": revision.message.decode()}, "author": { "fullname": {"text": revision.author.fullname.decode()}, "name": {"text": revision.author.name.decode()}, "email": {"text": revision.author.email.decode()}, }, "committer": { "fullname": {"text": revision.committer.fullname.decode()}, "name": {"text": revision.committer.name.decode()}, "email": {"text": revision.committer.email.decode()}, }, "date": revision.date.to_datetime().isoformat(), "type": revision.type.value, "directory": { "swhid": str(CoreSWHID(object_id=revision.directory, object_type="dir")) }, } def test_get_revision_with_invalid_swhid(client): query_str = """ { revision(swhid: "swh:1:cnt:invalid") { swhid } } """ errors = utils.get_error_response(client, query_str) # API will throw an error in case of an invalid SWHID assert len(errors) == 1 - assert "Invalid SWHID: invalid syntax" in errors[0]["message"] + assert "Input error: Invalid SWHID" in errors[0]["message"] def test_get_revision_as_target(client): # SWHID of a snapshot with revision as target snapshot_swhid = "swh:1:snp:9e78d7105c5e0f886487511e2a92377b4ee4c32a" query_str = """ { snapshot(swhid: "%s") { branches(first: 1, types: [revision]) { nodes { type target { ...on Revision { swhid } } } } } } """ data, _ = utils.get_query_response(client, query_str % snapshot_swhid) revision_obj = data["snapshot"]["branches"]["nodes"][0]["target"] assert revision_obj == { "swhid": "swh:1:rev:66c7c1cd9673275037140f2abff7b7b11fc9439c" } def test_get_revision_log(client): revision_swhid = get_revisions_with_parents()[0].swhid() query_str = """ { revision(swhid: "%s") { swhid revisionLog(first: 3) { nodes { swhid } } } } """ data, _ = utils.get_query_response(client, query_str % revision_swhid) assert data["revision"]["revisionLog"] == { "nodes": [ {"swhid": str(revision_swhid)}, {"swhid": str(get_revisions()[0].swhid())}, {"swhid": str(get_revisions()[1].swhid())}, ] } def test_get_revision_parents(client): revision_swhid = get_revisions_with_parents()[0].swhid() query_str = """ { revision(swhid: "%s") { swhid parents { nodes { swhid } } } } """ data, _ = utils.get_query_response(client, query_str % revision_swhid) assert data["revision"]["parents"] == { "nodes": [ {"swhid": str(get_revisions()[0].swhid())}, {"swhid": str(get_revisions()[1].swhid())}, ] } diff --git a/swh/graphql/tests/functional/test_snapshot_node.py b/swh/graphql/tests/functional/test_snapshot_node.py index 2950486..7f9c2d8 100644 --- a/swh/graphql/tests/functional/test_snapshot_node.py +++ b/swh/graphql/tests/functional/test_snapshot_node.py @@ -1,57 +1,57 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import pytest from ..data import get_snapshots from .utils import assert_missing_object, get_error_response, get_query_response @pytest.mark.parametrize("snapshot", get_snapshots()) def test_get_snapshot(client, snapshot): query_str = """ { snapshot(swhid: "%s") { id swhid branches(first:5) { nodes { type name { text } } } } } """ data, _ = get_query_response(client, query_str % snapshot.swhid()) assert data["snapshot"]["swhid"] == str(snapshot.swhid()) assert data["snapshot"]["id"] == snapshot.id.hex() assert len(data["snapshot"]["branches"]["nodes"]) == len(snapshot.branches) def test_get_snapshot_missing_swhid(client): query_str = """ { snapshot(swhid: "swh:1:snp:0949d7a8c96347dba09be8d79085b8207f345412") { swhid } } """ assert_missing_object(client, query_str, "snapshot") def test_get_snapshot_invalid_swhid(client): query_str = """ { snapshot(swhid: "swh:1:snp:invalid") { swhid } } """ errors = get_error_response(client, query_str) assert len(errors) == 1 - assert "Invalid SWHID: invalid syntax" in errors[0]["message"] + assert "Input error: Invalid SWHID" in errors[0]["message"] diff --git a/swh/graphql/tests/functional/test_swhid_resolve.py b/swh/graphql/tests/functional/test_swhid_resolve.py index 9b0c707..72452b4 100644 --- a/swh/graphql/tests/functional/test_swhid_resolve.py +++ b/swh/graphql/tests/functional/test_swhid_resolve.py @@ -1,221 +1,221 @@ # Copyright (C) 2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import pytest from . import utils from ..data import ( get_contents, get_directories, get_releases, get_revisions, get_snapshots, ) def test_invalid_swhid(client): query_str = """ { resolveSwhid(swhid: "swh:1:dir:dae0d245988b472abd30a4f968b919d0019b6c7") { nodes { type } } } """ errors = utils.get_error_response(client, query_str) # API will throw an error in case of an invalid SWHID assert len(errors) == 1 - assert "Invalid SWHID: invalid syntax" in errors[0]["message"] + assert "Input error: Invalid SWHID" in errors[0]["message"] @pytest.mark.parametrize( "swhid", [ "swh:1:rel:0949d7a8c96347dba09be8d79085b8207f345412", "swh:1:rev:0949d7a8c96347dba09be8d79085b8207f345412", "swh:1:dir:0949d7a8c96347dba09be8d79085b8207f345412", "swh:1:cnt:0949d7a8c96347dba09be8d79085b8207f345412", "swh:1:snp:0949d7a8c96347dba09be8d79085b8207f345412", ], ) def test_missing_swhid(client, swhid): query_str = """ { resolveSwhid(swhid: "%s") { nodes { type } } } """ data, _ = utils.get_query_response(client, query_str % swhid) # API will return an empty list in case of a valid, non existing SWHID assert data == {"resolveSwhid": {"nodes": []}} @pytest.mark.parametrize("snapshot", get_snapshots()) def test_snapshot_swhid_resolve(client, snapshot): query_str = """ { resolveSwhid(swhid: "%s") { nodes { type target { __typename ... on Snapshot { swhid } } } } } """ data, _ = utils.get_query_response(client, query_str % snapshot.swhid()) assert data == { "resolveSwhid": { "nodes": [ { "target": { "__typename": "Snapshot", "swhid": str(snapshot.swhid()), }, "type": "snapshot", } ] } } @pytest.mark.parametrize("revision", get_revisions()) def test_revision_swhid_resolve(client, revision): query_str = """ { resolveSwhid(swhid: "%s") { nodes { type target { __typename ... on Revision { swhid } } } } } """ data, _ = utils.get_query_response(client, query_str % revision.swhid()) assert data == { "resolveSwhid": { "nodes": [ { "target": { "__typename": "Revision", "swhid": str(revision.swhid()), }, "type": "revision", } ] } } @pytest.mark.parametrize("release", get_releases()) def test_release_swhid_resolve(client, release): query_str = """ { resolveSwhid(swhid: "%s") { nodes { type target { __typename ... on Release { swhid } } } } } """ data, _ = utils.get_query_response(client, query_str % release.swhid()) assert data == { "resolveSwhid": { "nodes": [ { "target": { "__typename": "Release", "swhid": str(release.swhid()), }, "type": "release", } ] } } @pytest.mark.parametrize("directory", get_directories()) def test_directory_swhid_resolve(client, directory): query_str = """ { resolveSwhid(swhid: "%s") { nodes { type target { __typename ... on Directory { swhid } } } } } """ data, _ = utils.get_query_response(client, query_str % directory.swhid()) assert data == { "resolveSwhid": { "nodes": [ { "target": { "__typename": "Directory", "swhid": str(directory.swhid()), }, "type": "directory", } ] } } @pytest.mark.parametrize("content", get_contents()) def test_content_swhid_resolve(client, content): query_str = """ { resolveSwhid(swhid: "%s") { nodes { type target { __typename ... on Content { swhid } } } } } """ data, _ = utils.get_query_response(client, query_str % content.swhid()) assert data == { "resolveSwhid": { "nodes": [ { "target": { "__typename": "Content", "swhid": str(content.swhid()), }, "type": "content", } ] } }