diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,4 +9,5 @@
 numpy
 dash
 dash_bootstrap_components
+flask
 dulwich
diff --git a/swh/scanner/backend.py b/swh/scanner/backend.py
new file mode 100644
--- /dev/null
+++ b/swh/scanner/backend.py
@@ -0,0 +1,69 @@
+# Copyright (C) 2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from contextlib import closing
+
+from flask import Flask, request
+
+from .db import Db
+from .exceptions import LargePayloadExc
+
+LIMIT = 1000
+
+
+def create_app(db: Db):
+    """Backend for swh-scanner, implementing the /known endpoint of the
+    Software Heritage Web API.
+
+    Args:
+        db: the local SWHID database queried by the endpoint
+
+    Returns:
+        the Flask application exposing ``POST /api/1/known/``
+    """
+    app = Flask(__name__)
+
+    @app.errorhandler(LargePayloadExc)
+    def large_payload(exc):
+        # Without a handler the exception would surface as an opaque HTTP 500
+        return {"error": str(exc)}, 413
+
+    @app.route("/api/1/known/", methods=["POST"])
+    def known():
+        # silent=True turns a missing or malformed JSON body into None, so it
+        # is rejected by the check below instead of crashing on len()
+        swhids = request.get_json(silent=True)
+
+        if not isinstance(swhids, list) or not all(
+            isinstance(swhid, str) for swhid in swhids
+        ):
+            return {"error": "Expected a JSON list of SWHID strings"}, 400
+
+        if len(swhids) > LIMIT:
+            raise LargePayloadExc(
+                f"The maximum number of SWHIDs this endpoint can receive is {LIMIT}"
+            )
+
+        # closing() releases the cursor even when a lookup raises
+        with closing(db.conn.cursor()) as cur:
+            res = {swhid: {"known": db.known(swhid, cur)} for swhid in swhids}
+
+        return res
+
+    return app
+
+
+def run(host: str, port: int, db: Db):
+    """Serve the local database.
+
+    Args:
+        host: address the server binds to
+        port: TCP port the server listens on
+        db: the local SWHID database to expose
+    """
+    app = create_app(db)
+    # Keep the Werkzeug debugger off: it lets any client that can reach the
+    # server execute arbitrary Python code
+    app.run(host, port, debug=False)
diff --git a/swh/scanner/cli.py b/swh/scanner/cli.py
--- a/swh/scanner/cli.py
+++ b/swh/scanner/cli.py
@@ -18,6 +18,9 @@
 
 from .exceptions import DBError
 
+# Config for the "serve" option
+BACKEND_DEFAULT_PORT = 5011
+
 # All generic config code should reside in swh.core.config
 CONFIG_ENVVAR = "SWH_CONFIG_FILE"
 DEFAULT_CONFIG_PATH = os.path.join(click.get_app_dir("swh"), "global.yml")
@@ -191,6 +194,50 @@
         sys.exit(1)
 
 
+@db.command("serve")
+@click.option(
+    "-h",
+    "--host",
+    metavar="HOST",
+    default="127.0.0.1",
+    show_default=True,
+    help="The host of the API server",
+)
+@click.option(
+    "-p",
+    "--port",
+    metavar="PORT",
+    default=BACKEND_DEFAULT_PORT,
+    show_default=True,
+    type=int,
+    help="The port of the API server",
+)
+@click.option(
+    "-f",
+    "--db-file",
+    "db_file",
+    metavar="DB_FILE",
+    default="SWHID_DB.sqlite",
+    show_default=True,
+    type=click.Path(exists=True),
+    help="An sqlite database file (it can be generated with: 'swh scanner db import')",
+)
+@click.pass_context
+def serve(ctx, host, port, db_file):
+    """Start an API service using the sqlite database generated with the "db import"
+    option."""
+    import swh.scanner.backend as backend
+
+    from .db import Db
+
+    db = Db(db_file)
+    try:
+        backend.run(host, port, db)
+    finally:
+        # Release the database even when the server stops on an error or Ctrl-C
+        db.close()
+
+
 def main():
     return scanner(auto_envvar_prefix="SWH_SCANNER")
 
diff --git a/swh/scanner/db.py b/swh/scanner/db.py
--- a/swh/scanner/db.py
+++ b/swh/scanner/db.py
@@ -64,6 +64,5 @@
         """Check if a given SWHID is present or not inside the local database."""
         cur.execute("""SELECT 1 FROM swhids WHERE swhid=?""", (swhid,))
         res = cur.fetchone()
-        cur.close()
 
         return res is not None
diff --git a/swh/scanner/exceptions.py b/swh/scanner/exceptions.py
--- a/swh/scanner/exceptions.py
+++ b/swh/scanner/exceptions.py
@@ -12,6 +12,10 @@
     pass
 
 
+class LargePayloadExc(Exception):
+    """Raised when a request carries more SWHIDs than the endpoint accepts."""
+
+
 class DBError(Exception):
     pass
 
diff --git a/swh/scanner/tests/test_backend.py b/swh/scanner/tests/test_backend.py
new file mode 100644
--- /dev/null
+++ b/swh/scanner/tests/test_backend.py
@@ -0,0 +1,61 @@
+# Copyright (C) 2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.scanner.backend import LIMIT, create_app
+from swh.scanner.db import Db
+
+from .data import present_swhids
+
+
+def test_backend_endpoint_all_present(tmp_path, live_server, test_swhids_sample):
+    tmp_dbfile = tmp_path / "tmp_db.sqlite"
+    db = Db(tmp_dbfile)
+    cur = db.conn.cursor()
+    db.create_from(test_swhids_sample, LIMIT, cur)
+
+    app = create_app(db)
+
+    with app.test_client() as test_client:
+        res = test_client.post("/api/1/known/", json=present_swhids)
+
+        for swhid, attr in res.json.items():
+            assert attr["known"]
+
+
+def test_backend_endpoint_one_not_present(tmp_path, live_server, test_swhids_sample):
+    tmp_dbfile = tmp_path / "tmp_db.sqlite"
+    not_present_swhid = "swh:1:cnt:fa8eacf43d8646129ae8adfa1648f9307d999999"
+    swhids = present_swhids + [not_present_swhid]
+
+    db = Db(tmp_dbfile)
+    cur = db.conn.cursor()
+    db.create_from(test_swhids_sample, LIMIT, cur)
+
+    app = create_app(db)
+
+    with app.test_client() as test_client:
+        res = test_client.post("/api/1/known/", json=swhids)
+
+        for swhid, attr in res.json.items():
+            if swhid != not_present_swhid:
+                assert attr["known"]
+            else:
+                assert not attr["known"]
+
+
+def test_backend_large_payload_exc(tmp_path, live_server, test_swhids_sample):
+    tmp_dbfile = tmp_path / "tmp_db.sqlite"
+    swhid = "swh:1:cnt:fa8eacf43d8646129ae8adfa1648f9307d999999"
+    # one more SWHID than the backend accepts in a single request
+    swhids = [swhid] * (LIMIT + 1)
+    db = Db(tmp_dbfile)
+    cur = db.conn.cursor()
+    db.create_from(test_swhids_sample, LIMIT, cur)
+
+    app = create_app(db)
+
+    with app.test_client() as test_client:
+        res = test_client.post("/api/1/known/", json=swhids)
+        assert res.status_code == 413