Page MenuHomeSoftware Heritage

D4552.diff
No OneTemporary

D4552.diff

diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,4 +9,5 @@
numpy
dash
dash_bootstrap_components
+flask
dulwich
diff --git a/swh/scanner/backend.py b/swh/scanner/backend.py
new file mode 100644
--- /dev/null
+++ b/swh/scanner/backend.py
@@ -0,0 +1,41 @@
+# Copyright (C) 2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from flask import Flask, request
+
+from .db import Db
+from .exceptions import LargePayloadExc
+
+# Maximum number of SWHIDs accepted in a single request to the /known endpoint
+LIMIT = 1000
+
+
+def create_app(db: Db):
+    """Build the Flask application backing swh-scanner.
+
+    The application implements the /known endpoint of the Software Heritage
+    Web API on top of the local database.
+
+    Args:
+        db: database wrapper; the view uses ``db.conn.cursor()`` and
+            ``db.known(swhid, cursor)``.
+
+    Returns:
+        The configured Flask application.
+    """
+    app = Flask(__name__)
+
+    @app.route("/api/1/known/", methods=["POST"])
+    def known():
+        """Report, for each posted SWHID, whether the database knows it.
+
+        Raises:
+            LargePayloadExc: when more than LIMIT SWHIDs are posted.
+        """
+        swhids = request.get_json()
+
+        # The body must be a JSON array of strings; anything else (missing
+        # body, object, number, ...) is a client error, not a server crash.
+        if not isinstance(swhids, list) or not all(
+            isinstance(swhid, str) for swhid in swhids
+        ):
+            return {"error": "Expected a JSON array of SWHID strings"}, 400
+
+        if len(swhids) > LIMIT:
+            raise LargePayloadExc(
+                f"The maximum number of SWHIDs this endpoint can receive is {LIMIT}"
+            )
+
+        cur = db.conn.cursor()
+        try:
+            res = {swhid: {"known": db.known(swhid, cur)} for swhid in swhids}
+        finally:
+            # release the cursor even when a lookup fails
+            cur.close()
+
+        return res
+
+    return app
+
+
+def run(host: str, port: int, db: Db, debug: bool = False):
+    """Serve the local database over HTTP.
+
+    Args:
+        host: interface the server binds to.
+        port: TCP port the server listens on.
+        db: database handed to :func:`create_app`.
+        debug: enable Flask debug mode. Off by default because debug mode
+            exposes an interactive debugger that can execute arbitrary code
+            for anyone able to reach ``host``.
+    """
+    app = create_app(db)
+    # the CLI may hand the port over as a string; normalise it here
+    app.run(host, int(port), debug=debug)
diff --git a/swh/scanner/cli.py b/swh/scanner/cli.py
--- a/swh/scanner/cli.py
+++ b/swh/scanner/cli.py
@@ -18,6 +18,9 @@
from .exceptions import DBError
+# Default port of the API server started by the "serve" command
+BACKEND_DEFAULT_PORT = 5011
+
# All generic config code should reside in swh.core.config
CONFIG_ENVVAR = "SWH_CONFIG_FILE"
DEFAULT_CONFIG_PATH = os.path.join(click.get_app_dir("swh"), "global.yml")
@@ -191,6 +194,46 @@
sys.exit(1)
+@db.command("serve")
+@click.option(
+    "-h",
+    "--host",
+    metavar="HOST",
+    default="127.0.0.1",
+    show_default=True,
+    help="The host of the API server",
+)
+@click.option(
+    "-p",
+    "--port",
+    metavar="PORT",
+    default=BACKEND_DEFAULT_PORT,
+    show_default=True,
+    type=int,
+    help="The port of the API server",
+)
+@click.option(
+    "-f",
+    "--db-file",
+    "db_file",
+    metavar="DB_FILE",
+    default="SWHID_DB.sqlite",
+    show_default=True,
+    type=click.Path(exists=True),
+    help="An sqlite database file (it can be generated with: 'swh scanner db import')",
+)
+@click.pass_context
+def serve(ctx, host, port, db_file):
+    """Start an API service using the sqlite database generated with the "db import"
+    option."""
+    # imported lazily so that flask is only loaded when serving
+    import swh.scanner.backend as backend
+
+    from .db import Db
+
+    local_db = Db(db_file)
+    try:
+        backend.run(host, port, local_db)
+    finally:
+        # close the database even if the server stops on an error
+        local_db.close()
+
+
def main():
return scanner(auto_envvar_prefix="SWH_SCANNER")
diff --git a/swh/scanner/db.py b/swh/scanner/db.py
--- a/swh/scanner/db.py
+++ b/swh/scanner/db.py
@@ -64,6 +64,5 @@
"""Check if a given SWHID is present or not inside the local database."""
cur.execute("""SELECT 1 FROM swhids WHERE swhid=?""", (swhid,))
res = cur.fetchone()
- cur.close()
return res is not None
diff --git a/swh/scanner/exceptions.py b/swh/scanner/exceptions.py
--- a/swh/scanner/exceptions.py
+++ b/swh/scanner/exceptions.py
@@ -12,6 +12,10 @@
pass
+class LargePayloadExc(Exception):
+    """Raised when a request carries more SWHIDs than the backend accepts."""
+
+
class DBError(Exception):
pass
diff --git a/swh/scanner/tests/test_backend.py b/swh/scanner/tests/test_backend.py
new file mode 100644
--- /dev/null
+++ b/swh/scanner/tests/test_backend.py
@@ -0,0 +1,61 @@
+# Copyright (C) 2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.scanner.backend import LIMIT, create_app
+from swh.scanner.db import Db
+
+from .data import present_swhids
+
+
+def test_backend_endpoint_all_present(tmp_path, live_server, test_swhids_sample):
+    """Every requested SWHID stored in the database is reported as known."""
+    tmp_dbfile = tmp_path / "tmp_db.sqlite"
+    db = Db(tmp_dbfile)
+    cur = db.conn.cursor()
+    db.create_from(test_swhids_sample, LIMIT, cur)
+
+    app = create_app(db)
+
+    with app.test_client() as test_client:
+        res = test_client.post("/api/1/known/", json=present_swhids)
+
+    assert res.status_code == 200
+    # guard against a vacuous pass: every requested SWHID must be answered
+    assert set(res.json) == set(present_swhids)
+    for attr in res.json.values():
+        assert attr["known"]
+
+
+def test_backend_endpoint_one_not_present(tmp_path, live_server, test_swhids_sample):
+    """A SWHID missing from the database is the only one reported as unknown."""
+    tmp_dbfile = tmp_path / "tmp_db.sqlite"
+    not_present_swhid = "swh:1:cnt:fa8eacf43d8646129ae8adfa1648f9307d999999"
+    swhids = present_swhids + [not_present_swhid]
+
+    db = Db(tmp_dbfile)
+    cur = db.conn.cursor()
+    db.create_from(test_swhids_sample, LIMIT, cur)
+
+    app = create_app(db)
+
+    with app.test_client() as test_client:
+        res = test_client.post("/api/1/known/", json=swhids)
+
+    assert res.status_code == 200
+    # guard against a vacuous pass: every requested SWHID must be answered,
+    # including the one that is not in the database
+    assert set(res.json) == set(swhids)
+    for swhid, attr in res.json.items():
+        if swhid != not_present_swhid:
+            assert attr["known"]
+        else:
+            assert not attr["known"]
+
+
+def test_backend_large_payload_exc(tmp_path, live_server, test_swhids_sample):
+    """A request with more than LIMIT SWHIDs must not succeed."""
+    tmp_dbfile = tmp_path / "tmp_db.sqlite"
+    swhid = "swh:1:cnt:fa8eacf43d8646129ae8adfa1648f9307d999999"
+    # one more SWHID than the backend accepts in a single request; derived
+    # from LIMIT so the test keeps exercising the boundary if LIMIT changes
+    swhids = [swhid] * (LIMIT + 1)
+    db = Db(tmp_dbfile)
+    cur = db.conn.cursor()
+    db.create_from(test_swhids_sample, LIMIT, cur)
+
+    app = create_app(db)
+
+    with app.test_client() as test_client:
+        res = test_client.post("/api/1/known/", json=swhids)
+
+    assert res.status_code != 200

File Metadata

Mime Type
text/plain
Expires
Mar 17 2025, 6:38 PM (7 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3223733

Event Timeline