Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7342999
D4552.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
5 KB
Subscribers
None
D4552.diff
View Options
diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,4 +9,5 @@
numpy
dash
dash_bootstrap_components
+flask
dulwich
diff --git a/swh/scanner/backend.py b/swh/scanner/backend.py
new file mode 100644
--- /dev/null
+++ b/swh/scanner/backend.py
@@ -0,0 +1,67 @@
+# Copyright (C) 2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from flask import Flask, request
+
+from .db import Db
+from .exceptions import LargePayloadExc
+
+LIMIT = 1000
+
+
+def create_app(db: Db):
+    """Backend for swh-scanner, implementing the /known endpoint of the
+    Software Heritage Web API.
+
+    Args:
+        db: database wrapper whose ``conn`` and ``known`` are used to answer
+            the lookups
+
+    Returns:
+        the Flask application serving ``POST /api/1/known/``
+    """
+    app = Flask(__name__)
+
+    @app.errorhandler(LargePayloadExc)
+    def payload_too_large(exc):
+        # Report an oversized request as a client error instead of letting the
+        # exception surface as a generic "500 Internal Server Error".
+        return {"error": str(exc)}, 413
+
+    @app.route("/api/1/known/", methods=["POST"])
+    def known():
+        # silent=True yields None for a missing or malformed JSON body
+        swhids = request.get_json(silent=True)
+
+        if not isinstance(swhids, list):
+            return {"error": "The request body must be a JSON list of SWHIDs"}, 400
+
+        if len(swhids) > LIMIT:
+            raise LargePayloadExc(
+                f"The maximum number of SWHIDs this endpoint can receive is {LIMIT}"
+            )
+
+        cur = db.conn.cursor()
+        try:
+            return {swhid: {"known": db.known(swhid, cur)} for swhid in swhids}
+        finally:
+            # release the cursor even when a lookup fails
+            cur.close()
+
+    return app
+
+
+def run(host: str, port: int, db: Db, debug: bool = False):
+    """Serve the local database
+
+    Args:
+        host: interface the server binds to
+        port: TCP port the server listens on
+        db: database handed to :func:`create_app`
+        debug: enable Flask's debug mode; off by default because the
+            interactive debugger must not be reachable by other hosts
+    """
+    app = create_app(db)
+    app.run(host, port, debug=debug)
diff --git a/swh/scanner/cli.py b/swh/scanner/cli.py
--- a/swh/scanner/cli.py
+++ b/swh/scanner/cli.py
@@ -18,6 +18,9 @@
from .exceptions import DBError
+# Default port for the "db serve" command
+BACKEND_DEFAULT_PORT = 5011
+
# All generic config code should reside in swh.core.config
CONFIG_ENVVAR = "SWH_CONFIG_FILE"
DEFAULT_CONFIG_PATH = os.path.join(click.get_app_dir("swh"), "global.yml")
@@ -191,6 +194,51 @@
sys.exit(1)
+@db.command("serve")
+@click.option(
+    "-h",
+    "--host",
+    metavar="HOST",
+    default="127.0.0.1",
+    show_default=True,
+    help="The host of the API server",
+)
+@click.option(
+    "-p",
+    "--port",
+    metavar="PORT",
+    # an integer default and type let click reject non-numeric ports up front
+    default=BACKEND_DEFAULT_PORT,
+    type=int,
+    show_default=True,
+    help="The port of the API server",
+)
+@click.option(
+    "-f",
+    "--db-file",
+    "db_file",
+    metavar="DB_FILE",
+    default="SWHID_DB.sqlite",
+    show_default=True,
+    type=click.Path(exists=True),
+    help="An sqlite database file (it can be generated with: 'swh scanner db import')",
+)
+@click.pass_context
+def serve(ctx, host, port, db_file):
+    """Start an API service using the sqlite database generated with the "db import"
+    option."""
+    import swh.scanner.backend as backend
+
+    from .db import Db
+
+    db = Db(db_file)
+    try:
+        backend.run(host, port, db)
+    finally:
+        # close the database even if the server stops with an error
+        db.close()
+
+
def main():
return scanner(auto_envvar_prefix="SWH_SCANNER")
diff --git a/swh/scanner/db.py b/swh/scanner/db.py
--- a/swh/scanner/db.py
+++ b/swh/scanner/db.py
@@ -64,6 +64,5 @@
"""Check if a given SWHID is present or not inside the local database."""
cur.execute("""SELECT 1 FROM swhids WHERE swhid=?""", (swhid,))
res = cur.fetchone()
- cur.close()
return res is not None
diff --git a/swh/scanner/exceptions.py b/swh/scanner/exceptions.py
--- a/swh/scanner/exceptions.py
+++ b/swh/scanner/exceptions.py
@@ -12,6 +12,10 @@
pass
+class LargePayloadExc(Exception):  # raised when a request carries too many SWHIDs
+ pass
+
+
class DBError(Exception):
pass
diff --git a/swh/scanner/tests/test_backend.py b/swh/scanner/tests/test_backend.py
new file mode 100644
--- /dev/null
+++ b/swh/scanner/tests/test_backend.py
@@ -0,0 +1,67 @@
+# Copyright (C) 2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from swh.scanner.backend import LIMIT, create_app
+from swh.scanner.db import Db
+
+from .data import present_swhids
+
+
+def test_backend_endpoint_all_present(tmp_path, live_server, test_swhids_sample):
+    """All the SWHIDs of present_swhids are reported as known."""
+    tmp_dbfile = tmp_path / "tmp_db.sqlite"
+    db = Db(tmp_dbfile)
+    cur = db.conn.cursor()
+    db.create_from(test_swhids_sample, LIMIT, cur)
+
+    app = create_app(db)
+
+    with app.test_client() as test_client:
+        res = test_client.post("/api/1/known/", json=present_swhids)
+
+    # an error or an empty answer must not let the loop below pass vacuously
+    assert res.status_code == 200
+    assert set(res.json) == set(present_swhids)
+    for attr in res.json.values():
+        assert attr["known"]
+
+
+def test_backend_endpoint_one_not_present(tmp_path, live_server, test_swhids_sample):
+    """Only the extra SWHID added to the request is reported as not known."""
+    tmp_dbfile = tmp_path / "tmp_db.sqlite"
+    not_present_swhid = "swh:1:cnt:fa8eacf43d8646129ae8adfa1648f9307d999999"
+    swhids = present_swhids + [not_present_swhid]
+
+    db = Db(tmp_dbfile)
+    cur = db.conn.cursor()
+    db.create_from(test_swhids_sample, LIMIT, cur)
+
+    app = create_app(db)
+
+    with app.test_client() as test_client:
+        res = test_client.post("/api/1/known/", json=swhids)
+
+    # every requested SWHID must be answered, otherwise nothing is checked
+    assert res.status_code == 200
+    assert set(res.json) == set(swhids)
+    for swhid, attr in res.json.items():
+        assert attr["known"] == (swhid != not_present_swhid)
+
+
+def test_backend_large_payload_exc(tmp_path, live_server, test_swhids_sample):
+    """A request carrying more than LIMIT SWHIDs is not answered with a 200."""
+    tmp_dbfile = tmp_path / "tmp_db.sqlite"
+    swhid = "swh:1:cnt:fa8eacf43d8646129ae8adfa1648f9307d999999"
+    # one SWHID more than the backend accepts in a single request
+    swhids = [swhid] * (LIMIT + 1)
+    db = Db(tmp_dbfile)
+    cur = db.conn.cursor()
+    db.create_from(test_swhids_sample, LIMIT, cur)
+
+    app = create_app(db)
+
+    with app.test_client() as test_client:
+        res = test_client.post("/api/1/known/", json=swhids)
+        assert res.status_code != 200
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mar 17 2025, 6:38 PM (7 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3223733
Attached To
D4552: 'db serve' option to start the API service
Event Timeline
Log In to Comment