def setup_config(ctx, api_url):
    """Return the scanner configuration, overriding the web API URL if given.

    Args:
        ctx: click context whose ``obj["config"]`` holds the configuration
            dictionary.
        api_url: URL passed on the command line, or a falsy value to keep the
            configured one.

    Returns:
        The configuration dictionary stored in the context.  It is updated in
        place when ``api_url`` is given.
    """
    config = ctx.obj["config"]
    if api_url:
        # The URL is stored with a trailing slash.
        if not api_url.endswith("/"):
            api_url += "/"
        # Create the section on demand so that an explicit URL also works
        # when the configuration has no "web-api" entry yet.
        config.setdefault("web-api", {})["url"] = api_url

    return config


@scanner.group("db")
@click.pass_context
def db(ctx):
    """Manage the local database of SWHIDs."""


@db.command("import")
@click.option(
    "-u",
    "--api-url",
    default=None,
    metavar="API_URL",
    show_default=True,
    help="URL for the api request",
)
@click.option(
    "-i",
    "--input",
    "input_file",
    metavar="INPUT_FILE",
    type=click.Path(exists=True),
    # Without an input file the import can only fail, and the failure path
    # deletes the output database file; reject the call up front instead.
    required=True,
    help="A file containing SWHIDs",
)
@click.option(
    "-o",
    "--output",
    "output_file_db",
    metavar="OUTPUT_DB_FILE",
    default="SWHID_DB.sqlite",
    show_default=True,
    help="The name of the generated sqlite database",
)
@click.pass_context
def _import(ctx, api_url, input_file, output_file_db):
    """Parse an input list of SWHID to generate a local sqlite database
    """
    from .db import Db

    config = setup_config(ctx, api_url)
    # Named ``database`` so the ``db`` command group above is not shadowed.
    database = Db(output_file_db)
    try:
        cur = database.conn.cursor()
        database.create_from(config, input_file, cur)
    finally:
        # Release the connection even if the import raises.
        database.close()
class Db:
    """Local database interface.

    The database is a SQLite file with a single table, ``swhid_db``, made of
    one ``swhid`` text column.  Cursors are created by the caller from
    ``self.conn`` and passed to the methods that need one.
    """

    def __init__(self, db_file: Path):
        """Open a connection to the SQLite database stored at ``db_file``."""
        self.db_file: Path = db_file
        self.conn: sqlite3.Connection = sqlite3.connect(
            db_file, check_same_thread=False
        )

    def close(self):
        """Close the connection to the database."""
        self.conn.close()

    def create_table(self, cur: sqlite3.Cursor):
        """Create the table where the SWHIDs will be stored, if missing."""
        cur.execute("""CREATE TABLE IF NOT EXISTS swhid_db (swhid text)""")

    def add(self, swhid: str, cur: sqlite3.Cursor):
        """Insert the SWHID inside the database unless it is already there."""
        cur.execute(
            """INSERT INTO swhid_db SELECT (?)
            WHERE NOT EXISTS (SELECT 1 FROM swhid_db WHERE swhid=?)""",
            (swhid, swhid),
        )

    async def parse_file(
        self, input_file: Path, config: Dict[str, Any], cur: sqlite3.Cursor
    ):
        """Add the known SWHIDs listed in the input file to the database.

        The file is read one SWHID per line.  The whole list is submitted to
        ``swhids_discovery`` and only the entries it reports with a true
        ``"known"`` attribute are inserted.

        Args:
            input_file: path of the text file containing the SWHIDs.
            config: configuration; ``config["web-api"]["url"]`` is required,
                ``config["web-api"]["auth-token"]`` is optional.
            cur: cursor used for the insertions.
        """
        with open(input_file, "r") as f:
            # Ignore blank lines (e.g. a trailing newline) and surrounding
            # whitespace so they are not sent to the API as SWHIDs.
            swhids = [
                stripped
                for stripped in (line.strip() for line in f.read().splitlines())
                if stripped
            ]

        if not swhids:
            # Nothing to look up: skip the network round trip entirely.
            return

        web_api = config["web-api"]
        api_url = web_api["url"]
        # The token is optional: a missing key means anonymous access.
        auth_token = web_api.get("auth-token")
        headers = {"Authorization": f"Bearer {auth_token}"} if auth_token else {}

        async with aiohttp.ClientSession(headers=headers) as session:
            parsed_swhids = await swhids_discovery(swhids, session, api_url)
            for swhid, attr in parsed_swhids.items():
                if attr["known"]:
                    self.add(swhid, cur)

    def create_from(
        self, config: Dict[str, Any], input_file: Path, cur: sqlite3.Cursor
    ):
        """Create a new database with the SWHIDs present inside the input file.

        On success the cursor is closed and the insertions are committed.  On
        failure a message is printed and the database file is removed; the
        exception is not propagated.
        """
        self.create_table(cur)

        try:
            loop = asyncio.get_event_loop()
            loop.run_until_complete(self.parse_file(input_file, config, cur))
            cur.close()
            self.conn.commit()
        except APIError:
            print("Error during the api call")
            self._discard_db_file()
        except Exception:
            print("Failed to create database")
            self._discard_db_file()

    def _discard_db_file(self):
        """Remove the database file, tolerating a file that does not exist."""
        try:
            os.remove(self.db_file)
        except FileNotFoundError:
            # No file on disk (for instance an in-memory database): there is
            # nothing to clean up, and the error handlers must not raise.
            pass

    def check(self, swhid: str, cur: sqlite3.Cursor):
        """Check if a given SWHID is present or not inside the local database.

        The cursor is closed before returning, so callers need a fresh cursor
        for any further query.

        Returns:
            True when a row with this SWHID exists, False otherwise.
        """
        cur.execute("""SELECT 1 FROM swhid_db WHERE swhid=?""", (swhid,))
        res = cur.fetchone()
        cur.close()

        # fetchone() yields None when the query matched no row.
        return res is not None
def test_db_create_from(tmp_path, live_server, test_sample_folder, test_swhids_sample):
    """Every SWHID listed in ``present_swhids`` ends up in the database."""
    tmp_dbfile = tmp_path / "tmp_db.sqlite"
    api_url = live_server.url() + "/"
    config = {"web-api": {"url": api_url, "auth-token": None}}

    db = Db(tmp_dbfile)
    cur = db.conn.cursor()
    db.create_from(config, test_swhids_sample, cur)

    for swhid in present_swhids:
        # Db.check closes the cursor it is given: use a new one each time.
        cur = db.conn.cursor()
        assert db.check(swhid, cur)


def test_db_create_from_one_not_present(
    tmp_path, live_server, test_sample_folder, test_swhids_sample
):
    """A SWHID not in ``present_swhids`` is left out of the database."""
    not_present_swhid = "swh:1:cnt:fa8eacf43d8646129ae8adfa1648f9307d999999"
    # Build a local list instead of appending to the shared module-level
    # ``present_swhids``, which would leak into every later test using it.
    all_swhids = present_swhids + [not_present_swhid]

    tmp_dbfile = tmp_path / "tmp_db.sqlite"
    api_url = live_server.url() + "/"
    config = {"web-api": {"url": api_url, "auth-token": None}}

    db = Db(tmp_dbfile)
    cur = db.conn.cursor()
    db.create_from(config, test_swhids_sample, cur)

    for swhid in all_swhids:
        cur = db.conn.cursor()
        if swhid != not_present_swhid:
            assert db.check(swhid, cur)
        else:
            assert not db.check(swhid, cur)


def test_db_add(tmp_path):
    """A SWHID inserted with ``Db.add`` is found by ``Db.check``."""
    swhid = "swh:1:cnt:fa8eacf43d8646129ae8adfa1648f9307d999999"
    tmp_dbfile = tmp_path / "tmp_db.sqlite"
    db = Db(tmp_dbfile)
    cur = db.conn.cursor()

    db.create_table(cur)
    db.add(swhid, cur)
    assert db.check(swhid, cur)


def test_db_add_non_present_swhid(tmp_path):
    """``Db.check`` reports False for a SWHID that was never inserted."""
    swhid = "swh:1:cnt:fa8eacf43d8646129ae8adfa1648f9307d99999"
    tmp_dbfile = tmp_path / "tmp_db.sqlite"
    db = Db(tmp_dbfile)

    cur = db.conn.cursor()
    db.create_table(cur)
    assert not db.check(swhid, cur)