diff --git a/conftest.py b/conftest.py
new file mode 100644
--- /dev/null
+++ b/conftest.py
@@ -0,0 +1 @@
+pytest_plugins = ["swh.storage.pytest_plugin"]
diff --git a/mypy.ini b/mypy.ini
--- a/mypy.ini
+++ b/mypy.ini
@@ -11,5 +11,8 @@
 [mypy-pytest.*]
 ignore_missing_imports = True
 
+[mypy-psycopg2.*]
+ignore_missing_imports = True
+
 # [mypy-add_your_lib_here.*]
 # ignore_missing_imports = True
diff --git a/requirements-swh.txt b/requirements-swh.txt
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,2 +1,4 @@
 # Add here internal Software Heritage dependencies, one per line.
 swh.core[http] >= 0.3
+swh.model >= 0.9.0
+swh.storage >= 0.11.1
diff --git a/requirements.txt b/requirements.txt
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,11 @@
-# Add here external Python modules dependencies, one per line. Module names
-# should match https://pypi.python.org/pypi names. For the full spec or
-# dependency lines, see https://pip.readthedocs.org/en/1.1/requirements.html
-
+attrs==20.3.0
+attrs-strict==0.2.0
+Deprecated==1.2.10
+hypothesis==5.49.0
+iso8601==0.1.13
+psycopg2==2.8.6
+python-dateutil==2.8.1
+six==1.15.0
+sortedcontainers==2.3.0
+typing-extensions==3.7.4.3
+wrapt==1.12.1
diff --git a/swh/clearlydefined/mapping_utils.py b/swh/clearlydefined/mapping_utils.py
new file mode 100644
--- /dev/null
+++ b/swh/clearlydefined/mapping_utils.py
@@ -0,0 +1,43 @@
+# Copyright (C) 2017-2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU Affero General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from typing import Optional
+
+import psycopg2
+
+from swh.model.hashutil import hash_to_bytes
+from swh.model.hashutil import hash_to_hex
+
+
+def map_sha1_with_swhid(sha1: str, dsn: str) -> Optional[str]:
+    """
+    Return the SWHID of the content whose sha1 checksum is given.
+
+    Args:
+        sha1: hexadecimal representation of the content's sha1 checksum
+        dsn: connection string handed to psycopg2.connect
+
+    Returns:
+        The "swh:1:cnt:<sha1_git>" identifier built from the first matching
+        row of the content table, or None when sha1 is empty or no row
+        matches.
+    """
+    if not sha1:
+        return None
+    sha1_bytes = hash_to_bytes(sha1)
+    read_connection = psycopg2.connect(dsn=dsn)
+    try:
+        # Leaving the "with" block closes the cursor.
+        with read_connection.cursor() as cur:
+            cur.execute(
+                "SELECT sha1_git FROM content where sha1= %s;", (sha1_bytes,)
+            )
+            row = cur.fetchone()
+    finally:
+        # Release the connection even when the query raises.
+        read_connection.close()
+    if row is None:
+        return None
+    return "swh:1:cnt:{sha1_git}".format(sha1_git=hash_to_hex(row[0]))
diff --git a/swh/clearlydefined/tests/test_mapping.py b/swh/clearlydefined/tests/test_mapping.py
new file mode 100644
--- /dev/null
+++ b/swh/clearlydefined/tests/test_mapping.py
@@ -0,0 +1,81 @@
+# Copyright (C) 2017-2021 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU Affero General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import psycopg2
+
+from swh.clearlydefined.mapping_utils import map_sha1_with_swhid
+
+
+def add_data(dsn: str) -> None:
+    """Insert two fixture rows into the content table of the database at dsn."""
+    data = [
+        {
+            "sha1": "\\xa6628c8a4fbc08c29b8472e2222975e5b9918131",
+            "sha1_git": "\\xe103b11cbfecbc116dacbb1f9ab2a02176092a32",
+            "sha256": (
+                "\\x6ac599151a7aaa8ca5d38dc5bb61b49193a3cadc1ed33de5a57e4d1ecc53c846"
+            ),
+            "blake2s256": (
+                "\\xc509b320abede3580bb1de75a0efa09ba7416db9c8de845a4e1b4317c0b8a8d9"
+            ),
+            "length": 717,
+        },
+        {
+            "sha1": "\\xd1ece3dbe3e78a6648f37206f996e202acb3926b",
+            "sha1_git": "\\x095b80e14c3ea6254f57e94761f2313e32b1d58a",
+            "sha256": (
+                "\\x2a24791d738e4380d55e1c990dd0bd2bcdc98240d9a25c488804abfd814b8c96"
+            ),
+            "blake2s256": (
+                "\\xa18463fe94e1b4ee191815c2fa48948c44ffbdffcda49fad6591ac4c93f19aef"
+            ),
+            "length": 3138,
+        },
+    ]
+    connection = psycopg2.connect(dsn=dsn)
+    try:
+        with connection.cursor() as cur:
+            for row in data:
+                cur.execute(
+                    "INSERT INTO content "
+                    "(sha1, sha1_git, sha256, blake2s256, length) "
+                    "VALUES (%s,%s,%s,%s,%s);",
+                    (
+                        row["sha1"],
+                        row["sha1_git"],
+                        row["sha256"],
+                        row["blake2s256"],
+                        row["length"],
+                    ),
+                )
+        connection.commit()
+    finally:
+        # Release the connection even when an insert raises.
+        connection.close()
+
+
+def test_mapping(swh_storage_backend_config):
+    """A sha1 present in the content table maps to its swh:1:cnt SWHID."""
+    dsn = swh_storage_backend_config["db"]
+    add_data(dsn=dsn)
+    sha1 = "a6628c8a4fbc08c29b8472e2222975e5b9918131"
+    assert "swh:1:cnt:e103b11cbfecbc116dacbb1f9ab2a02176092a32" == map_sha1_with_swhid(
+        sha1=sha1, dsn=dsn
+    )
+
+
+def test_mapping_with_empty_sha1(swh_storage_backend_config):
+    """An empty sha1 yields None."""
+    dsn = swh_storage_backend_config["db"]
+    sha1 = ""
+    assert map_sha1_with_swhid(sha1=sha1, dsn=dsn) is None
+
+
+def test_mapping_with_wrong_sha1(swh_storage_backend_config):
+    """A checksum absent from the content table yields None."""
+    dsn = swh_storage_backend_config["db"]
+    add_data(dsn=dsn)
+    sha1 = "6ac599151a7aaa8ca5d38dc5bb61b49193a3cadc1ed33de5a57e4d1ecc53c846"
+    assert map_sha1_with_swhid(sha1=sha1, dsn=dsn) is None
diff --git a/swh/clearlydefined/tests/test_nothing.py b/swh/clearlydefined/tests/test_nothing.py
deleted file mode 100644
--- a/swh/clearlydefined/tests/test_nothing.py
+++ /dev/null
@@ -1,3 +0,0 @@
-def test_nothing():
-    # Placeholder; remove this when we add actual tests
-    pass