diff --git a/MANIFEST.in b/MANIFEST.in
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -5,5 +5,6 @@
 include conftest.py
 recursive-include swh/loader/package/tests/ *.tar.gz
 recursive-include swh py.typed
+recursive-include swh/loader/tests/data/ *
 recursive-include swh/loader/package/tests/data/ *
 recursive-include swh/loader/package/*/tests/data/ *
diff --git a/swh/loader/core/tests/__init__.py b/swh/loader/core/tests/__init__.py
--- a/swh/loader/core/tests/__init__.py
+++ b/swh/loader/core/tests/__init__.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2018 The Software Heritage developers
+# Copyright (C) 2018-2020 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
diff --git a/swh/loader/tests/__init__.py b/swh/loader/tests/__init__.py
--- a/swh/loader/tests/__init__.py
+++ b/swh/loader/tests/__init__.py
@@ -0,0 +1,31 @@
+# Copyright (C) 2018-2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import os
+import subprocess
+
+from pathlib import PosixPath
+from typing import Optional, Union
+
+
+def prepare_repository_from_archive(
+    archive_path: str,
+    filename: Optional[str] = None,
+    tmp_path: Union[PosixPath, str] = "/tmp",
+) -> str:
+    """Uncompress the archive at archive_path into tmp_path (through `tar xf`).
+
+    Returns the url file://<tmp_path>/<filename>, usable as an origin url;
+    without a filename, the basename of archive_path is used in its place.
+    A missing archive is not handled: subprocess.CalledProcessError is raised.
+    """
+    if not isinstance(tmp_path, str):
+        tmp_path = str(tmp_path)
+    # extract the archive content (folder/repository/dump) for the loader to ingest
+    subprocess.check_output(["tar", "xf", archive_path, "-C", tmp_path])
+    # build the origin url out of the filename, defaulting to the archive basename
+    _fname = filename if filename else os.path.basename(archive_path)
+    repo_url = f"file://{tmp_path}/{_fname}"
+    return repo_url
diff --git a/swh/loader/tests/data/0805nexter-1.1.0.tar.gz b/swh/loader/tests/data/0805nexter-1.1.0.tar.gz
new file mode 120000
--- /dev/null
+++ b/swh/loader/tests/data/0805nexter-1.1.0.tar.gz
@@ -0,0 +1 @@
+../../package/pypi/tests/data/https_files.pythonhosted.org/0805nexter-1.1.0.tar.gz
\ No newline at end of file
diff --git a/swh/loader/tests/test_init.py b/swh/loader/tests/test_init.py
new file mode 100644
--- /dev/null
+++ b/swh/loader/tests/test_init.py
@@ -0,0 +1,50 @@
+# Copyright (C) 2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import os
+import subprocess
+
+import pytest
+
+from swh.loader.tests import prepare_repository_from_archive
+
+
+def test_prepare_repository_from_archive_failure():
+    # a nonexistent archive is not handled, so the tar failure is raised as is
+    assert os.path.exists("unknown-archive") is False
+    with pytest.raises(subprocess.CalledProcessError, match="exit status 2"):
+        prepare_repository_from_archive("unknown-archive")
+
+
+def test_prepare_repository_from_archive(datadir, tmp_path):
+    archive_name = "0805nexter-1.1.0"
+    archive_path = os.path.join(str(datadir), f"{archive_name}.tar.gz")
+    assert os.path.exists(archive_path) is True
+
+    tmp_path = str(tmp_path)  # exercise the case where tmp_path is passed as a str
+    repo_url = prepare_repository_from_archive(
+        archive_path, filename=archive_name, tmp_path=tmp_path
+    )
+    expected_uncompressed_archive_path = os.path.join(tmp_path, archive_name)
+    assert repo_url == f"file://{expected_uncompressed_archive_path}"
+    assert os.path.exists(expected_uncompressed_archive_path)
+
+
+def test_prepare_repository_from_archive_no_filename(datadir, tmp_path):
+    archive_name = "0805nexter-1.1.0"
+    archive_path = os.path.join(str(datadir), f"{archive_name}.tar.gz")
+    assert os.path.exists(archive_path) is True
+
+    # here tmp_path is passed along as is, as a posix path (not converted to str)
+    repo_url = prepare_repository_from_archive(archive_path, tmp_path=tmp_path)
+
+    tmp_path = str(tmp_path)
+    expected_uncompressed_archive_path = os.path.join(tmp_path, archive_name)
+    expected_repo_url = os.path.join(tmp_path, f"{archive_name}.tar.gz")
+    assert repo_url == f"file://{expected_repo_url}"
+
+    # whether a filename is passed along does not influence the on-disk extraction,
+    # only the repo-url computation
+    assert os.path.exists(expected_uncompressed_archive_path)