diff --git a/swh/loader/package/tests/conftest.py b/swh/loader/package/tests/conftest.py
index 1c14a7f..397a32a 100644
--- a/swh/loader/package/tests/conftest.py
+++ b/swh/loader/package/tests/conftest.py
@@ -1,148 +1,149 @@
 # Copyright (C) 2019 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import logging
 import os
 import re
 
 import pytest
 
 from functools import partial
 from os import path
 from urllib.parse import urlparse
 
-from .common import DATADIR
-
 import swh.storage
 
 from swh.storage import get_storage as initial_get_storage
 
 logger = logging.getLogger(__name__)
 
 
 def get_storage(cls, args):
     if cls == 'filter':
         from swh.loader.package.storage import FilteringProxyStorage
         return FilteringProxyStorage(**args)
     if cls == 'buffer':
         from swh.loader.package.storage import BufferingProxyStorage
         return BufferingProxyStorage(**args)
     return initial_get_storage(cls, args)
 
 
 swh.storage.get_storage = get_storage
 
 
 # Check get_local_factory function
 # Maximum number of iteration checks to generate requests responses
 MAX_VISIT_FILES = 10
 
 
 @pytest.fixture
-def swh_config(monkeypatch):
-    conffile = os.path.join(DATADIR, 'loader.yml')
+def swh_config(monkeypatch, datadir):
+    conffile = os.path.join(datadir, 'loader.yml')
     monkeypatch.setenv('SWH_CONFIG_FILENAME', conffile)
     return conffile
 
 
 def get_response_cb(request, context, datadir, ignore_urls=[], visits=None):
     """Mount point callback to fetch on disk the content of a request
 
     This is meant to be used as 'body' argument of the requests_mock.get()
     method.
 
     It will look for files on the local filesystem based on the requested URL,
     using the following rules:
 
-    - files are searched in the DATADIR/ directory
+    - files are searched in the datadir/ directory
 
     - the local file name is the path part of the URL with path hierarchy
       markers (aka '/') replaced by '_'
 
     Eg. if you use the requests_mock fixture in your test file as:
 
         requests_mock.get('https://nowhere.com', body=get_response_cb)
         # or even
         requests_mock.get(re.compile('https://'), body=get_response_cb)
 
     then a call requests.get like:
 
         requests.get('https://nowhere.com/path/to/resource')
 
     will look the content of the response in:
 
-        DATADIR/resources/nowhere.com/path_to_resource
+        datadir/nowhere.com/path_to_resource
 
     Args:
         request (requests.Request): Object requests
         context (requests.Context): Object holding response metadata
             information (status_code, headers, etc...)
         ignore_urls (List): urls whose status response should be 404 even if
             the local file exists
         visits (Optional[Dict]): Map of url, number of visits.
            If None, disable multi visit support (default)
 
     Returns:
         Optional[FileDescriptor] on the on disk file to read from the test
         context
 
     """
     logger.debug('get_response_cb(%s, %s)', request, context)
     logger.debug('url: %s', request.url)
     logger.debug('ignore_urls: %s', ignore_urls)
     if request.url in ignore_urls:
         context.status_code = 404
         return None
     url = urlparse(request.url)
     dirname = url.hostname  # pypi.org | files.pythonhosted.org
     # url.path: pypi//json -> local file: pypi__json
     filename = url.path[1:]
     if filename.endswith('/'):
         filename = filename[:-1]
     filename = filename.replace('/', '_')
     filepath = path.join(datadir, dirname, filename)
     if visits is not None:
         visit = visits.get(url, 0)
         visits[url] = visit + 1
         if visit:
             filepath = filepath + '_visit%s' % visit
     if not path.isfile(filepath):
         logger.debug('not found filepath: %s', filepath)
         context.status_code = 404
         return None
     fd = open(filepath, 'rb')
     context.headers['content-length'] = str(path.getsize(filepath))
     return fd
 
 
 @pytest.fixture
-def datadir():
-    return DATADIR
+def datadir(request):
+    """By default, returns the test directory
+
+    """
+    return path.join(path.dirname(request.fspath), 'data')
 
 
 def local_get_factory(ignore_urls=[], has_multi_visit=False):
     @pytest.fixture
     def local_get(requests_mock, datadir):
         if not has_multi_visit:
             cb = partial(get_response_cb, ignore_urls=ignore_urls,
                          datadir=datadir)
             requests_mock.get(re.compile('https://'), body=cb)
         else:
             visits = {}
             requests_mock.get(re.compile('https://'), body=partial(
                 get_response_cb, ignore_urls=ignore_urls, visits=visits,
                 datadir=datadir)
             )
 
         return requests_mock
 
     return local_get
 
 
 local_get = local_get_factory([])
 
 local_get_visits = local_get_factory(has_multi_visit=True)
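For reference, the URL-to-file mapping that get_response_cb implements above can be exercised on its own; the following self-contained sketch restates that rule outside the fixture machinery (the helper name and the sample datadir value are illustrative, not part of the code base):

    from os import path
    from urllib.parse import urlparse

    def expected_local_path(datadir, url):
        # Mirror get_response_cb: the URL hostname becomes a directory under
        # datadir, and '/' in the URL path is replaced by '_'.
        parsed = urlparse(url)
        filename = parsed.path[1:]
        if filename.endswith('/'):
            filename = filename[:-1]
        filename = filename.replace('/', '_')
        return path.join(datadir, parsed.hostname, filename)

    # e.g. the PyPI metadata URL used by the loader tests maps to the
    # pypi.org/pypi_0805nexter_json data file renamed below:
    assert expected_local_path(
        'swh/loader/package/tests/data',
        'https://pypi.org/pypi/0805nexter/json',
    ) == 'swh/loader/package/tests/data/pypi.org/pypi_0805nexter_json'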
diff --git a/swh/loader/package/tests/resources/deposit.softwareheritage.org/1_private_666_meta b/swh/loader/package/tests/data/deposit.softwareheritage.org/1_private_666_meta
similarity index 100%
rename from swh/loader/package/tests/resources/deposit.softwareheritage.org/1_private_666_meta
rename to swh/loader/package/tests/data/deposit.softwareheritage.org/1_private_666_meta
diff --git a/swh/loader/package/tests/resources/deposit.softwareheritage.org/1_private_666_raw b/swh/loader/package/tests/data/deposit.softwareheritage.org/1_private_666_raw
similarity index 100%
rename from swh/loader/package/tests/resources/deposit.softwareheritage.org/1_private_666_raw
rename to swh/loader/package/tests/data/deposit.softwareheritage.org/1_private_666_raw
diff --git a/swh/loader/package/tests/resources/deposit.softwareheritage.org/hello_2.10.json b/swh/loader/package/tests/data/deposit.softwareheritage.org/hello_2.10.json
similarity index 100%
rename from swh/loader/package/tests/resources/deposit.softwareheritage.org/hello_2.10.json
rename to swh/loader/package/tests/data/deposit.softwareheritage.org/hello_2.10.json
diff --git a/swh/loader/package/tests/resources/deposit.softwareheritage.org/hello_2.10.orig.tar.gz b/swh/loader/package/tests/data/deposit.softwareheritage.org/hello_2.10.orig.tar.gz
similarity index 100%
rename from swh/loader/package/tests/resources/deposit.softwareheritage.org/hello_2.10.orig.tar.gz
rename to swh/loader/package/tests/data/deposit.softwareheritage.org/hello_2.10.orig.tar.gz
diff --git a/swh/loader/package/tests/resources/example.com/file.json b/swh/loader/package/tests/data/example.com/file.json
similarity index 100%
rename from swh/loader/package/tests/resources/example.com/file.json
rename to swh/loader/package/tests/data/example.com/file.json
diff --git a/swh/loader/package/tests/resources/example.com/file.json_visit1 b/swh/loader/package/tests/data/example.com/file.json_visit1
similarity index 100%
rename from swh/loader/package/tests/resources/example.com/file.json_visit1
rename to swh/loader/package/tests/data/example.com/file.json_visit1
diff --git a/swh/loader/package/tests/resources/example.com/other.json b/swh/loader/package/tests/data/example.com/other.json
similarity index 100%
rename from swh/loader/package/tests/resources/example.com/other.json
rename to swh/loader/package/tests/data/example.com/other.json
diff --git a/swh/loader/package/tests/resources/files.pythonhosted.org/0805nexter-1.1.0.tar.gz b/swh/loader/package/tests/data/files.pythonhosted.org/0805nexter-1.1.0.tar.gz
similarity index 100%
rename from swh/loader/package/tests/resources/files.pythonhosted.org/0805nexter-1.1.0.tar.gz
rename to swh/loader/package/tests/data/files.pythonhosted.org/0805nexter-1.1.0.tar.gz
diff --git a/swh/loader/package/tests/resources/files.pythonhosted.org/0805nexter-1.1.0.zip b/swh/loader/package/tests/data/files.pythonhosted.org/0805nexter-1.1.0.zip
similarity index 100%
rename from swh/loader/package/tests/resources/files.pythonhosted.org/0805nexter-1.1.0.zip
rename to swh/loader/package/tests/data/files.pythonhosted.org/0805nexter-1.1.0.zip
diff --git a/swh/loader/package/tests/resources/files.pythonhosted.org/0805nexter-1.2.0.zip b/swh/loader/package/tests/data/files.pythonhosted.org/0805nexter-1.2.0.zip
similarity index 100%
rename from swh/loader/package/tests/resources/files.pythonhosted.org/0805nexter-1.2.0.zip
rename to swh/loader/package/tests/data/files.pythonhosted.org/0805nexter-1.2.0.zip
diff --git a/swh/loader/package/tests/resources/files.pythonhosted.org/0805nexter-1.3.0.zip b/swh/loader/package/tests/data/files.pythonhosted.org/0805nexter-1.3.0.zip
similarity index 100%
rename from swh/loader/package/tests/resources/files.pythonhosted.org/0805nexter-1.3.0.zip
rename to swh/loader/package/tests/data/files.pythonhosted.org/0805nexter-1.3.0.zip
diff --git a/swh/loader/package/tests/resources/files.pythonhosted.org/0805nexter-1.4.0.zip b/swh/loader/package/tests/data/files.pythonhosted.org/0805nexter-1.4.0.zip
similarity index 100%
rename from swh/loader/package/tests/resources/files.pythonhosted.org/0805nexter-1.4.0.zip
rename to swh/loader/package/tests/data/files.pythonhosted.org/0805nexter-1.4.0.zip
diff --git a/swh/loader/package/tests/resources/files.pythonhosted.org/nexter-1.1.0.tar.gz b/swh/loader/package/tests/data/files.pythonhosted.org/nexter-1.1.0.tar.gz
similarity index 100%
rename from swh/loader/package/tests/resources/files.pythonhosted.org/nexter-1.1.0.tar.gz
rename to swh/loader/package/tests/data/files.pythonhosted.org/nexter-1.1.0.tar.gz
diff --git a/swh/loader/package/tests/resources/files.pythonhosted.org/nexter-1.1.0.zip b/swh/loader/package/tests/data/files.pythonhosted.org/nexter-1.1.0.zip
similarity index 100%
rename from swh/loader/package/tests/resources/files.pythonhosted.org/nexter-1.1.0.zip
rename to swh/loader/package/tests/data/files.pythonhosted.org/nexter-1.1.0.zip
diff --git a/swh/loader/package/tests/resources/files.pythonhosted.org/packages_70_97_c49fb8ec24a7aaab54c3dbfbb5a6ca1431419d9ee0f6c363d9ad01d2b8b1_0805nexter-1.3.0.zip b/swh/loader/package/tests/data/files.pythonhosted.org/packages_70_97_c49fb8ec24a7aaab54c3dbfbb5a6ca1431419d9ee0f6c363d9ad01d2b8b1_0805nexter-1.3.0.zip
similarity index 100%
rename from swh/loader/package/tests/resources/files.pythonhosted.org/packages_70_97_c49fb8ec24a7aaab54c3dbfbb5a6ca1431419d9ee0f6c363d9ad01d2b8b1_0805nexter-1.3.0.zip
rename to swh/loader/package/tests/data/files.pythonhosted.org/packages_70_97_c49fb8ec24a7aaab54c3dbfbb5a6ca1431419d9ee0f6c363d9ad01d2b8b1_0805nexter-1.3.0.zip
diff --git a/swh/loader/package/tests/resources/files.pythonhosted.org/packages_c4_a0_4562cda161dc4ecbbe9e2a11eb365400c0461845c5be70d73869786809c4_0805nexter-1.2.0.zip b/swh/loader/package/tests/data/files.pythonhosted.org/packages_c4_a0_4562cda161dc4ecbbe9e2a11eb365400c0461845c5be70d73869786809c4_0805nexter-1.2.0.zip
similarity index 100%
rename from swh/loader/package/tests/resources/files.pythonhosted.org/packages_c4_a0_4562cda161dc4ecbbe9e2a11eb365400c0461845c5be70d73869786809c4_0805nexter-1.2.0.zip
rename to swh/loader/package/tests/data/files.pythonhosted.org/packages_c4_a0_4562cda161dc4ecbbe9e2a11eb365400c0461845c5be70d73869786809c4_0805nexter-1.2.0.zip
diff --git a/swh/loader/package/tests/resources/files.pythonhosted.org/packages_c4_a0_4562cda161dc4ecbbe9e2a11eb365400c0461845c5be70d73869786809c4_0805nexter-1.2.0.zip_visit1 b/swh/loader/package/tests/data/files.pythonhosted.org/packages_c4_a0_4562cda161dc4ecbbe9e2a11eb365400c0461845c5be70d73869786809c4_0805nexter-1.2.0.zip_visit1
similarity index 100%
rename from swh/loader/package/tests/resources/files.pythonhosted.org/packages_c4_a0_4562cda161dc4ecbbe9e2a11eb365400c0461845c5be70d73869786809c4_0805nexter-1.2.0.zip_visit1
rename to swh/loader/package/tests/data/files.pythonhosted.org/packages_c4_a0_4562cda161dc4ecbbe9e2a11eb365400c0461845c5be70d73869786809c4_0805nexter-1.2.0.zip_visit1
diff --git a/swh/loader/package/tests/resources/files.pythonhosted.org/packages_ec_65_c0116953c9a3f47de89e71964d6c7b0c783b01f29fa3390584dbf3046b4d_0805nexter-1.1.0.zip b/swh/loader/package/tests/data/files.pythonhosted.org/packages_ec_65_c0116953c9a3f47de89e71964d6c7b0c783b01f29fa3390584dbf3046b4d_0805nexter-1.1.0.zip
similarity index 100%
rename from swh/loader/package/tests/resources/files.pythonhosted.org/packages_ec_65_c0116953c9a3f47de89e71964d6c7b0c783b01f29fa3390584dbf3046b4d_0805nexter-1.1.0.zip
rename to swh/loader/package/tests/data/files.pythonhosted.org/packages_ec_65_c0116953c9a3f47de89e71964d6c7b0c783b01f29fa3390584dbf3046b4d_0805nexter-1.1.0.zip
diff --git a/swh/loader/package/tests/resources/files.pythonhosted.org/packages_ec_65_c0116953c9a3f47de89e71964d6c7b0c783b01f29fa3390584dbf3046b4d_0805nexter-1.1.0.zip_visit1 b/swh/loader/package/tests/data/files.pythonhosted.org/packages_ec_65_c0116953c9a3f47de89e71964d6c7b0c783b01f29fa3390584dbf3046b4d_0805nexter-1.1.0.zip_visit1
similarity index 100%
rename from swh/loader/package/tests/resources/files.pythonhosted.org/packages_ec_65_c0116953c9a3f47de89e71964d6c7b0c783b01f29fa3390584dbf3046b4d_0805nexter-1.1.0.zip_visit1
rename to swh/loader/package/tests/data/files.pythonhosted.org/packages_ec_65_c0116953c9a3f47de89e71964d6c7b0c783b01f29fa3390584dbf3046b4d_0805nexter-1.1.0.zip_visit1
diff --git a/swh/loader/package/tests/resources/ftp.gnu.org/gnu_8sync_8sync-0.1.0.tar.gz b/swh/loader/package/tests/data/ftp.gnu.org/gnu_8sync_8sync-0.1.0.tar.gz
similarity index 100%
rename from swh/loader/package/tests/resources/ftp.gnu.org/gnu_8sync_8sync-0.1.0.tar.gz
rename to swh/loader/package/tests/data/ftp.gnu.org/gnu_8sync_8sync-0.1.0.tar.gz
diff --git a/swh/loader/package/tests/resources/ftp.gnu.org/gnu_8sync_8sync-0.2.0.tar.gz b/swh/loader/package/tests/data/ftp.gnu.org/gnu_8sync_8sync-0.2.0.tar.gz
similarity index 100%
rename from swh/loader/package/tests/resources/ftp.gnu.org/gnu_8sync_8sync-0.2.0.tar.gz
rename to swh/loader/package/tests/data/ftp.gnu.org/gnu_8sync_8sync-0.2.0.tar.gz
diff --git a/swh/loader/package/tests/resources/json/0805nexter-unpublished-release.json b/swh/loader/package/tests/data/json/0805nexter-unpublished-release.json
similarity index 100%
rename from swh/loader/package/tests/resources/json/0805nexter-unpublished-release.json
rename to swh/loader/package/tests/data/json/0805nexter-unpublished-release.json
diff --git a/swh/loader/package/tests/resources/loader.yml b/swh/loader/package/tests/data/loader.yml
similarity index 100%
rename from swh/loader/package/tests/resources/loader.yml
rename to swh/loader/package/tests/data/loader.yml
diff --git a/swh/loader/package/tests/resources/pypi.org/pypi_0805nexter_json b/swh/loader/package/tests/data/pypi.org/pypi_0805nexter_json
similarity index 100%
rename from swh/loader/package/tests/resources/pypi.org/pypi_0805nexter_json
rename to swh/loader/package/tests/data/pypi.org/pypi_0805nexter_json
diff --git a/swh/loader/package/tests/resources/pypi.org/pypi_0805nexter_json_visit1 b/swh/loader/package/tests/data/pypi.org/pypi_0805nexter_json_visit1
similarity index 100%
rename from swh/loader/package/tests/resources/pypi.org/pypi_0805nexter_json_visit1
rename to swh/loader/package/tests/data/pypi.org/pypi_0805nexter_json_visit1
diff --git a/swh/loader/package/tests/resources/pypi.org/pypi_nexter_json b/swh/loader/package/tests/data/pypi.org/pypi_nexter_json
similarity index 100%
rename from swh/loader/package/tests/resources/pypi.org/pypi_nexter_json
rename to swh/loader/package/tests/data/pypi.org/pypi_nexter_json
diff --git a/swh/loader/package/tests/resources/registry.npmjs.org/org_-_org-0.0.2.tgz b/swh/loader/package/tests/data/registry.npmjs.org/org_-_org-0.0.2.tgz
similarity index 100%
rename from swh/loader/package/tests/resources/registry.npmjs.org/org_-_org-0.0.2.tgz
rename to swh/loader/package/tests/data/registry.npmjs.org/org_-_org-0.0.2.tgz
diff --git a/swh/loader/package/tests/resources/registry.npmjs.org/org_-_org-0.0.3.tgz b/swh/loader/package/tests/data/registry.npmjs.org/org_-_org-0.0.3.tgz
similarity index 100%
rename from swh/loader/package/tests/resources/registry.npmjs.org/org_-_org-0.0.3.tgz
rename to swh/loader/package/tests/data/registry.npmjs.org/org_-_org-0.0.3.tgz
diff --git a/swh/loader/package/tests/resources/registry.npmjs.org/org_-_org-0.0.4.tgz b/swh/loader/package/tests/data/registry.npmjs.org/org_-_org-0.0.4.tgz
similarity index 100%
rename from swh/loader/package/tests/resources/registry.npmjs.org/org_-_org-0.0.4.tgz
rename to swh/loader/package/tests/data/registry.npmjs.org/org_-_org-0.0.4.tgz
diff --git a/swh/loader/package/tests/resources/registry.npmjs.org/org_-_org-0.0.5.tgz b/swh/loader/package/tests/data/registry.npmjs.org/org_-_org-0.0.5.tgz
similarity index 100%
rename from swh/loader/package/tests/resources/registry.npmjs.org/org_-_org-0.0.5.tgz
rename to swh/loader/package/tests/data/registry.npmjs.org/org_-_org-0.0.5.tgz
diff --git a/swh/loader/package/tests/resources/registry.npmjs.org/org_-_org-0.1.0.tgz b/swh/loader/package/tests/data/registry.npmjs.org/org_-_org-0.1.0.tgz
similarity index 100%
rename from swh/loader/package/tests/resources/registry.npmjs.org/org_-_org-0.1.0.tgz
rename to swh/loader/package/tests/data/registry.npmjs.org/org_-_org-0.1.0.tgz
diff --git a/swh/loader/package/tests/resources/registry.npmjs.org/org_-_org-0.2.0.tgz b/swh/loader/package/tests/data/registry.npmjs.org/org_-_org-0.2.0.tgz
similarity index 100%
rename from swh/loader/package/tests/resources/registry.npmjs.org/org_-_org-0.2.0.tgz
rename to swh/loader/package/tests/data/registry.npmjs.org/org_-_org-0.2.0.tgz
diff --git a/swh/loader/package/tests/resources/replicate.npmjs.com/org b/swh/loader/package/tests/data/replicate.npmjs.com/org
similarity index 100%
rename from swh/loader/package/tests/resources/replicate.npmjs.com/org
rename to swh/loader/package/tests/data/replicate.npmjs.com/org
diff --git a/swh/loader/package/tests/resources/replicate.npmjs.com/org_visit1 b/swh/loader/package/tests/data/replicate.npmjs.com/org_visit1
similarity index 100%
rename from swh/loader/package/tests/resources/replicate.npmjs.com/org_visit1
rename to swh/loader/package/tests/data/replicate.npmjs.com/org_visit1
diff --git a/swh/loader/package/tests/fixture/__init__.py b/swh/loader/package/tests/fixture/__init__.py
new file mode 100644
index 0000000..e69de29
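The fixture/ test package whose tests are diffed next relies on a sibling conftest.py that overrides the datadir fixture and exposes a DATADIR constant; that conftest.py itself is not touched by this diff. A minimal sketch of what such an override presumably looks like (the directory name and docstring are assumptions):

    # hypothetical swh/loader/package/tests/fixture/conftest.py (not in this diff)
    from os import path

    import pytest

    # assumed layout: this sub-package ships its own data directory
    DATADIR = path.join(path.abspath(path.dirname(__file__)), 'data')

    @pytest.fixture
    def datadir():
        """Override the top-level datadir fixture so tests in this
        sub-package read their data files from DATADIR instead."""
        return DATADIR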
diff --git a/swh/loader/package/tests/fixture/test_conftest.py b/swh/loader/package/tests/fixture/test_conftest.py
index 17361e1..3cdcbba 100644
--- a/swh/loader/package/tests/fixture/test_conftest.py
+++ b/swh/loader/package/tests/fixture/test_conftest.py
@@ -1,19 +1,24 @@
 # Copyright (C) 2019 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import requests
+from .conftest import DATADIR
 
 
 # In this arborescence, we override in the local conftest.py module the
 # "datadir" fixture to specify where to retrieve the data files from.
 
 
 def test_local_get_with_datadir_fixture_override(local_get):
     """Override datadir fixture should retrieve data from elsewhere
 
     """
     response = requests.get('https://example.com/file.json')
     assert response.ok
     assert response.json() == {'welcome': 'you'}
+
+
+def test_data_dir_override(datadir):
+    assert datadir == DATADIR
diff --git a/swh/loader/package/tests/test_conftest.py b/swh/loader/package/tests/test_conftest.py
index dce8a41..8683ee3 100644
--- a/swh/loader/package/tests/test_conftest.py
+++ b/swh/loader/package/tests/test_conftest.py
@@ -1,83 +1,89 @@
 # Copyright (C) 2019 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
 import requests
+from os import path
 
 from swh.loader.package.tests.conftest import local_get_factory
 
 
 def test_get_response_cb_with_visits_nominal(local_get_visits):
     response = requests.get('https://example.com/file.json')
     assert response.ok
     assert response.json() == {'hello': 'you'}
 
     response = requests.get('https://example.com/file.json')
     assert response.ok
     assert response.json() == {'hello': 'world'}
 
     response = requests.get('https://example.com/file.json')
     assert not response.ok
     assert response.status_code == 404
 
 
 def test_get_response_cb_with_visits(local_get_visits):
     response = requests.get('https://example.com/file.json')
     assert response.ok
     assert response.json() == {'hello': 'you'}
 
     response = requests.get('https://example.com/other.json')
     assert response.ok
     assert response.json() == "foobar"
 
     response = requests.get('https://example.com/file.json')
     assert response.ok
     assert response.json() == {'hello': 'world'}
 
     response = requests.get('https://example.com/other.json')
     assert not response.ok
     assert response.status_code == 404
 
     response = requests.get('https://example.com/file.json')
     assert not response.ok
     assert response.status_code == 404
 
 
 def test_get_response_cb_no_visit(local_get):
     response = requests.get('https://example.com/file.json')
     assert response.ok
     assert response.json() == {'hello': 'you'}
 
     response = requests.get('https://example.com/file.json')
     assert response.ok
     assert response.json() == {'hello': 'you'}
 
 
 local_get_ignore = local_get_factory(
     ignore_urls=['https://example.com/file.json'],
     has_multi_visit=False,
 )
 
 
 def test_get_response_cb_ignore_url(local_get_ignore):
     response = requests.get('https://example.com/file.json')
     assert not response.ok
     assert response.status_code == 404
 
 
 local_get_ignore_and_visit = local_get_factory(
     ignore_urls=['https://example.com/file.json'],
     has_multi_visit=True,
 )
 
 
 def test_get_response_cb_ignore_url_with_visit(local_get_ignore_and_visit):
     response = requests.get('https://example.com/file.json')
     assert not response.ok
     assert response.status_code == 404
 
     response = requests.get('https://example.com/file.json')
     assert not response.ok
     assert response.status_code == 404
+
+
+def test_data_dir(datadir):
+    expected_datadir = path.join(path.abspath(path.dirname(__file__)), 'data')
+    assert datadir == expected_datadir
diff --git a/swh/loader/package/tests/test_npm.py b/swh/loader/package/tests/test_npm.py
index 4107ff8..301d67c 100644
--- a/swh/loader/package/tests/test_npm.py
+++ b/swh/loader/package/tests/test_npm.py
@@ -1,525 +1,525 @@
 # Copyright (C) 2019 The Software Heritage developers
 # See the AUTHORS file at the top-level directory of this distribution
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file
for more information import json import os from swh.model.hashutil import hash_to_bytes from swh.loader.package.npm import ( parse_npm_package_author, extract_npm_package_author ) from swh.loader.package.tests.common import ( - DATADIR, check_snapshot, check_metadata_paths + check_snapshot, check_metadata_paths ) from swh.loader.package.npm import NpmLoader def _parse_author_string_test(author_str, expected_result): assert parse_npm_package_author(author_str) == expected_result assert parse_npm_package_author(' %s' % author_str) == expected_result assert parse_npm_package_author('%s ' % author_str) == expected_result def test_parse_npm_package_author(): _parse_author_string_test( 'John Doe', { 'name': 'John Doe' } ) _parse_author_string_test( '', { 'email': 'john.doe@foo.bar' } ) _parse_author_string_test( '(https://john.doe)', { 'url': 'https://john.doe' } ) _parse_author_string_test( 'John Doe ', { 'name': 'John Doe', 'email': 'john.doe@foo.bar' } ) _parse_author_string_test( 'John Doe', { 'name': 'John Doe', 'email': 'john.doe@foo.bar' } ) _parse_author_string_test( 'John Doe (https://john.doe)', { 'name': 'John Doe', 'url': 'https://john.doe' } ) _parse_author_string_test( 'John Doe(https://john.doe)', { 'name': 'John Doe', 'url': 'https://john.doe' } ) _parse_author_string_test( ' (https://john.doe)', { 'email': 'john.doe@foo.bar', 'url': 'https://john.doe' } ) _parse_author_string_test( '(https://john.doe) ', { 'email': 'john.doe@foo.bar', 'url': 'https://john.doe' } ) _parse_author_string_test( 'John Doe (https://john.doe)', { 'name': 'John Doe', 'email': 'john.doe@foo.bar', 'url': 'https://john.doe' } ) _parse_author_string_test( 'John Doe (https://john.doe) ', { 'name': 'John Doe', 'email': 'john.doe@foo.bar', 'url': 'https://john.doe' } ) _parse_author_string_test( 'John Doe (https://john.doe)', { 'name': 'John Doe', 'email': 'john.doe@foo.bar', 'url': 'https://john.doe' } ) _parse_author_string_test( 'John Doe(https://john.doe)', { 'name': 'John Doe', 'email': 'john.doe@foo.bar', 'url': 'https://john.doe' } ) _parse_author_string_test('', {}) _parse_author_string_test('<>', {}) _parse_author_string_test(' <>', {}) _parse_author_string_test('<>()', {}) _parse_author_string_test('<> ()', {}) _parse_author_string_test('()', {}) _parse_author_string_test(' ()', {}) _parse_author_string_test( 'John Doe <> ()', { 'name': 'John Doe' } ) _parse_author_string_test( 'John Doe <>', { 'name': 'John Doe' } ) _parse_author_string_test( 'John Doe ()', { 'name': 'John Doe' } ) -def test_extract_npm_package_author(): +def test_extract_npm_package_author(datadir): package_metadata_filepath = os.path.join( - DATADIR, 'replicate.npmjs.com', 'org_visit1') + datadir, 'replicate.npmjs.com', 'org_visit1') with open(package_metadata_filepath) as json_file: package_metadata = json.load(json_file) extract_npm_package_author(package_metadata['versions']['0.0.2']) == \ { 'fullname': b'mooz ', 'name': b'mooz', 'email': b'stillpedant@gmail.com' } assert ( extract_npm_package_author(package_metadata['versions']['0.0.3']) == { 'fullname': b'Masafumi Oyamada ', 'name': b'Masafumi Oyamada', 'email': b'stillpedant@gmail.com' } ) package_json = json.loads(''' { "name": "highlightjs-line-numbers.js", "version": "2.7.0", "description": "Highlight.js line numbers plugin.", "main": "src/highlightjs-line-numbers.js", "dependencies": {}, "devDependencies": { "gulp": "^4.0.0", "gulp-rename": "^1.4.0", "gulp-replace": "^0.6.1", "gulp-uglify": "^1.2.0" }, "repository": { "type": "git", "url": 
"https://github.com/wcoder/highlightjs-line-numbers.js.git" }, "author": "Yauheni Pakala ", "license": "MIT", "bugs": { "url": "https://github.com/wcoder/highlightjs-line-numbers.js/issues" }, "homepage": "http://wcoder.github.io/highlightjs-line-numbers.js/" }''') # noqa assert extract_npm_package_author(package_json) == \ { 'fullname': b'Yauheni Pakala ', 'name': b'Yauheni Pakala', 'email': b'evgeniy.pakalo@gmail.com' } package_json = json.loads(''' { "name": "3-way-diff", "version": "0.0.1", "description": "3-way diffing of JavaScript objects", "main": "index.js", "authors": [ { "name": "Shawn Walsh", "url": "https://github.com/shawnpwalsh" }, { "name": "Markham F Rollins IV", "url": "https://github.com/mrollinsiv" } ], "keywords": [ "3-way diff", "3 way diff", "three-way diff", "three way diff" ], "devDependencies": { "babel-core": "^6.20.0", "babel-preset-es2015": "^6.18.0", "mocha": "^3.0.2" }, "dependencies": { "lodash": "^4.15.0" } }''') assert extract_npm_package_author(package_json) == \ { 'fullname': b'Shawn Walsh', 'name': b'Shawn Walsh', 'email': None } package_json = json.loads(''' { "name": "yfe-ynpm", "version": "1.0.0", "homepage": "http://gitlab.ywwl.com/yfe/yfe-ynpm", "repository": { "type": "git", "url": "git@gitlab.ywwl.com:yfe/yfe-ynpm.git" }, "author": [ "fengmk2 (https://fengmk2.com)", "xufuzi (https://7993.org)" ], "license": "MIT" }''') assert extract_npm_package_author(package_json) == \ { 'fullname': b'fengmk2 ', 'name': b'fengmk2', 'email': b'fengmk2@gmail.com' } package_json = json.loads(''' { "name": "umi-plugin-whale", "version": "0.0.8", "description": "Internal contract component", "authors": { "name": "xiaohuoni", "email": "448627663@qq.com" }, "repository": "alitajs/whale", "devDependencies": { "np": "^3.0.4", "umi-tools": "*" }, "license": "MIT" }''') assert extract_npm_package_author(package_json) == \ { 'fullname': b'xiaohuoni <448627663@qq.com>', 'name': b'xiaohuoni', 'email': b'448627663@qq.com' } def normalize_hashes(hashes): if isinstance(hashes, str): return hash_to_bytes(hashes) if isinstance(hashes, list): return [hash_to_bytes(x) for x in hashes] return {hash_to_bytes(k): hash_to_bytes(v) for k, v in hashes.items()} _expected_new_contents_first_visit = normalize_hashes([ '4ce3058e16ab3d7e077f65aabf855c34895bf17c', '858c3ceee84c8311adc808f8cdb30d233ddc9d18', '0fa33b4f5a4e0496da6843a38ff1af8b61541996', '85a410f8ef8eb8920f2c384a9555566ad4a2e21b', '9163ac8025923d5a45aaac482262893955c9b37b', '692cf623b8dd2c5df2c2998fd95ae4ec99882fb4', '18c03aac6d3e910efb20039c15d70ab5e0297101', '41265c42446aac17ca769e67d1704f99e5a1394d', '783ff33f5882813dca9239452c4a7cadd4dba778', 'b029cfb85107aee4590c2434a3329bfcf36f8fa1', '112d1900b4c2e3e9351050d1b542c9744f9793f3', '5439bbc4bd9a996f1a38244e6892b71850bc98fd', 'd83097a2f994b503185adf4e719d154123150159', 'd0939b4898e83090ee55fd9d8a60e312cfadfbaf', 'b3523a26f7147e4af40d9d462adaae6d49eda13e', 'cd065fb435d6fb204a8871bcd623d0d0e673088c', '2854a40855ad839a54f4b08f5cff0cf52fca4399', 'b8a53bbaac34ebb8c6169d11a4b9f13b05c583fe', '0f73d56e1cf480bded8a1ecf20ec6fc53c574713', '0d9882b2dfafdce31f4e77fe307d41a44a74cefe', '585fc5caab9ead178a327d3660d35851db713df1', 'e8cd41a48d79101977e3036a87aeb1aac730686f', '5414efaef33cceb9f3c9eb5c4cc1682cd62d14f7', '9c3cc2763bf9e9e37067d3607302c4776502df98', '3649a68410e354c83cd4a38b66bd314de4c8f5c9', 'e96ed0c091de1ebdf587104eaf63400d1974a1fe', '078ca03d2f99e4e6eab16f7b75fbb7afb699c86c', '38de737da99514de6559ff163c988198bc91367a', ]) _expected_new_directories_first_visit = normalize_hashes([ 
'3370d20d6f96dc1c9e50f083e2134881db110f4f', '42753c0c2ab00c4501b552ac4671c68f3cf5aece', 'd7895533ef5edbcffdea3f057d9fef3a1ef845ce', '80579be563e2ef3e385226fe7a3f079b377f142c', '3b0ddc6a9e58b4b53c222da4e27b280b6cda591c', 'bcad03ce58ac136f26f000990fc9064e559fe1c0', '5fc7e82a1bc72e074665c6078c6d3fad2f13d7ca', 'e3cd26beba9b1e02f6762ef54bd9ac80cc5f25fd', '584b5b4b6cf7f038095e820b99386a9c232de931', '184c8d6d0d242f2b1792ef9d3bf396a5434b7f7a', 'bb5f4ee143c970367eb409f2e4c1104898048b9d', '1b95491047add1103db0dfdfa84a9735dcb11e88', 'a00c6de13471a2d66e64aca140ddb21ef5521e62', '5ce6c1cd5cda2d546db513aaad8c72a44c7771e2', 'c337091e349b6ac10d38a49cdf8c2401ef9bb0f2', '202fafcd7c0f8230e89d5496ad7f44ab12b807bf', '775cc516543be86c15c1dc172f49c0d4e6e78235', 'ff3d1ead85a14f891e8b3fa3a89de39db1b8de2e', ]) _expected_new_revisions_first_visit = normalize_hashes({ 'd8a1c7474d2956ac598a19f0f27d52f7015f117e': '42753c0c2ab00c4501b552ac4671c68f3cf5aece', '5f9eb78af37ffd12949f235e86fac04898f9f72a': '3370d20d6f96dc1c9e50f083e2134881db110f4f', 'ba019b192bdb94bd0b5bd68b3a5f92b5acc2239a': 'd7895533ef5edbcffdea3f057d9fef3a1ef845ce'} ) _expected_new_snapshot_first_visit_id = normalize_hashes( 'd0587e1195aed5a8800411a008f2f2d627f18e2d') _expected_branches_first_visit = { 'HEAD': { 'target': 'releases/0.0.4', 'target_type': 'alias' }, 'releases/0.0.2': { 'target': 'd8a1c7474d2956ac598a19f0f27d52f7015f117e', 'target_type': 'revision' }, 'releases/0.0.3': { 'target': '5f9eb78af37ffd12949f235e86fac04898f9f72a', 'target_type': 'revision' }, 'releases/0.0.4': { 'target': 'ba019b192bdb94bd0b5bd68b3a5f92b5acc2239a', 'target_type': 'revision' } } def package_url(package): return 'https://www.npmjs.com/package/%s' % package def package_metadata_url(package): return 'https://replicate.npmjs.com/%s/' % package def test_revision_metadata_structure(swh_config, local_get): package = 'org' loader = NpmLoader(package, package_url(package), package_metadata_url(package)) actual_load_status = loader.load() assert actual_load_status['status'] == 'eventful' expected_revision_id = hash_to_bytes( 'd8a1c7474d2956ac598a19f0f27d52f7015f117e') revision = list(loader.storage.revision_get([expected_revision_id]))[0] assert revision is not None check_metadata_paths(revision['metadata'], paths=[ ('intrinsic.tool', str), ('intrinsic.raw', dict), ('extrinsic.provider', str), ('extrinsic.when', str), ('extrinsic.raw', dict), ('original_artifact.filename', str), ('original_artifact.length', int), ('original_artifact.checksums', dict), ]) def test_npm_loader_first_visit(swh_config, local_get): package = 'org' loader = NpmLoader(package, package_url(package), package_metadata_url(package)) actual_load_status = loader.load() assert actual_load_status['status'] == 'eventful' stats = loader.storage.stat_counters() assert { 'content': len(_expected_new_contents_first_visit), 'directory': len(_expected_new_directories_first_visit), 'origin': 1, 'origin_visit': 1, 'person': 2, 'release': 0, 'revision': len(_expected_new_revisions_first_visit), 'skipped_content': 0, 'snapshot': 1, } == stats assert len(list(loader.storage.content_get( _expected_new_contents_first_visit))) == len( _expected_new_contents_first_visit) assert list(loader.storage.directory_missing( _expected_new_directories_first_visit)) == [] assert list(loader.storage.revision_missing( _expected_new_revisions_first_visit)) == [] expected_snapshot = { 'id': _expected_new_snapshot_first_visit_id, 'branches': _expected_branches_first_visit, } check_snapshot(expected_snapshot, loader.storage) def 
test_npm_loader_incremental_visit(swh_config, local_get_visits): package = 'org' url = package_url(package) metadata_url = package_metadata_url(package) loader = NpmLoader(package, url, metadata_url) actual_load_status = loader.load() assert actual_load_status['status'] == 'eventful' origin_visit = list(loader.storage.origin_visit_get(url))[-1] assert origin_visit['status'] == 'full' stats = loader.storage.stat_counters() assert { 'content': len(_expected_new_contents_first_visit), 'directory': len(_expected_new_directories_first_visit), 'origin': 1, 'origin_visit': 1, 'person': 2, 'release': 0, 'revision': len(_expected_new_revisions_first_visit), 'skipped_content': 0, 'snapshot': 1, } == stats loader._info = None # reset loader internal state actual_load_status2 = loader.load() assert actual_load_status2['status'] == 'eventful' origin_visit2 = list(loader.storage.origin_visit_get(url))[-1] assert origin_visit2['status'] == 'full' stats = loader.storage.stat_counters() assert { # 3 new releases artifacts 'content': len(_expected_new_contents_first_visit) + 14, 'directory': len(_expected_new_directories_first_visit) + 15, 'origin': 1, 'origin_visit': 2, 'person': 2, 'release': 0, 'revision': len(_expected_new_revisions_first_visit) + 3, 'skipped_content': 0, 'snapshot': 2, } == stats urls = [ m.url for m in local_get_visits.request_history if m.url.startswith('https://registry.npmjs.org') ] assert len(urls) == len(set(urls)) # we visited each artifact once across diff --git a/swh/loader/package/tests/test_pypi.py b/swh/loader/package/tests/test_pypi.py index c4e5636..014f301 100644 --- a/swh/loader/package/tests/test_pypi.py +++ b/swh/loader/package/tests/test_pypi.py @@ -1,652 +1,652 @@ # Copyright (C) 2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import os from os import path import pytest from unittest.mock import patch from swh.core.tarball import uncompress from swh.model.hashutil import hash_to_bytes from swh.loader.package.pypi import ( PyPILoader, pypi_api_url, author, extract_intrinsic_metadata ) from swh.loader.package.tests.common import ( - check_snapshot, DATADIR, check_metadata_paths + check_snapshot, check_metadata_paths ) from swh.loader.package.tests.conftest import local_get_factory def test_author_basic(): data = { 'author': "i-am-groot", 'author_email': 'iam@groot.org', } actual_author = author(data) expected_author = { 'fullname': b'i-am-groot ', 'name': b'i-am-groot', 'email': b'iam@groot.org', } assert actual_author == expected_author def test_author_empty_email(): data = { 'author': 'i-am-groot', 'author_email': '', } actual_author = author(data) expected_author = { 'fullname': b'i-am-groot', 'name': b'i-am-groot', 'email': b'', } assert actual_author == expected_author def test_author_empty_name(): data = { 'author': "", 'author_email': 'iam@groot.org', } actual_author = author(data) expected_author = { 'fullname': b' ', 'name': b'', 'email': b'iam@groot.org', } assert actual_author == expected_author def test_author_malformed(): data = { 'author': "['pierre', 'paul', 'jacques']", 'author_email': None, } actual_author = author(data) expected_author = { 'fullname': b"['pierre', 'paul', 'jacques']", 'name': b"['pierre', 'paul', 'jacques']", 'email': None, } assert actual_author == expected_author def test_author_malformed_2(): data = { 'author': '[marie, jeanne]', 'author_email': 
'[marie@some, jeanne@thing]', } actual_author = author(data) expected_author = { 'fullname': b'[marie, jeanne] <[marie@some, jeanne@thing]>', 'name': b'[marie, jeanne]', 'email': b'[marie@some, jeanne@thing]', } assert actual_author == expected_author def test_author_malformed_3(): data = { 'author': '[marie, jeanne, pierre]', 'author_email': '[marie@somewhere.org, jeanne@somewhere.org]', } actual_author = author(data) expected_author = { 'fullname': b'[marie, jeanne, pierre] <[marie@somewhere.org, jeanne@somewhere.org]>', # noqa 'name': b'[marie, jeanne, pierre]', 'email': b'[marie@somewhere.org, jeanne@somewhere.org]', } actual_author == expected_author # configuration error # def test_badly_configured_loader_raise(monkeypatch): """Badly configured loader should raise""" monkeypatch.delenv('SWH_CONFIG_FILENAME', raising=False) with pytest.raises(ValueError) as e: PyPILoader(url='some-url') assert 'Misconfiguration' in e.value.args[0] def test_pypi_api_url(): """Compute pypi api url from the pypi project url should be ok""" url = pypi_api_url('https://pypi.org/project/requests') assert url == 'https://pypi.org/pypi/requests/json' @pytest.mark.fs -def test_extract_intrinsic_metadata(tmp_path): +def test_extract_intrinsic_metadata(tmp_path, datadir): """Parsing existing archive's PKG-INFO should yield results""" uncompressed_archive_path = str(tmp_path) archive_path = path.join( - DATADIR, 'files.pythonhosted.org', '0805nexter-1.1.0.zip') + datadir, 'files.pythonhosted.org', '0805nexter-1.1.0.zip') uncompress(archive_path, dest=uncompressed_archive_path) actual_metadata = extract_intrinsic_metadata(uncompressed_archive_path) expected_metadata = { 'metadata_version': '1.0', 'name': '0805nexter', 'version': '1.1.0', 'summary': 'a simple printer of nested lest', 'home_page': 'http://www.hp.com', 'author': 'hgtkpython', 'author_email': '2868989685@qq.com', 'platforms': ['UNKNOWN'], } assert actual_metadata == expected_metadata @pytest.mark.fs def test_extract_intrinsic_metadata_failures(tmp_path): """Parsing inexistant path/archive/PKG-INFO yield None""" # inexistant first level path assert extract_intrinsic_metadata('/something-inexistant') == {} # inexistant second level path (as expected by pypi archives) assert extract_intrinsic_metadata(tmp_path) == {} # inexistant PKG-INFO within second level path existing_path_no_pkginfo = str(tmp_path / 'something') os.mkdir(existing_path_no_pkginfo) assert extract_intrinsic_metadata(tmp_path) == {} # LOADER SCENARIO # # "edge" cases (for the same origin) # # no release artifact: # {visit full, status: uneventful, no contents, etc...} local_get_missing_all = local_get_factory(ignore_urls=[ 'https://files.pythonhosted.org/packages/ec/65/c0116953c9a3f47de89e71964d6c7b0c783b01f29fa3390584dbf3046b4d/0805nexter-1.1.0.zip', # noqa 'https://files.pythonhosted.org/packages/c4/a0/4562cda161dc4ecbbe9e2a11eb365400c0461845c5be70d73869786809c4/0805nexter-1.2.0.zip', # noqa ]) def test_no_release_artifact(swh_config, local_get_missing_all): """Load a pypi project with all artifacts missing ends up with no snapshot """ url = 'https://pypi.org/project/0805nexter' loader = PyPILoader(url) actual_load_status = loader.load() assert actual_load_status['status'] == 'uneventful' stats = loader.storage.stat_counters() assert { 'content': 0, 'directory': 0, 'origin': 1, 'origin_visit': 1, 'person': 0, 'release': 0, 'revision': 0, 'skipped_content': 0, 'snapshot': 1, } == stats origin_visit = next(loader.storage.origin_visit_get(url)) assert origin_visit['status'] == 'partial' 
# problem during loading: # {visit: partial, status: uneventful, no snapshot} def test_release_with_traceback(swh_config): url = 'https://pypi.org/project/0805nexter' with patch('swh.loader.package.pypi.PyPILoader.get_default_release', side_effect=ValueError('Problem')): loader = PyPILoader(url) actual_load_status = loader.load() assert actual_load_status['status'] == 'failed' stats = loader.storage.stat_counters() assert { 'content': 0, 'directory': 0, 'origin': 1, 'origin_visit': 1, 'person': 0, 'release': 0, 'revision': 0, 'skipped_content': 0, 'snapshot': 0, } == stats origin_visit = next(loader.storage.origin_visit_get(url)) assert origin_visit['status'] == 'partial' # problem during loading: failure early enough in between swh contents... # some contents (contents, directories, etc...) have been written in storage # {visit: partial, status: eventful, no snapshot} # problem during loading: failure late enough we can have snapshots (some # revisions are written in storage already) # {visit: partial, status: eventful, snapshot} # "normal" cases (for the same origin) # local_get_missing_one = local_get_factory(ignore_urls=[ 'https://files.pythonhosted.org/packages/ec/65/c0116953c9a3f47de89e71964d6c7b0c783b01f29fa3390584dbf3046b4d/0805nexter-1.1.0.zip', # noqa ]) # some missing release artifacts: # {visit partial, status: eventful, 1 snapshot} def test_revision_metadata_structure(swh_config, local_get): url = 'https://pypi.org/project/0805nexter' loader = PyPILoader(url) actual_load_status = loader.load() assert actual_load_status['status'] == 'eventful' expected_revision_id = hash_to_bytes( 'e445da4da22b31bfebb6ffc4383dbf839a074d21') revision = list(loader.storage.revision_get([expected_revision_id]))[0] assert revision is not None check_metadata_paths(revision['metadata'], paths=[ ('intrinsic.tool', str), ('intrinsic.raw', dict), ('extrinsic.provider', str), ('extrinsic.when', str), ('extrinsic.raw', dict), ('original_artifact.filename', str), ('original_artifact.length', int), ('original_artifact.checksums', dict), ]) def test_visit_with_missing_artifact(swh_config, local_get_missing_one): """Load a pypi project with some missing artifacts ends up with 1 snapshot """ url = 'https://pypi.org/project/0805nexter' loader = PyPILoader(url) actual_load_status = loader.load() assert actual_load_status['status'] == 'eventful' stats = loader.storage.stat_counters() assert { 'content': 3, 'directory': 2, 'origin': 1, 'origin_visit': 1, 'person': 1, 'release': 0, 'revision': 1, 'skipped_content': 0, 'snapshot': 1 } == stats expected_contents = map(hash_to_bytes, [ '405859113963cb7a797642b45f171d6360425d16', 'e5686aa568fdb1d19d7f1329267082fe40482d31', '83ecf6ec1114fd260ca7a833a2d165e71258c338', ]) assert list(loader.storage.content_missing_per_sha1(expected_contents))\ == [] expected_dirs = map(hash_to_bytes, [ 'b178b66bd22383d5f16f4f5c923d39ca798861b4', 'c3a58f8b57433a4b56caaa5033ae2e0931405338', ]) assert list(loader.storage.directory_missing(expected_dirs)) == [] # {revision hash: directory hash} expected_revs = { hash_to_bytes('e445da4da22b31bfebb6ffc4383dbf839a074d21'): hash_to_bytes('b178b66bd22383d5f16f4f5c923d39ca798861b4'), # noqa } assert list(loader.storage.revision_missing(expected_revs)) == [] expected_branches = { 'releases/1.2.0': { 'target': 'e445da4da22b31bfebb6ffc4383dbf839a074d21', 'target_type': 'revision', }, 'HEAD': { 'target': 'releases/1.2.0', 'target_type': 'alias', }, } expected_snapshot = { 'id': 'dd0e4201a232b1c104433741dbf45895b8ac9355', 'branches': expected_branches, 
} check_snapshot(expected_snapshot, storage=loader.storage) origin_visit = next(loader.storage.origin_visit_get(url)) assert origin_visit['status'] == 'partial' def test_visit_with_1_release_artifact(swh_config, local_get): """With no prior visit, load a pypi project ends up with 1 snapshot """ url = 'https://pypi.org/project/0805nexter' loader = PyPILoader(url) actual_load_status = loader.load() assert actual_load_status['status'] == 'eventful' stats = loader.storage.stat_counters() assert { 'content': 6, 'directory': 4, 'origin': 1, 'origin_visit': 1, 'person': 1, 'release': 0, 'revision': 2, 'skipped_content': 0, 'snapshot': 1 } == stats expected_contents = map(hash_to_bytes, [ 'a61e24cdfdab3bb7817f6be85d37a3e666b34566', '938c33483285fd8ad57f15497f538320df82aeb8', 'a27576d60e08c94a05006d2e6d540c0fdb5f38c8', '405859113963cb7a797642b45f171d6360425d16', 'e5686aa568fdb1d19d7f1329267082fe40482d31', '83ecf6ec1114fd260ca7a833a2d165e71258c338', ]) assert list(loader.storage.content_missing_per_sha1(expected_contents))\ == [] expected_dirs = map(hash_to_bytes, [ '05219ba38bc542d4345d5638af1ed56c7d43ca7d', 'cf019eb456cf6f78d8c4674596f1c9a97ece8f44', 'b178b66bd22383d5f16f4f5c923d39ca798861b4', 'c3a58f8b57433a4b56caaa5033ae2e0931405338', ]) assert list(loader.storage.directory_missing(expected_dirs)) == [] # {revision hash: directory hash} expected_revs = { hash_to_bytes('4c99891f93b81450385777235a37b5e966dd1571'): hash_to_bytes('05219ba38bc542d4345d5638af1ed56c7d43ca7d'), # noqa hash_to_bytes('e445da4da22b31bfebb6ffc4383dbf839a074d21'): hash_to_bytes('b178b66bd22383d5f16f4f5c923d39ca798861b4'), # noqa } assert list(loader.storage.revision_missing(expected_revs)) == [] expected_branches = { 'releases/1.1.0': { 'target': '4c99891f93b81450385777235a37b5e966dd1571', 'target_type': 'revision', }, 'releases/1.2.0': { 'target': 'e445da4da22b31bfebb6ffc4383dbf839a074d21', 'target_type': 'revision', }, 'HEAD': { 'target': 'releases/1.2.0', 'target_type': 'alias', }, } expected_snapshot = { 'id': 'ba6e158ada75d0b3cfb209ffdf6daa4ed34a227a', 'branches': expected_branches, } check_snapshot(expected_snapshot, loader.storage) origin_visit = next(loader.storage.origin_visit_get(url)) assert origin_visit['status'] == 'full' def test_multiple_visits_with_no_change(swh_config, local_get): """Multiple visits with no changes results in 1 same snapshot """ url = 'https://pypi.org/project/0805nexter' loader = PyPILoader(url) actual_load_status = loader.load() assert actual_load_status['status'] == 'eventful' stats = loader.storage.stat_counters() assert { 'content': 6, 'directory': 4, 'origin': 1, 'origin_visit': 1, 'person': 1, 'release': 0, 'revision': 2, 'skipped_content': 0, 'snapshot': 1 } == stats expected_branches = { 'releases/1.1.0': { 'target': '4c99891f93b81450385777235a37b5e966dd1571', 'target_type': 'revision', }, 'releases/1.2.0': { 'target': 'e445da4da22b31bfebb6ffc4383dbf839a074d21', 'target_type': 'revision', }, 'HEAD': { 'target': 'releases/1.2.0', 'target_type': 'alias', }, } snapshot_id = 'ba6e158ada75d0b3cfb209ffdf6daa4ed34a227a' expected_snapshot = { 'id': snapshot_id, 'branches': expected_branches, } check_snapshot(expected_snapshot, loader.storage) origin_visit = next(loader.storage.origin_visit_get(url)) assert origin_visit['status'] == 'full' actual_load_status2 = loader.load() assert actual_load_status2['status'] == 'uneventful' stats2 = loader.storage.stat_counters() expected_stats2 = stats.copy() expected_stats2['origin_visit'] = 1 + 1 assert expected_stats2 == stats2 # same snapshot 
actual_snapshot_id = origin_visit['snapshot']['id'] assert actual_snapshot_id == hash_to_bytes(snapshot_id) def test_incremental_visit(swh_config, local_get_visits): """With prior visit, 2nd load will result with a different snapshot """ url = 'https://pypi.org/project/0805nexter' loader = PyPILoader(url) visit1_actual_load_status = loader.load() visit1_stats = loader.storage.stat_counters() assert visit1_actual_load_status['status'] == 'eventful' origin_visit1 = next(loader.storage.origin_visit_get(url)) assert origin_visit1['status'] == 'full' assert { 'content': 6, 'directory': 4, 'origin': 1, 'origin_visit': 1, 'person': 1, 'release': 0, 'revision': 2, 'skipped_content': 0, 'snapshot': 1 } == visit1_stats # Reset internal state loader._info = None visit2_actual_load_status = loader.load() visit2_stats = loader.storage.stat_counters() assert visit2_actual_load_status['status'] == 'eventful' visits = list(loader.storage.origin_visit_get(url)) assert len(visits) == 2 assert visits[1]['status'] == 'full' assert { 'content': 6 + 1, # 1 more content 'directory': 4 + 2, # 2 more directories 'origin': 1, 'origin_visit': 1 + 1, 'person': 1, 'release': 0, 'revision': 2 + 1, # 1 more revision 'skipped_content': 0, 'snapshot': 1 + 1, # 1 more snapshot } == visit2_stats expected_contents = map(hash_to_bytes, [ 'a61e24cdfdab3bb7817f6be85d37a3e666b34566', '938c33483285fd8ad57f15497f538320df82aeb8', 'a27576d60e08c94a05006d2e6d540c0fdb5f38c8', '405859113963cb7a797642b45f171d6360425d16', 'e5686aa568fdb1d19d7f1329267082fe40482d31', '83ecf6ec1114fd260ca7a833a2d165e71258c338', '92689fa2b7fb4d4fc6fb195bf73a50c87c030639' ]) assert list(loader.storage.content_missing_per_sha1(expected_contents))\ == [] expected_dirs = map(hash_to_bytes, [ '05219ba38bc542d4345d5638af1ed56c7d43ca7d', 'cf019eb456cf6f78d8c4674596f1c9a97ece8f44', 'b178b66bd22383d5f16f4f5c923d39ca798861b4', 'c3a58f8b57433a4b56caaa5033ae2e0931405338', 'e226e7e4ad03b4fc1403d69a18ebdd6f2edd2b3a', '52604d46843b898f5a43208045d09fcf8731631b', ]) assert list(loader.storage.directory_missing(expected_dirs)) == [] # {revision hash: directory hash} expected_revs = { hash_to_bytes('4c99891f93b81450385777235a37b5e966dd1571'): hash_to_bytes('05219ba38bc542d4345d5638af1ed56c7d43ca7d'), # noqa hash_to_bytes('e445da4da22b31bfebb6ffc4383dbf839a074d21'): hash_to_bytes('b178b66bd22383d5f16f4f5c923d39ca798861b4'), # noqa hash_to_bytes('51247143b01445c9348afa9edfae31bf7c5d86b1'): hash_to_bytes('e226e7e4ad03b4fc1403d69a18ebdd6f2edd2b3a'), # noqa } assert list(loader.storage.revision_missing(expected_revs)) == [] expected_branches = { 'releases/1.1.0': { 'target': '4c99891f93b81450385777235a37b5e966dd1571', 'target_type': 'revision', }, 'releases/1.2.0': { 'target': 'e445da4da22b31bfebb6ffc4383dbf839a074d21', 'target_type': 'revision', }, 'releases/1.3.0': { 'target': '51247143b01445c9348afa9edfae31bf7c5d86b1', 'target_type': 'revision', }, 'HEAD': { 'target': 'releases/1.3.0', 'target_type': 'alias', }, } expected_snapshot = { 'id': '2e5149a7b0725d18231a37b342e9b7c4e121f283', 'branches': expected_branches, } check_snapshot(expected_snapshot, loader.storage) origin_visit = list(loader.storage.origin_visit_get(url))[-1] assert origin_visit['status'] == 'full' urls = [ m.url for m in local_get_visits.request_history if m.url.startswith('https://files.pythonhosted.org') ] # visited each artifact once across 2 visits assert len(urls) == len(set(urls)) # release artifact, no new artifact # {visit full, status uneventful, same snapshot as before} # release artifact, old 
artifact with different checksums # {visit full, status full, new snapshot with shared history and some new # different history} # release with multiple sdist artifacts per pypi "version" # snapshot branch output is different def test_visit_1_release_with_2_artifacts(swh_config, local_get): """With no prior visit, load a pypi project ends up with 1 snapshot """ url = 'https://pypi.org/project/nexter' loader = PyPILoader(url) actual_load_status = loader.load() assert actual_load_status['status'] == 'eventful' expected_branches = { 'releases/1.1.0/nexter-1.1.0.zip': { 'target': '4c99891f93b81450385777235a37b5e966dd1571', 'target_type': 'revision', }, 'releases/1.1.0/nexter-1.1.0.tar.gz': { 'target': '0bf88f5760cca7665d0af4d6575d9301134fe11a', 'target_type': 'revision', }, } expected_snapshot = { 'id': 'a27e638a4dad6fbfa273c6ebec1c4bf320fb84c6', 'branches': expected_branches, } check_snapshot(expected_snapshot, loader.storage) origin_visit = next(loader.storage.origin_visit_get(url)) assert origin_visit['status'] == 'full'
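The check_snapshot helper imported from swh.loader.package.tests.common in the tests above is not shown in this diff. Assuming the usual swh.storage API, it presumably amounts to something like the sketch below (the body and the storage return format are assumptions, not the actual helper):

    from swh.model.hashutil import hash_to_bytes

    def check_snapshot(expected_snapshot, storage):
        # Fetch the snapshot by its expected id and compare every branch.
        snap = storage.snapshot_get(hash_to_bytes(expected_snapshot['id']))
        assert snap is not None
        for name, expected in expected_snapshot['branches'].items():
            branch = snap['branches'][name.encode()]
            assert branch['target_type'] == expected['target_type']
            if expected['target_type'] == 'alias':
                assert branch['target'] == expected['target'].encode()
            else:
                assert branch['target'] == hash_to_bytes(expected['target'])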