Page MenuHomeSoftware Heritage

No OneTemporary

diff --git a/MANIFEST.in b/MANIFEST.in
index f601d81..4c91126 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,7 +1,7 @@
include Makefile
include requirements.txt
include requirements-swh.txt
include version.txt
include README.md
recursive-include swh/loader/package/tests/ *.tar.gz
-recursive-include swh/loader/package/tests/resources/ *
+recursive-include swh/loader/package/tests/data/ *
diff --git a/requirements-swh.txt b/requirements-swh.txt
index f18315a..84de3de 100644
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,3 +1,4 @@
+swh.core >= 0.0.72
swh.model >= 0.0.18
swh.storage >= 0.0.152
swh.deposit
diff --git a/requirements-test.txt b/requirements-test.txt
index 5821059..5847ac3 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1,2 +1,3 @@
pytest
requests_mock
+swh-core[testing]
diff --git a/swh/loader/package/tests/conftest.py b/swh/loader/package/tests/conftest.py
index d3b2b65..f6e787f 100644
--- a/swh/loader/package/tests/conftest.py
+++ b/swh/loader/package/tests/conftest.py
@@ -1,133 +1,15 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-import logging
-import os
-import re
import pytest
-from functools import partial
from os import path
-from urllib.parse import urlparse
-
-
-logger = logging.getLogger(__name__)
-
-
-# Check get_local_factory function
-# Maximum number of iteration checks to generate requests responses
-MAX_VISIT_FILES = 10
@pytest.fixture
def swh_config(monkeypatch, datadir):
- conffile = os.path.join(datadir, 'loader.yml')
+ conffile = path.join(datadir, 'loader.yml')
monkeypatch.setenv('SWH_CONFIG_FILENAME', conffile)
return conffile
-
-
-def get_response_cb(request, context, datadir, ignore_urls=[], visits=None):
- """Mount point callback to fetch on disk the content of a request
-
- This is meant to be used as 'body' argument of the requests_mock.get()
- method.
-
- It will look for files on the local filesystem based on the requested URL,
- using the following rules:
-
- - files are searched in the datadir/<hostname> directory
-
- - the local file name is the path part of the URL with path hierarchy
- markers (aka '/') replaced by '_'
-
- Eg. if you use the requests_mock fixture in your test file as:
-
- requests_mock.get('https://nowhere.com', body=get_response_cb)
- # or even
- requests_mock.get(re.compile('https://'), body=get_response_cb)
-
- then a call requests.get like:
-
- requests.get('https://nowhere.com/path/to/resource')
-
- will look the content of the response in:
-
- datadir/nowhere.com/path_to_resource
-
- Args:
- request (requests.Request): Object requests
- context (requests.Context): Object holding response metadata
- information (status_code, headers, etc...)
- ignore_urls (List): urls whose status response should be 404 even if
- the local file exists
- visits (Optional[Dict]): Map of url, number of visits. If None, disable
- multi visit support (default)
-
- Returns:
- Optional[FileDescriptor] on the on disk file to read from the test
- context
-
- """
- logger.debug('get_response_cb(%s, %s)', request, context)
- logger.debug('url: %s', request.url)
- logger.debug('ignore_urls: %s', ignore_urls)
- if request.url in ignore_urls:
- context.status_code = 404
- return None
- url = urlparse(request.url)
- dirname = url.hostname # pypi.org | files.pythonhosted.org
- # url.path: pypi/<project>/json -> local file: pypi_<project>_json
- filename = url.path[1:]
- if filename.endswith('/'):
- filename = filename[:-1]
- filename = filename.replace('/', '_')
- filepath = path.join(datadir, dirname, filename)
- if visits is not None:
- visit = visits.get(url, 0)
- visits[url] = visit + 1
- if visit:
- filepath = filepath + '_visit%s' % visit
-
- if not path.isfile(filepath):
- logger.debug('not found filepath: %s', filepath)
- context.status_code = 404
- return None
- fd = open(filepath, 'rb')
- context.headers['content-length'] = str(path.getsize(filepath))
- return fd
-
-
-@pytest.fixture
-def datadir(request):
- """By default, returns the test directory
-
- """
- return path.join(path.dirname(request.fspath), 'data')
-
-
-def local_get_factory(ignore_urls=[],
- has_multi_visit=False):
- @pytest.fixture
- def local_get(requests_mock, datadir):
- if not has_multi_visit:
- cb = partial(get_response_cb,
- ignore_urls=ignore_urls,
- datadir=datadir)
- requests_mock.get(re.compile('https://'), body=cb)
- else:
- visits = {}
- requests_mock.get(re.compile('https://'), body=partial(
- get_response_cb, ignore_urls=ignore_urls, visits=visits,
- datadir=datadir)
- )
-
- return requests_mock
-
- return local_get
-
-
-local_get = local_get_factory([])
-
-local_get_visits = local_get_factory(has_multi_visit=True)
diff --git a/swh/loader/package/tests/fixture/__init__.py b/swh/loader/package/tests/fixture/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/swh/loader/package/tests/fixture/conftest.py b/swh/loader/package/tests/fixture/conftest.py
deleted file mode 100644
index 399adac..0000000
--- a/swh/loader/package/tests/fixture/conftest.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# Copyright (C) 2019 The Software Heritage developers
-# See the AUTHORS file at the top-level directory of this distribution
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-import pytest
-
-from os import path
-
-
-DATADIR = path.join(path.abspath(path.dirname(__file__)), 'data')
-
-
-@pytest.fixture
-def datadir():
- return DATADIR
diff --git a/swh/loader/package/tests/fixture/data/example.com/file.json b/swh/loader/package/tests/fixture/data/example.com/file.json
deleted file mode 100644
index 000a8dd..0000000
--- a/swh/loader/package/tests/fixture/data/example.com/file.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{
- "welcome": "you"
-}
diff --git a/swh/loader/package/tests/fixture/test_conftest.py b/swh/loader/package/tests/fixture/test_conftest.py
deleted file mode 100644
index 3cdcbba..0000000
--- a/swh/loader/package/tests/fixture/test_conftest.py
+++ /dev/null
@@ -1,24 +0,0 @@
-# Copyright (C) 2019 The Software Heritage developers
-# See the AUTHORS file at the top-level directory of this distribution
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-import requests
-
-from .conftest import DATADIR
-
-# In this arborescence, we override in the local conftest.py module the
-# "datadir" fixture to specify where to retrieve the data files from.
-
-
-def test_local_get_with_datadir_fixture_override(local_get):
- """Override datadir fixture should retrieve data from elsewhere
-
- """
- response = requests.get('https://example.com/file.json')
- assert response.ok
- assert response.json() == {'welcome': 'you'}
-
-
-def test_data_dir_override(datadir):
- assert datadir == DATADIR
diff --git a/swh/loader/package/tests/test_conftest.py b/swh/loader/package/tests/test_conftest.py
deleted file mode 100644
index 8683ee3..0000000
--- a/swh/loader/package/tests/test_conftest.py
+++ /dev/null
@@ -1,89 +0,0 @@
-# Copyright (C) 2019 The Software Heritage developers
-# See the AUTHORS file at the top-level directory of this distribution
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-import requests
-
-from os import path
-
-from swh.loader.package.tests.conftest import local_get_factory
-
-
-def test_get_response_cb_with_visits_nominal(local_get_visits):
- response = requests.get('https://example.com/file.json')
- assert response.ok
- assert response.json() == {'hello': 'you'}
-
- response = requests.get('https://example.com/file.json')
- assert response.ok
- assert response.json() == {'hello': 'world'}
-
- response = requests.get('https://example.com/file.json')
- assert not response.ok
- assert response.status_code == 404
-
-
-def test_get_response_cb_with_visits(local_get_visits):
- response = requests.get('https://example.com/file.json')
- assert response.ok
- assert response.json() == {'hello': 'you'}
-
- response = requests.get('https://example.com/other.json')
- assert response.ok
- assert response.json() == "foobar"
-
- response = requests.get('https://example.com/file.json')
- assert response.ok
- assert response.json() == {'hello': 'world'}
-
- response = requests.get('https://example.com/other.json')
- assert not response.ok
- assert response.status_code == 404
-
- response = requests.get('https://example.com/file.json')
- assert not response.ok
- assert response.status_code == 404
-
-
-def test_get_response_cb_no_visit(local_get):
- response = requests.get('https://example.com/file.json')
- assert response.ok
- assert response.json() == {'hello': 'you'}
-
- response = requests.get('https://example.com/file.json')
- assert response.ok
- assert response.json() == {'hello': 'you'}
-
-
-local_get_ignore = local_get_factory(
- ignore_urls=['https://example.com/file.json'],
- has_multi_visit=False,
-)
-
-
-def test_get_response_cb_ignore_url(local_get_ignore):
- response = requests.get('https://example.com/file.json')
- assert not response.ok
- assert response.status_code == 404
-
-
-local_get_ignore_and_visit = local_get_factory(
- ignore_urls=['https://example.com/file.json'],
- has_multi_visit=True,
-)
-
-
-def test_get_response_cb_ignore_url_with_visit(local_get_ignore_and_visit):
- response = requests.get('https://example.com/file.json')
- assert not response.ok
- assert response.status_code == 404
-
- response = requests.get('https://example.com/file.json')
- assert not response.ok
- assert response.status_code == 404
-
-
-def test_data_dir(datadir):
- expected_datadir = path.join(path.abspath(path.dirname(__file__)), 'data')
- assert datadir == expected_datadir
diff --git a/swh/loader/package/tests/test_deposit.py b/swh/loader/package/tests/test_deposit.py
index 9b6c222..8cc5723 100644
--- a/swh/loader/package/tests/test_deposit.py
+++ b/swh/loader/package/tests/test_deposit.py
@@ -1,199 +1,199 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import re
from swh.model.hashutil import hash_to_bytes
from swh.loader.package.deposit import DepositLoader
from swh.loader.package.tests.common import (
check_snapshot, check_metadata_paths
)
-from swh.loader.package.tests.conftest import local_get_factory
+from swh.core.pytest_plugin import requests_mock_datadir_factory
def test_deposit_init_ok(swh_config):
url = 'some-url'
deposit_id = 999
loader = DepositLoader(url, deposit_id) # Something that does not exist
assert loader.url == url
assert loader.archive_url == '/%s/raw/' % deposit_id
assert loader.metadata_url == '/%s/meta/' % deposit_id
assert loader.deposit_update_url == '/%s/update/' % deposit_id
assert loader.client is not None
def test_deposit_loading_failure_to_fetch_metadata(swh_config):
"""Error during fetching artifact ends us with failed/partial visit
"""
# private api url form: 'https://deposit.s.o/1/private/hal/666/raw/'
url = 'some-url'
unknown_deposit_id = 666
loader = DepositLoader(url, unknown_deposit_id) # does not exist
actual_load_status = loader.load()
assert actual_load_status['status'] == 'failed'
stats = loader.storage.stat_counters()
assert {
'content': 0,
'directory': 0,
'origin': 1,
'origin_visit': 1,
'person': 0,
'release': 0,
'revision': 0,
'skipped_content': 0,
'snapshot': 0,
} == stats
origin_visit = next(loader.storage.origin_visit_get(url))
assert origin_visit['status'] == 'partial'
-local_get_missing_one = local_get_factory(ignore_urls=[
+requests_mock_datadir_missing_one = requests_mock_datadir_factory(ignore_urls=[
'https://deposit.softwareheritage.org/1/private/666/raw/',
])
def test_deposit_loading_failure_to_retrieve_1_artifact(
- swh_config, local_get_missing_one):
+ swh_config, requests_mock_datadir_missing_one):
"""Deposit with missing artifact ends up with an uneventful/partial visit
"""
# private api url form: 'https://deposit.s.o/1/private/hal/666/raw/'
url = 'some-url-2'
deposit_id = 666
loader = DepositLoader(url, deposit_id)
assert loader.archive_url
actual_load_status = loader.load()
assert actual_load_status['status'] == 'uneventful'
stats = loader.storage.stat_counters()
assert {
'content': 0,
'directory': 0,
'origin': 1,
'origin_visit': 1,
'person': 0,
'release': 0,
'revision': 0,
'skipped_content': 0,
'snapshot': 1,
} == stats
origin_visit = next(loader.storage.origin_visit_get(url))
assert origin_visit['status'] == 'partial'
-def test_revision_metadata_structure(swh_config, local_get, requests_mock):
+def test_revision_metadata_structure(swh_config, requests_mock_datadir):
# do not care for deposit update query
- requests_mock.put(re.compile('https'))
+ requests_mock_datadir.put(re.compile('https'))
url = 'https://hal-test.archives-ouvertes.fr/some-external-id'
deposit_id = 666
loader = DepositLoader(url, deposit_id)
assert loader.archive_url
actual_load_status = loader.load()
assert actual_load_status['status'] == 'eventful'
expected_revision_id = hash_to_bytes(
'9471c606239bccb1f269564c9ea114e1eeab9eb4')
revision = list(loader.storage.revision_get([expected_revision_id]))[0]
assert revision is not None
check_metadata_paths(revision['metadata'], paths=[
('extrinsic.provider', str),
('extrinsic.when', str),
('extrinsic.raw', dict),
('original_artifact.filename', str),
('original_artifact.length', int),
('original_artifact.checksums', dict),
])
-def test_deposit_loading_ok(swh_config, local_get, requests_mock):
- requests_mock.put(re.compile('https')) # do not care for put
+def test_deposit_loading_ok(swh_config, requests_mock_datadir):
+ requests_mock_datadir.put(re.compile('https')) # do not care for put
url = 'https://hal-test.archives-ouvertes.fr/some-external-id'
deposit_id = 666
loader = DepositLoader(url, deposit_id)
assert loader.archive_url
actual_load_status = loader.load()
assert actual_load_status['status'] == 'eventful'
stats = loader.storage.stat_counters()
assert {
'content': 303,
'directory': 12,
'origin': 1,
'origin_visit': 1,
'person': 1,
'release': 0,
'revision': 1,
'skipped_content': 0,
'snapshot': 1,
} == stats
origin_visit = next(loader.storage.origin_visit_get(url))
assert origin_visit['status'] == 'full'
expected_branches = {
'HEAD': {
'target': '9471c606239bccb1f269564c9ea114e1eeab9eb4',
'target_type': 'revision',
},
}
expected_snapshot = {
'id': '453f455d0efb69586143cd6b6e5897f9906b53a7',
'branches': expected_branches,
}
check_snapshot(expected_snapshot, storage=loader.storage)
# check metadata
tool = {
"name": "swh-deposit",
"version": "0.0.1",
"configuration": {
"sword_version": "2",
}
}
tool = loader.storage.tool_get(tool)
assert tool is not None
assert tool['id'] is not None
provider = {
"provider_name": "hal",
"provider_type": "deposit_client",
"provider_url": "https://hal-test.archives-ouvertes.fr/",
"metadata": None,
}
provider = loader.storage.metadata_provider_get_by(provider)
assert provider is not None
assert provider['id'] is not None
metadata = loader.storage.origin_metadata_get_by(
url, provider_type='deposit_client')
assert metadata is not None
assert isinstance(metadata, list)
assert len(metadata) == 1
metadata0 = metadata[0]
assert metadata0['provider_id'] == provider['id']
assert metadata0['provider_type'] == 'deposit_client'
assert metadata0['tool_id'] == tool['id']
diff --git a/swh/loader/package/tests/test_gnu.py b/swh/loader/package/tests/test_gnu.py
index 7b0fc63..ea70a83 100644
--- a/swh/loader/package/tests/test_gnu.py
+++ b/swh/loader/package/tests/test_gnu.py
@@ -1,348 +1,349 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
import re
from swh.model.hashutil import hash_to_bytes
from swh.loader.package.gnu import GNULoader, get_version
from swh.loader.package.tests.common import (
check_snapshot, check_metadata_paths
)
def test_get_version():
"""From url to branch name should yield something relevant
"""
for url, expected_branchname in [
('https://gnu.org/sthg/info-2.1.0.tar.gz', '2.1.0'),
('https://gnu.org/sthg/info-2.1.2.zip', '2.1.2'),
('https://sthg.org/gnu/sthg.tar.gz', 'sthg'),
('https://sthg.org/gnu/DLDF-1.1.4.tar.gz', '1.1.4'),
('https://sthg.org/gnu/anubis-latest.tar.bz2', 'latest'),
('https://ftp.org/gnu/aris-w32.zip', 'w32'),
('https://ftp.org/gnu/aris-w32-2.2.zip', 'w32-2.2'),
('https://ftp.org/gnu/autogen.info.tar.gz', 'autogen.info'),
('https://ftp.org/gnu/crypto-build-demo.tar.gz',
'crypto-build-demo'),
('https://ftp.org/gnu/clue+clio+xit.clisp.tar.gz',
'clue+clio+xit.clisp'),
('https://ftp.org/gnu/clue+clio.for-pcl.tar.gz',
'clue+clio.for-pcl'),
('https://ftp.org/gnu/clisp-hppa2.0-hp-hpux10.20.tar.gz',
'hppa2.0-hp-hpux10.20'),
('clisp-i386-solaris2.6.tar.gz', 'i386-solaris2.6'),
('clisp-mips-sgi-irix6.5.tar.gz', 'mips-sgi-irix6.5'),
('clisp-powerpc-apple-macos.tar.gz', 'powerpc-apple-macos'),
('clisp-powerpc-unknown-linuxlibc6.tar.gz',
'powerpc-unknown-linuxlibc6'),
('clisp-rs6000-ibm-aix3.2.5.tar.gz', 'rs6000-ibm-aix3.2.5'),
('clisp-sparc-redhat51-linux.tar.gz', 'sparc-redhat51-linux'),
('clisp-sparc-sun-solaris2.4.tar.gz', 'sparc-sun-solaris2.4'),
('clisp-sparc-sun-sunos4.1.3_U1.tar.gz',
'sparc-sun-sunos4.1.3_U1'),
('clisp-2.25.1-powerpc-apple-MacOSX.tar.gz',
'2.25.1-powerpc-apple-MacOSX'),
('clisp-2.27-PowerMacintosh-powerpc-Darwin-1.3.7.tar.gz',
'2.27-PowerMacintosh-powerpc-Darwin-1.3.7'),
('clisp-2.27-i686-unknown-Linux-2.2.19.tar.gz',
'2.27-i686-unknown-Linux-2.2.19'),
('clisp-2.28-i386-i386-freebsd-4.3-RELEASE.tar.gz',
'2.28-i386-i386-freebsd-4.3-RELEASE'),
('clisp-2.28-i686-unknown-cygwin_me-4.90-1.3.10.tar.gz',
'2.28-i686-unknown-cygwin_me-4.90-1.3.10'),
('clisp-2.29-i386-i386-freebsd-4.6-STABLE.tar.gz',
'2.29-i386-i386-freebsd-4.6-STABLE'),
('clisp-2.29-i686-unknown-cygwin_nt-5.0-1.3.12.tar.gz',
'2.29-i686-unknown-cygwin_nt-5.0-1.3.12'),
('gcl-2.5.3-ansi-japi-xdr.20030701_mingw32.zip',
'2.5.3-ansi-japi-xdr.20030701_mingw32'),
('gettext-runtime-0.13.1.bin.woe32.zip', '0.13.1.bin.woe32'),
('sather-logo_images.tar.gz', 'sather-logo_images'),
('sather-specification-000328.html.tar.gz', '000328.html')
]:
actual_branchname = get_version(url)
assert actual_branchname == expected_branchname
_expected_new_contents_first_visit = [
'e9258d81faf5881a2f96a77ba609396f82cb97ad',
'1170cf105b04b7e2822a0e09d2acf71da7b9a130',
'fbd27c3f41f2668624ffc80b7ba5db9b92ff27ac',
'0057bec9b5422aff9256af240b177ac0e3ac2608',
'2b8d0d0b43a1078fc708930c8ddc2956a86c566e',
'27de3b3bc6545d2a797aeeb4657c0e215a0c2e55',
'2e6db43f5cd764e677f416ff0d0c78c7a82ef19b',
'ae9be03bd2a06ed8f4f118d3fe76330bb1d77f62',
'edeb33282b2bffa0e608e9d2fd960fd08093c0ea',
'd64e64d4c73679323f8d4cde2643331ba6c20af9',
'7a756602914be889c0a2d3952c710144b3e64cb0',
'84fb589b554fcb7f32b806951dcf19518d67b08f',
'8624bcdae55baeef00cd11d5dfcfa60f68710a02',
'e08441aeab02704cfbd435d6445f7c072f8f524e',
'f67935bc3a83a67259cda4b2d43373bd56703844',
'809788434b433eb2e3cfabd5d591c9a659d5e3d8',
'7d7c6c8c5ebaeff879f61f37083a3854184f6c41',
'b99fec102eb24bffd53ab61fc30d59e810f116a2',
'7d149b28eaa228b3871c91f0d5a95a2fa7cb0c68',
'f0c97052e567948adf03e641301e9983c478ccff',
'7fb724242e2b62b85ca64190c31dcae5303e19b3',
'4f9709e64a9134fe8aefb36fd827b84d8b617ab5',
'7350628ccf194c2c3afba4ac588c33e3f3ac778d',
'0bb892d9391aa706dc2c3b1906567df43cbe06a2',
'49d4c0ce1a16601f1e265d446b6c5ea6b512f27c',
'6b5cc594ac466351450f7f64a0b79fdaf4435ad3',
'3046e5d1f70297e2a507b98224b6222c9688d610',
'1572607d456d7f633bc6065a2b3048496d679a31',
]
_expected_new_directories_first_visit = [
'daabc65ec75d487b1335ffc101c0ac11c803f8fc',
'263be23b4a8101d3ad0d9831319a3e0f2b065f36',
'7f6e63ba6eb3e2236f65892cd822041f1a01dd5c',
'4db0a3ecbc976083e2dac01a62f93729698429a3',
'dfef1c80e1098dd5deda664bb44a9ab1f738af13',
'eca971d346ea54d95a6e19d5051f900237fafdaa',
'3aebc29ed1fccc4a6f2f2010fb8e57882406b528',
]
_expected_new_revisions_first_visit = {
'44183488c0774ce3c957fa19ba695cf18a4a42b3':
'3aebc29ed1fccc4a6f2f2010fb8e57882406b528'
}
_expected_branches_first_visit = {
'HEAD': {
'target_type': 'alias',
'target': 'releases/0.1.0',
},
'releases/0.1.0': {
'target_type': 'revision',
'target': '44183488c0774ce3c957fa19ba695cf18a4a42b3',
},
}
# hash is different than before as we changed the snapshot
# gnu used to use `release/` (singular) instead of plural
_expected_new_snapshot_first_visit_id = 'c419397fd912039825ebdbea378bc6283f006bf5' # noqa
def test_visit_with_no_artifact_found(swh_config, requests_mock):
package_url = 'https://ftp.gnu.org/gnu/8sync/'
tarballs = [{
'time': '944729610',
'archive': 'https://ftp.gnu.org/gnu/8sync/8sync-0.1.0.tar.gz',
'length': 221837,
}]
loader = GNULoader(package_url, tarballs)
requests_mock.get(re.compile('https://'), status_code=404)
actual_load_status = loader.load()
assert actual_load_status['status'] == 'uneventful'
stats = loader.storage.stat_counters()
assert {
'content': 0,
'directory': 0,
'origin': 1,
'origin_visit': 1,
'person': 0,
'release': 0,
'revision': 0,
'skipped_content': 0,
'snapshot': 1,
} == stats
origin_visit = next(loader.storage.origin_visit_get(package_url))
assert origin_visit['status'] == 'partial'
-def test_check_revision_metadata_structure(swh_config, local_get):
+def test_check_revision_metadata_structure(swh_config, requests_mock_datadir):
package_url = 'https://ftp.gnu.org/gnu/8sync/'
tarballs = [{
'time': '944729610',
'archive': 'https://ftp.gnu.org/gnu/8sync/8sync-0.1.0.tar.gz',
'length': 221837,
}]
loader = GNULoader(package_url, tarballs)
actual_load_status = loader.load()
assert actual_load_status['status'] == 'eventful'
expected_revision_id = hash_to_bytes(
'44183488c0774ce3c957fa19ba695cf18a4a42b3')
revision = list(loader.storage.revision_get([expected_revision_id]))[0]
assert revision is not None
check_metadata_paths(revision['metadata'], paths=[
('intrinsic', dict),
('extrinsic.provider', str),
('extrinsic.when', str),
('extrinsic.raw', dict),
('original_artifact.filename', str),
('original_artifact.length', int),
('original_artifact.checksums', dict),
])
-def test_visit_with_release_artifact_no_prior_visit(swh_config, local_get):
+def test_visit_with_release_artifact_no_prior_visit(
+ swh_config, requests_mock_datadir):
"""With no prior visit, load a gnu project ends up with 1 snapshot
"""
assert 'SWH_CONFIG_FILENAME' in os.environ # cf. tox.ini
package_url = 'https://ftp.gnu.org/gnu/8sync/'
tarballs = [{
'time': 944729610,
'archive': 'https://ftp.gnu.org/gnu/8sync/8sync-0.1.0.tar.gz',
'length': 221837,
}]
loader = GNULoader(package_url, tarballs)
actual_load_status = loader.load()
assert actual_load_status['status'] == 'eventful'
stats = loader.storage.stat_counters()
assert {
'content': len(_expected_new_contents_first_visit),
'directory': len(_expected_new_directories_first_visit),
'origin': 1,
'origin_visit': 1,
'person': 1,
'release': 0,
'revision': len(_expected_new_revisions_first_visit),
'skipped_content': 0,
'snapshot': 1
} == stats
expected_contents = map(hash_to_bytes, _expected_new_contents_first_visit)
assert list(loader.storage.content_missing_per_sha1(expected_contents)) \
== []
expected_dirs = map(hash_to_bytes, _expected_new_directories_first_visit)
assert list(loader.storage.directory_missing(expected_dirs)) == []
expected_revs = map(hash_to_bytes, _expected_new_revisions_first_visit)
assert list(loader.storage.revision_missing(expected_revs)) == []
expected_snapshot = {
'id': _expected_new_snapshot_first_visit_id,
'branches': _expected_branches_first_visit,
}
check_snapshot(expected_snapshot, loader.storage)
-def test_2_visits_without_change(swh_config, local_get):
+def test_2_visits_without_change(swh_config, requests_mock_datadir):
"""With no prior visit, load a gnu project ends up with 1 snapshot
"""
assert 'SWH_CONFIG_FILENAME' in os.environ # cf. tox.ini
url = 'https://ftp.gnu.org/gnu/8sync/'
tarballs = [{
'time': 944729610,
'archive': 'https://ftp.gnu.org/gnu/8sync/8sync-0.1.0.tar.gz',
'length': 221837,
}]
loader = GNULoader(url, tarballs)
actual_load_status = loader.load()
assert actual_load_status['status'] == 'eventful'
origin_visit = list(loader.storage.origin_visit_get(url))[-1]
assert origin_visit['status'] == 'full'
actual_load_status2 = loader.load()
assert actual_load_status2['status'] == 'uneventful'
origin_visit2 = list(loader.storage.origin_visit_get(url))[-1]
assert origin_visit2['status'] == 'full'
urls = [
- m.url for m in local_get.request_history
+ m.url for m in requests_mock_datadir.request_history
if m.url.startswith('https://ftp.gnu.org')
]
assert len(urls) == 1
-def test_2_visits_with_new_artifact(swh_config, local_get):
+def test_2_visits_with_new_artifact(swh_config, requests_mock_datadir):
"""With no prior visit, load a gnu project ends up with 1 snapshot
"""
assert 'SWH_CONFIG_FILENAME' in os.environ # cf. tox.ini
url = 'https://ftp.gnu.org/gnu/8sync/'
tarball1 = {
'time': 944729610,
'archive': 'https://ftp.gnu.org/gnu/8sync/8sync-0.1.0.tar.gz',
'length': 221837,
}
loader = GNULoader(url, [tarball1])
actual_load_status = loader.load()
assert actual_load_status['status'] == 'eventful'
origin_visit = list(loader.storage.origin_visit_get(url))[-1]
assert origin_visit['status'] == 'full'
stats = loader.storage.stat_counters()
assert {
'content': len(_expected_new_contents_first_visit),
'directory': len(_expected_new_directories_first_visit),
'origin': 1,
'origin_visit': 1,
'person': 1,
'release': 0,
'revision': len(_expected_new_revisions_first_visit),
'skipped_content': 0,
'snapshot': 1
} == stats
urls = [
- m.url for m in local_get.request_history
+ m.url for m in requests_mock_datadir.request_history
if m.url.startswith('https://ftp.gnu.org')
]
assert len(urls) == 1
tarball2 = {
'time': 1480991830,
'archive': 'https://ftp.gnu.org/gnu/8sync/8sync-0.2.0.tar.gz',
'length': 238466,
}
loader2 = GNULoader(url, [tarball1, tarball2])
# implementation detail: share the storage in between visits
loader2.storage = loader.storage
stats2 = loader2.storage.stat_counters()
assert stats == stats2 # ensure we share the storage
actual_load_status2 = loader2.load()
assert actual_load_status2['status'] == 'eventful'
stats2 = loader.storage.stat_counters()
assert {
'content': len(_expected_new_contents_first_visit) + 14,
'directory': len(_expected_new_directories_first_visit) + 8,
'origin': 1,
'origin_visit': 1 + 1,
'person': 1,
'release': 0,
'revision': len(_expected_new_revisions_first_visit) + 1,
'skipped_content': 0,
'snapshot': 1 + 1,
} == stats2
origin_visit2 = list(loader.storage.origin_visit_get(url))[-1]
assert origin_visit2['status'] == 'full'
urls = [
- m.url for m in local_get.request_history
+ m.url for m in requests_mock_datadir.request_history
if m.url.startswith('https://ftp.gnu.org')
]
# 1 artifact (2nd time no modification) + 1 new artifact
assert len(urls) == 2
diff --git a/swh/loader/package/tests/test_npm.py b/swh/loader/package/tests/test_npm.py
index 301d67c..bce41b0 100644
--- a/swh/loader/package/tests/test_npm.py
+++ b/swh/loader/package/tests/test_npm.py
@@ -1,525 +1,526 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import json
import os
from swh.model.hashutil import hash_to_bytes
from swh.loader.package.npm import (
parse_npm_package_author, extract_npm_package_author
)
from swh.loader.package.tests.common import (
check_snapshot, check_metadata_paths
)
from swh.loader.package.npm import NpmLoader
def _parse_author_string_test(author_str, expected_result):
assert parse_npm_package_author(author_str) == expected_result
assert parse_npm_package_author(' %s' % author_str) == expected_result
assert parse_npm_package_author('%s ' % author_str) == expected_result
def test_parse_npm_package_author():
_parse_author_string_test(
'John Doe',
{
'name': 'John Doe'
}
)
_parse_author_string_test(
'<john.doe@foo.bar>',
{
'email': 'john.doe@foo.bar'
}
)
_parse_author_string_test(
'(https://john.doe)',
{
'url': 'https://john.doe'
}
)
_parse_author_string_test(
'John Doe <john.doe@foo.bar>',
{
'name': 'John Doe',
'email': 'john.doe@foo.bar'
}
)
_parse_author_string_test(
'John Doe<john.doe@foo.bar>',
{
'name': 'John Doe',
'email': 'john.doe@foo.bar'
}
)
_parse_author_string_test(
'John Doe (https://john.doe)',
{
'name': 'John Doe',
'url': 'https://john.doe'
}
)
_parse_author_string_test(
'John Doe(https://john.doe)',
{
'name': 'John Doe',
'url': 'https://john.doe'
}
)
_parse_author_string_test(
'<john.doe@foo.bar> (https://john.doe)',
{
'email': 'john.doe@foo.bar',
'url': 'https://john.doe'
}
)
_parse_author_string_test(
'(https://john.doe) <john.doe@foo.bar>',
{
'email': 'john.doe@foo.bar',
'url': 'https://john.doe'
}
)
_parse_author_string_test(
'John Doe <john.doe@foo.bar> (https://john.doe)',
{
'name': 'John Doe',
'email': 'john.doe@foo.bar',
'url': 'https://john.doe'
}
)
_parse_author_string_test(
'John Doe (https://john.doe) <john.doe@foo.bar>',
{
'name': 'John Doe',
'email': 'john.doe@foo.bar',
'url': 'https://john.doe'
}
)
_parse_author_string_test(
'John Doe<john.doe@foo.bar> (https://john.doe)',
{
'name': 'John Doe',
'email': 'john.doe@foo.bar',
'url': 'https://john.doe'
}
)
_parse_author_string_test(
'John Doe<john.doe@foo.bar>(https://john.doe)',
{
'name': 'John Doe',
'email': 'john.doe@foo.bar',
'url': 'https://john.doe'
}
)
_parse_author_string_test('', {})
_parse_author_string_test('<>', {})
_parse_author_string_test(' <>', {})
_parse_author_string_test('<>()', {})
_parse_author_string_test('<> ()', {})
_parse_author_string_test('()', {})
_parse_author_string_test(' ()', {})
_parse_author_string_test(
'John Doe <> ()',
{
'name': 'John Doe'
}
)
_parse_author_string_test(
'John Doe <>',
{
'name': 'John Doe'
}
)
_parse_author_string_test(
'John Doe ()',
{
'name': 'John Doe'
}
)
def test_extract_npm_package_author(datadir):
    """Check extract_npm_package_author against the author formats seen
    in the wild: a "name <email>" string, a list of author dicts, a list
    of "name <email> (url)" strings, and a single author dict.

    """
    package_metadata_filepath = os.path.join(
        datadir, 'replicate.npmjs.com', 'org_visit1')

    with open(package_metadata_filepath) as json_file:
        package_metadata = json.load(json_file)

    # Bug fix: this comparison was missing its `assert`, so its result
    # was silently discarded and the 0.0.2 check never actually ran.
    assert (
        extract_npm_package_author(package_metadata['versions']['0.0.2']) ==
        {
            'fullname': b'mooz <stillpedant@gmail.com>',
            'name': b'mooz',
            'email': b'stillpedant@gmail.com'
        }
    )

    assert (
        extract_npm_package_author(package_metadata['versions']['0.0.3']) ==
        {
            'fullname': b'Masafumi Oyamada <stillpedant@gmail.com>',
            'name': b'Masafumi Oyamada',
            'email': b'stillpedant@gmail.com'
        }
    )

    # "author" as a single "name <email>" string
    package_json = json.loads('''
    {
      "name": "highlightjs-line-numbers.js",
      "version": "2.7.0",
      "description": "Highlight.js line numbers plugin.",
      "main": "src/highlightjs-line-numbers.js",
      "dependencies": {},
      "devDependencies": {
        "gulp": "^4.0.0",
        "gulp-rename": "^1.4.0",
        "gulp-replace": "^0.6.1",
        "gulp-uglify": "^1.2.0"
      },
      "repository": {
        "type": "git",
        "url": "https://github.com/wcoder/highlightjs-line-numbers.js.git"
      },
      "author": "Yauheni Pakala <evgeniy.pakalo@gmail.com>",
      "license": "MIT",
      "bugs": {
        "url": "https://github.com/wcoder/highlightjs-line-numbers.js/issues"
      },
      "homepage": "http://wcoder.github.io/highlightjs-line-numbers.js/"
    }''')  # noqa

    assert extract_npm_package_author(package_json) == \
        {
            'fullname': b'Yauheni Pakala <evgeniy.pakalo@gmail.com>',
            'name': b'Yauheni Pakala',
            'email': b'evgeniy.pakalo@gmail.com'
        }

    # "authors" as a list of dicts: the first entry is used and its
    # missing email maps to None
    package_json = json.loads('''
    {
      "name": "3-way-diff",
      "version": "0.0.1",
      "description": "3-way diffing of JavaScript objects",
      "main": "index.js",
      "authors": [
        {
          "name": "Shawn Walsh",
          "url": "https://github.com/shawnpwalsh"
        },
        {
          "name": "Markham F Rollins IV",
          "url": "https://github.com/mrollinsiv"
        }
      ],
      "keywords": [
        "3-way diff",
        "3 way diff",
        "three-way diff",
        "three way diff"
      ],
      "devDependencies": {
        "babel-core": "^6.20.0",
        "babel-preset-es2015": "^6.18.0",
        "mocha": "^3.0.2"
      },
      "dependencies": {
        "lodash": "^4.15.0"
      }
    }''')

    assert extract_npm_package_author(package_json) == \
        {
            'fullname': b'Shawn Walsh',
            'name': b'Shawn Walsh',
            'email': None
        }

    # "author" as a list of "name <email> (url)" strings: the first
    # entry is used
    package_json = json.loads('''
    {
      "name": "yfe-ynpm",
      "version": "1.0.0",
      "homepage": "http://gitlab.ywwl.com/yfe/yfe-ynpm",
      "repository": {
        "type": "git",
        "url": "git@gitlab.ywwl.com:yfe/yfe-ynpm.git"
      },
      "author": [
        "fengmk2 <fengmk2@gmail.com> (https://fengmk2.com)",
        "xufuzi <xufuzi@ywwl.com> (https://7993.org)"
      ],
      "license": "MIT"
    }''')

    assert extract_npm_package_author(package_json) == \
        {
            'fullname': b'fengmk2 <fengmk2@gmail.com>',
            'name': b'fengmk2',
            'email': b'fengmk2@gmail.com'
        }

    # "authors" as a single dict with name and email keys
    package_json = json.loads('''
    {
      "name": "umi-plugin-whale",
      "version": "0.0.8",
      "description": "Internal contract component",
      "authors": {
        "name": "xiaohuoni",
        "email": "448627663@qq.com"
      },
      "repository": "alitajs/whale",
      "devDependencies": {
        "np": "^3.0.4",
        "umi-tools": "*"
      },
      "license": "MIT"
    }''')

    assert extract_npm_package_author(package_json) == \
        {
            'fullname': b'xiaohuoni <448627663@qq.com>',
            'name': b'xiaohuoni',
            'email': b'448627663@qq.com'
        }
def normalize_hashes(hashes):
    """Convert hex hash identifier(s) to their bytes form.

    Accepts a single hex string, a list of hex strings, or a mapping of
    hex string to hex string; returns the same structure with every
    hash run through hash_to_bytes.

    """
    if isinstance(hashes, str):
        return hash_to_bytes(hashes)
    if isinstance(hashes, list):
        return list(map(hash_to_bytes, hashes))
    return {
        hash_to_bytes(key): hash_to_bytes(value)
        for key, value in hashes.items()
    }
# Expected outcome of the first visit of the npm 'org' package test
# dataset: object identifiers (hex, normalized to bytes) the loader is
# expected to write into storage.

# sha1 identifiers of the contents created by the first visit
_expected_new_contents_first_visit = normalize_hashes([
    '4ce3058e16ab3d7e077f65aabf855c34895bf17c',
    '858c3ceee84c8311adc808f8cdb30d233ddc9d18',
    '0fa33b4f5a4e0496da6843a38ff1af8b61541996',
    '85a410f8ef8eb8920f2c384a9555566ad4a2e21b',
    '9163ac8025923d5a45aaac482262893955c9b37b',
    '692cf623b8dd2c5df2c2998fd95ae4ec99882fb4',
    '18c03aac6d3e910efb20039c15d70ab5e0297101',
    '41265c42446aac17ca769e67d1704f99e5a1394d',
    '783ff33f5882813dca9239452c4a7cadd4dba778',
    'b029cfb85107aee4590c2434a3329bfcf36f8fa1',
    '112d1900b4c2e3e9351050d1b542c9744f9793f3',
    '5439bbc4bd9a996f1a38244e6892b71850bc98fd',
    'd83097a2f994b503185adf4e719d154123150159',
    'd0939b4898e83090ee55fd9d8a60e312cfadfbaf',
    'b3523a26f7147e4af40d9d462adaae6d49eda13e',
    'cd065fb435d6fb204a8871bcd623d0d0e673088c',
    '2854a40855ad839a54f4b08f5cff0cf52fca4399',
    'b8a53bbaac34ebb8c6169d11a4b9f13b05c583fe',
    '0f73d56e1cf480bded8a1ecf20ec6fc53c574713',
    '0d9882b2dfafdce31f4e77fe307d41a44a74cefe',
    '585fc5caab9ead178a327d3660d35851db713df1',
    'e8cd41a48d79101977e3036a87aeb1aac730686f',
    '5414efaef33cceb9f3c9eb5c4cc1682cd62d14f7',
    '9c3cc2763bf9e9e37067d3607302c4776502df98',
    '3649a68410e354c83cd4a38b66bd314de4c8f5c9',
    'e96ed0c091de1ebdf587104eaf63400d1974a1fe',
    '078ca03d2f99e4e6eab16f7b75fbb7afb699c86c',
    '38de737da99514de6559ff163c988198bc91367a',
])

# identifiers of the directories created by the first visit
_expected_new_directories_first_visit = normalize_hashes([
    '3370d20d6f96dc1c9e50f083e2134881db110f4f',
    '42753c0c2ab00c4501b552ac4671c68f3cf5aece',
    'd7895533ef5edbcffdea3f057d9fef3a1ef845ce',
    '80579be563e2ef3e385226fe7a3f079b377f142c',
    '3b0ddc6a9e58b4b53c222da4e27b280b6cda591c',
    'bcad03ce58ac136f26f000990fc9064e559fe1c0',
    '5fc7e82a1bc72e074665c6078c6d3fad2f13d7ca',
    'e3cd26beba9b1e02f6762ef54bd9ac80cc5f25fd',
    '584b5b4b6cf7f038095e820b99386a9c232de931',
    '184c8d6d0d242f2b1792ef9d3bf396a5434b7f7a',
    'bb5f4ee143c970367eb409f2e4c1104898048b9d',
    '1b95491047add1103db0dfdfa84a9735dcb11e88',
    'a00c6de13471a2d66e64aca140ddb21ef5521e62',
    '5ce6c1cd5cda2d546db513aaad8c72a44c7771e2',
    'c337091e349b6ac10d38a49cdf8c2401ef9bb0f2',
    '202fafcd7c0f8230e89d5496ad7f44ab12b807bf',
    '775cc516543be86c15c1dc172f49c0d4e6e78235',
    'ff3d1ead85a14f891e8b3fa3a89de39db1b8de2e',
])

# mapping {revision id: root directory id} for the revisions created by
# the first visit
_expected_new_revisions_first_visit = normalize_hashes({
    'd8a1c7474d2956ac598a19f0f27d52f7015f117e':
    '42753c0c2ab00c4501b552ac4671c68f3cf5aece',
    '5f9eb78af37ffd12949f235e86fac04898f9f72a':
    '3370d20d6f96dc1c9e50f083e2134881db110f4f',
    'ba019b192bdb94bd0b5bd68b3a5f92b5acc2239a':
    'd7895533ef5edbcffdea3f057d9fef3a1ef845ce'}
)

# identifier of the snapshot created by the first visit
_expected_new_snapshot_first_visit_id = normalize_hashes(
    'd0587e1195aed5a8800411a008f2f2d627f18e2d')

# branches of that snapshot: one branch per release, plus a HEAD alias
# pointing at the latest release
_expected_branches_first_visit = {
    'HEAD': {
        'target': 'releases/0.0.4',
        'target_type': 'alias'
    },
    'releases/0.0.2': {
        'target': 'd8a1c7474d2956ac598a19f0f27d52f7015f117e',
        'target_type': 'revision'
    },
    'releases/0.0.3': {
        'target': '5f9eb78af37ffd12949f235e86fac04898f9f72a',
        'target_type': 'revision'
    },
    'releases/0.0.4': {
        'target': 'ba019b192bdb94bd0b5bd68b3a5f92b5acc2239a',
        'target_type': 'revision'
    }
}
def package_url(package):
    """Build the npmjs.com origin URL for the given package name."""
    return 'https://www.npmjs.com/package/{}'.format(package)
def package_metadata_url(package):
    """Build the replicate.npmjs.com metadata URL for the given package."""
    return 'https://replicate.npmjs.com/{}/'.format(package)
-def test_revision_metadata_structure(swh_config, local_get):
+def test_revision_metadata_structure(swh_config, requests_mock_datadir):
package = 'org'
loader = NpmLoader(package,
package_url(package),
package_metadata_url(package))
actual_load_status = loader.load()
assert actual_load_status['status'] == 'eventful'
expected_revision_id = hash_to_bytes(
'd8a1c7474d2956ac598a19f0f27d52f7015f117e')
revision = list(loader.storage.revision_get([expected_revision_id]))[0]
assert revision is not None
check_metadata_paths(revision['metadata'], paths=[
('intrinsic.tool', str),
('intrinsic.raw', dict),
('extrinsic.provider', str),
('extrinsic.when', str),
('extrinsic.raw', dict),
('original_artifact.filename', str),
('original_artifact.length', int),
('original_artifact.checksums', dict),
])
-def test_npm_loader_first_visit(swh_config, local_get):
+def test_npm_loader_first_visit(swh_config, requests_mock_datadir):
package = 'org'
loader = NpmLoader(package,
package_url(package),
package_metadata_url(package))
actual_load_status = loader.load()
assert actual_load_status['status'] == 'eventful'
stats = loader.storage.stat_counters()
assert {
'content': len(_expected_new_contents_first_visit),
'directory': len(_expected_new_directories_first_visit),
'origin': 1,
'origin_visit': 1,
'person': 2,
'release': 0,
'revision': len(_expected_new_revisions_first_visit),
'skipped_content': 0,
'snapshot': 1,
} == stats
assert len(list(loader.storage.content_get(
_expected_new_contents_first_visit))) == len(
_expected_new_contents_first_visit)
assert list(loader.storage.directory_missing(
_expected_new_directories_first_visit)) == []
assert list(loader.storage.revision_missing(
_expected_new_revisions_first_visit)) == []
expected_snapshot = {
'id': _expected_new_snapshot_first_visit_id,
'branches': _expected_branches_first_visit,
}
check_snapshot(expected_snapshot, loader.storage)
-def test_npm_loader_incremental_visit(swh_config, local_get_visits):
+def test_npm_loader_incremental_visit(
+ swh_config, requests_mock_datadir_visits):
package = 'org'
url = package_url(package)
metadata_url = package_metadata_url(package)
loader = NpmLoader(package, url, metadata_url)
actual_load_status = loader.load()
assert actual_load_status['status'] == 'eventful'
origin_visit = list(loader.storage.origin_visit_get(url))[-1]
assert origin_visit['status'] == 'full'
stats = loader.storage.stat_counters()
assert {
'content': len(_expected_new_contents_first_visit),
'directory': len(_expected_new_directories_first_visit),
'origin': 1,
'origin_visit': 1,
'person': 2,
'release': 0,
'revision': len(_expected_new_revisions_first_visit),
'skipped_content': 0,
'snapshot': 1,
} == stats
loader._info = None # reset loader internal state
actual_load_status2 = loader.load()
assert actual_load_status2['status'] == 'eventful'
origin_visit2 = list(loader.storage.origin_visit_get(url))[-1]
assert origin_visit2['status'] == 'full'
stats = loader.storage.stat_counters()
assert { # 3 new releases artifacts
'content': len(_expected_new_contents_first_visit) + 14,
'directory': len(_expected_new_directories_first_visit) + 15,
'origin': 1,
'origin_visit': 2,
'person': 2,
'release': 0,
'revision': len(_expected_new_revisions_first_visit) + 3,
'skipped_content': 0,
'snapshot': 2,
} == stats
urls = [
- m.url for m in local_get_visits.request_history
+ m.url for m in requests_mock_datadir_visits.request_history
if m.url.startswith('https://registry.npmjs.org')
]
assert len(urls) == len(set(urls)) # we visited each artifact once across
diff --git a/swh/loader/package/tests/test_pypi.py b/swh/loader/package/tests/test_pypi.py
index 014f301..9cac2dc 100644
--- a/swh/loader/package/tests/test_pypi.py
+++ b/swh/loader/package/tests/test_pypi.py
@@ -1,652 +1,653 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
from os import path
import pytest
from unittest.mock import patch
from swh.core.tarball import uncompress
+from swh.core.pytest_plugin import requests_mock_datadir_factory
from swh.model.hashutil import hash_to_bytes
+
from swh.loader.package.pypi import (
PyPILoader, pypi_api_url, author, extract_intrinsic_metadata
)
from swh.loader.package.tests.common import (
check_snapshot, check_metadata_paths
)
-from swh.loader.package.tests.conftest import local_get_factory
-
def test_author_basic():
    """author() on a well-formed name + email yields a full author dict."""
    metadata = {
        'author': 'i-am-groot',
        'author_email': 'iam@groot.org',
    }
    expected = {
        'fullname': b'i-am-groot <iam@groot.org>',
        'name': b'i-am-groot',
        'email': b'iam@groot.org',
    }
    assert author(metadata) == expected
def test_author_empty_email():
    """An empty author_email yields no '<email>' suffix in fullname."""
    metadata = {
        'author': 'i-am-groot',
        'author_email': '',
    }
    expected = {
        'fullname': b'i-am-groot',
        'name': b'i-am-groot',
        'email': b'',
    }
    assert author(metadata) == expected
def test_author_empty_name():
    """An empty author name still keeps the '<email>' part in fullname."""
    metadata = {
        'author': '',
        'author_email': 'iam@groot.org',
    }
    expected = {
        'fullname': b' <iam@groot.org>',
        'name': b'',
        'email': b'iam@groot.org',
    }
    assert author(metadata) == expected
def test_author_malformed():
    """A malformed author string is kept verbatim; a None email stays None."""
    metadata = {
        'author': "['pierre', 'paul', 'jacques']",
        'author_email': None,
    }
    expected = {
        'fullname': b"['pierre', 'paul', 'jacques']",
        'name': b"['pierre', 'paul', 'jacques']",
        'email': None,
    }
    assert author(metadata) == expected
def test_author_malformed_2():
    """List-looking name and email strings are combined verbatim."""
    metadata = {
        'author': '[marie, jeanne]',
        'author_email': '[marie@some, jeanne@thing]',
    }
    expected = {
        'fullname': b'[marie, jeanne] <[marie@some, jeanne@thing]>',
        'name': b'[marie, jeanne]',
        'email': b'[marie@some, jeanne@thing]',
    }
    assert author(metadata) == expected
def test_author_malformed_3():
    """List-looking author and email strings are combined verbatim."""
    data = {
        'author': '[marie, jeanne, pierre]',
        'author_email': '[marie@somewhere.org, jeanne@somewhere.org]',
    }
    actual_author = author(data)
    expected_author = {
        'fullname': b'[marie, jeanne, pierre] <[marie@somewhere.org, jeanne@somewhere.org]>', # noqa
        'name': b'[marie, jeanne, pierre]',
        'email': b'[marie@somewhere.org, jeanne@somewhere.org]',
    }
    # Bug fix: the comparison below was missing its `assert`, so its
    # result was discarded and this test could never fail.
    assert actual_author == expected_author
# configuration error #
def test_badly_configured_loader_raise(monkeypatch):
    """Instantiating the loader without SWH_CONFIG_FILENAME must raise."""
    monkeypatch.delenv('SWH_CONFIG_FILENAME', raising=False)
    with pytest.raises(ValueError) as exc_info:
        PyPILoader(url='some-url')
    assert 'Misconfiguration' in exc_info.value.args[0]
def test_pypi_api_url():
    """A pypi project url is mapped onto the pypi JSON api endpoint."""
    actual_url = pypi_api_url('https://pypi.org/project/requests')
    assert actual_url == 'https://pypi.org/pypi/requests/json'
@pytest.mark.fs
def test_extract_intrinsic_metadata(tmp_path, datadir):
    """Parsing an existing archive's PKG-INFO should yield its metadata"""
    destination = str(tmp_path)
    archive = path.join(
        datadir, 'files.pythonhosted.org', '0805nexter-1.1.0.zip')
    uncompress(archive, dest=destination)

    expected = {
        'metadata_version': '1.0',
        'name': '0805nexter',
        'version': '1.1.0',
        'summary': 'a simple printer of nested lest',
        'home_page': 'http://www.hp.com',
        'author': 'hgtkpython',
        'author_email': '2868989685@qq.com',
        'platforms': ['UNKNOWN'],
    }
    assert extract_intrinsic_metadata(destination) == expected
@pytest.mark.fs
def test_extract_intrinsic_metadata_failures(tmp_path):
    """Parsing inexistant path/archive/PKG-INFO yields an empty dict"""
    # top-level path does not exist at all
    assert extract_intrinsic_metadata('/something-inexistant') == {}
    # no second-level directory (as pypi archives are expected to have)
    assert extract_intrinsic_metadata(tmp_path) == {}
    # second-level directory exists but contains no PKG-INFO
    os.mkdir(str(tmp_path / 'something'))
    assert extract_intrinsic_metadata(tmp_path) == {}
# LOADER SCENARIO #
# "edge" cases (for the same origin) #
# no release artifact:
# {visit full, status: uneventful, no contents, etc...}
-local_get_missing_all = local_get_factory(ignore_urls=[
+requests_mock_datadir_missing_all = requests_mock_datadir_factory(ignore_urls=[
'https://files.pythonhosted.org/packages/ec/65/c0116953c9a3f47de89e71964d6c7b0c783b01f29fa3390584dbf3046b4d/0805nexter-1.1.0.zip', # noqa
'https://files.pythonhosted.org/packages/c4/a0/4562cda161dc4ecbbe9e2a11eb365400c0461845c5be70d73869786809c4/0805nexter-1.2.0.zip', # noqa
])
-def test_no_release_artifact(swh_config, local_get_missing_all):
+def test_no_release_artifact(swh_config, requests_mock_datadir_missing_all):
"""Load a pypi project with all artifacts missing ends up with no snapshot
"""
url = 'https://pypi.org/project/0805nexter'
loader = PyPILoader(url)
actual_load_status = loader.load()
assert actual_load_status['status'] == 'uneventful'
stats = loader.storage.stat_counters()
assert {
'content': 0,
'directory': 0,
'origin': 1,
'origin_visit': 1,
'person': 0,
'release': 0,
'revision': 0,
'skipped_content': 0,
'snapshot': 1,
} == stats
origin_visit = next(loader.storage.origin_visit_get(url))
assert origin_visit['status'] == 'partial'
# problem during loading:
# {visit: partial, status: uneventful, no snapshot}
def test_release_with_traceback(swh_config):
    """A crash during loading yields a failed load and a partial visit,
    with nothing written to storage and no snapshot.

    """
    url = 'https://pypi.org/project/0805nexter'
    broken_release = patch(
        'swh.loader.package.pypi.PyPILoader.get_default_release',
        side_effect=ValueError('Problem'))
    with broken_release:
        loader = PyPILoader(url)
        actual_load_status = loader.load()
        assert actual_load_status['status'] == 'failed'

        stats = loader.storage.stat_counters()
        expected_stats = {
            'content': 0,
            'directory': 0,
            'origin': 1,
            'origin_visit': 1,
            'person': 0,
            'release': 0,
            'revision': 0,
            'skipped_content': 0,
            'snapshot': 0,
        }
        assert expected_stats == stats

        origin_visit = next(loader.storage.origin_visit_get(url))
        assert origin_visit['status'] == 'partial'
# problem during loading: failure early enough in between swh contents...
# some contents (contents, directories, etc...) have been written in storage
# {visit: partial, status: eventful, no snapshot}
# problem during loading: failure late enough we can have snapshots (some
# revisions are written in storage already)
# {visit: partial, status: eventful, snapshot}
# "normal" cases (for the same origin) #
-local_get_missing_one = local_get_factory(ignore_urls=[
+requests_mock_datadir_missing_one = requests_mock_datadir_factory(ignore_urls=[
'https://files.pythonhosted.org/packages/ec/65/c0116953c9a3f47de89e71964d6c7b0c783b01f29fa3390584dbf3046b4d/0805nexter-1.1.0.zip', # noqa
])
# some missing release artifacts:
# {visit partial, status: eventful, 1 snapshot}
-def test_revision_metadata_structure(swh_config, local_get):
+def test_revision_metadata_structure(swh_config, requests_mock_datadir):
url = 'https://pypi.org/project/0805nexter'
loader = PyPILoader(url)
actual_load_status = loader.load()
assert actual_load_status['status'] == 'eventful'
expected_revision_id = hash_to_bytes(
'e445da4da22b31bfebb6ffc4383dbf839a074d21')
revision = list(loader.storage.revision_get([expected_revision_id]))[0]
assert revision is not None
check_metadata_paths(revision['metadata'], paths=[
('intrinsic.tool', str),
('intrinsic.raw', dict),
('extrinsic.provider', str),
('extrinsic.when', str),
('extrinsic.raw', dict),
('original_artifact.filename', str),
('original_artifact.length', int),
('original_artifact.checksums', dict),
])
-def test_visit_with_missing_artifact(swh_config, local_get_missing_one):
+def test_visit_with_missing_artifact(
+ swh_config, requests_mock_datadir_missing_one):
"""Load a pypi project with some missing artifacts ends up with 1 snapshot
"""
url = 'https://pypi.org/project/0805nexter'
loader = PyPILoader(url)
actual_load_status = loader.load()
assert actual_load_status['status'] == 'eventful'
stats = loader.storage.stat_counters()
assert {
'content': 3,
'directory': 2,
'origin': 1,
'origin_visit': 1,
'person': 1,
'release': 0,
'revision': 1,
'skipped_content': 0,
'snapshot': 1
} == stats
expected_contents = map(hash_to_bytes, [
'405859113963cb7a797642b45f171d6360425d16',
'e5686aa568fdb1d19d7f1329267082fe40482d31',
'83ecf6ec1114fd260ca7a833a2d165e71258c338',
])
assert list(loader.storage.content_missing_per_sha1(expected_contents))\
== []
expected_dirs = map(hash_to_bytes, [
'b178b66bd22383d5f16f4f5c923d39ca798861b4',
'c3a58f8b57433a4b56caaa5033ae2e0931405338',
])
assert list(loader.storage.directory_missing(expected_dirs)) == []
# {revision hash: directory hash}
expected_revs = {
hash_to_bytes('e445da4da22b31bfebb6ffc4383dbf839a074d21'): hash_to_bytes('b178b66bd22383d5f16f4f5c923d39ca798861b4'), # noqa
}
assert list(loader.storage.revision_missing(expected_revs)) == []
expected_branches = {
'releases/1.2.0': {
'target': 'e445da4da22b31bfebb6ffc4383dbf839a074d21',
'target_type': 'revision',
},
'HEAD': {
'target': 'releases/1.2.0',
'target_type': 'alias',
},
}
expected_snapshot = {
'id': 'dd0e4201a232b1c104433741dbf45895b8ac9355',
'branches': expected_branches,
}
check_snapshot(expected_snapshot, storage=loader.storage)
origin_visit = next(loader.storage.origin_visit_get(url))
assert origin_visit['status'] == 'partial'
-def test_visit_with_1_release_artifact(swh_config, local_get):
+def test_visit_with_1_release_artifact(swh_config, requests_mock_datadir):
"""With no prior visit, load a pypi project ends up with 1 snapshot
"""
url = 'https://pypi.org/project/0805nexter'
loader = PyPILoader(url)
actual_load_status = loader.load()
assert actual_load_status['status'] == 'eventful'
stats = loader.storage.stat_counters()
assert {
'content': 6,
'directory': 4,
'origin': 1,
'origin_visit': 1,
'person': 1,
'release': 0,
'revision': 2,
'skipped_content': 0,
'snapshot': 1
} == stats
expected_contents = map(hash_to_bytes, [
'a61e24cdfdab3bb7817f6be85d37a3e666b34566',
'938c33483285fd8ad57f15497f538320df82aeb8',
'a27576d60e08c94a05006d2e6d540c0fdb5f38c8',
'405859113963cb7a797642b45f171d6360425d16',
'e5686aa568fdb1d19d7f1329267082fe40482d31',
'83ecf6ec1114fd260ca7a833a2d165e71258c338',
])
assert list(loader.storage.content_missing_per_sha1(expected_contents))\
== []
expected_dirs = map(hash_to_bytes, [
'05219ba38bc542d4345d5638af1ed56c7d43ca7d',
'cf019eb456cf6f78d8c4674596f1c9a97ece8f44',
'b178b66bd22383d5f16f4f5c923d39ca798861b4',
'c3a58f8b57433a4b56caaa5033ae2e0931405338',
])
assert list(loader.storage.directory_missing(expected_dirs)) == []
# {revision hash: directory hash}
expected_revs = {
hash_to_bytes('4c99891f93b81450385777235a37b5e966dd1571'): hash_to_bytes('05219ba38bc542d4345d5638af1ed56c7d43ca7d'), # noqa
hash_to_bytes('e445da4da22b31bfebb6ffc4383dbf839a074d21'): hash_to_bytes('b178b66bd22383d5f16f4f5c923d39ca798861b4'), # noqa
}
assert list(loader.storage.revision_missing(expected_revs)) == []
expected_branches = {
'releases/1.1.0': {
'target': '4c99891f93b81450385777235a37b5e966dd1571',
'target_type': 'revision',
},
'releases/1.2.0': {
'target': 'e445da4da22b31bfebb6ffc4383dbf839a074d21',
'target_type': 'revision',
},
'HEAD': {
'target': 'releases/1.2.0',
'target_type': 'alias',
},
}
expected_snapshot = {
'id': 'ba6e158ada75d0b3cfb209ffdf6daa4ed34a227a',
'branches': expected_branches,
}
check_snapshot(expected_snapshot, loader.storage)
origin_visit = next(loader.storage.origin_visit_get(url))
assert origin_visit['status'] == 'full'
-def test_multiple_visits_with_no_change(swh_config, local_get):
+def test_multiple_visits_with_no_change(swh_config, requests_mock_datadir):
"""Multiple visits with no changes results in 1 same snapshot
"""
url = 'https://pypi.org/project/0805nexter'
loader = PyPILoader(url)
actual_load_status = loader.load()
assert actual_load_status['status'] == 'eventful'
stats = loader.storage.stat_counters()
assert {
'content': 6,
'directory': 4,
'origin': 1,
'origin_visit': 1,
'person': 1,
'release': 0,
'revision': 2,
'skipped_content': 0,
'snapshot': 1
} == stats
expected_branches = {
'releases/1.1.0': {
'target': '4c99891f93b81450385777235a37b5e966dd1571',
'target_type': 'revision',
},
'releases/1.2.0': {
'target': 'e445da4da22b31bfebb6ffc4383dbf839a074d21',
'target_type': 'revision',
},
'HEAD': {
'target': 'releases/1.2.0',
'target_type': 'alias',
},
}
snapshot_id = 'ba6e158ada75d0b3cfb209ffdf6daa4ed34a227a'
expected_snapshot = {
'id': snapshot_id,
'branches': expected_branches,
}
check_snapshot(expected_snapshot, loader.storage)
origin_visit = next(loader.storage.origin_visit_get(url))
assert origin_visit['status'] == 'full'
actual_load_status2 = loader.load()
assert actual_load_status2['status'] == 'uneventful'
stats2 = loader.storage.stat_counters()
expected_stats2 = stats.copy()
expected_stats2['origin_visit'] = 1 + 1
assert expected_stats2 == stats2
# same snapshot
actual_snapshot_id = origin_visit['snapshot']['id']
assert actual_snapshot_id == hash_to_bytes(snapshot_id)
-def test_incremental_visit(swh_config, local_get_visits):
+def test_incremental_visit(swh_config, requests_mock_datadir_visits):
"""With prior visit, 2nd load will result with a different snapshot
"""
url = 'https://pypi.org/project/0805nexter'
loader = PyPILoader(url)
visit1_actual_load_status = loader.load()
visit1_stats = loader.storage.stat_counters()
assert visit1_actual_load_status['status'] == 'eventful'
origin_visit1 = next(loader.storage.origin_visit_get(url))
assert origin_visit1['status'] == 'full'
assert {
'content': 6,
'directory': 4,
'origin': 1,
'origin_visit': 1,
'person': 1,
'release': 0,
'revision': 2,
'skipped_content': 0,
'snapshot': 1
} == visit1_stats
# Reset internal state
loader._info = None
visit2_actual_load_status = loader.load()
visit2_stats = loader.storage.stat_counters()
assert visit2_actual_load_status['status'] == 'eventful'
visits = list(loader.storage.origin_visit_get(url))
assert len(visits) == 2
assert visits[1]['status'] == 'full'
assert {
'content': 6 + 1, # 1 more content
'directory': 4 + 2, # 2 more directories
'origin': 1,
'origin_visit': 1 + 1,
'person': 1,
'release': 0,
'revision': 2 + 1, # 1 more revision
'skipped_content': 0,
'snapshot': 1 + 1, # 1 more snapshot
} == visit2_stats
expected_contents = map(hash_to_bytes, [
'a61e24cdfdab3bb7817f6be85d37a3e666b34566',
'938c33483285fd8ad57f15497f538320df82aeb8',
'a27576d60e08c94a05006d2e6d540c0fdb5f38c8',
'405859113963cb7a797642b45f171d6360425d16',
'e5686aa568fdb1d19d7f1329267082fe40482d31',
'83ecf6ec1114fd260ca7a833a2d165e71258c338',
'92689fa2b7fb4d4fc6fb195bf73a50c87c030639'
])
assert list(loader.storage.content_missing_per_sha1(expected_contents))\
== []
expected_dirs = map(hash_to_bytes, [
'05219ba38bc542d4345d5638af1ed56c7d43ca7d',
'cf019eb456cf6f78d8c4674596f1c9a97ece8f44',
'b178b66bd22383d5f16f4f5c923d39ca798861b4',
'c3a58f8b57433a4b56caaa5033ae2e0931405338',
'e226e7e4ad03b4fc1403d69a18ebdd6f2edd2b3a',
'52604d46843b898f5a43208045d09fcf8731631b',
])
assert list(loader.storage.directory_missing(expected_dirs)) == []
# {revision hash: directory hash}
expected_revs = {
hash_to_bytes('4c99891f93b81450385777235a37b5e966dd1571'): hash_to_bytes('05219ba38bc542d4345d5638af1ed56c7d43ca7d'), # noqa
hash_to_bytes('e445da4da22b31bfebb6ffc4383dbf839a074d21'): hash_to_bytes('b178b66bd22383d5f16f4f5c923d39ca798861b4'), # noqa
hash_to_bytes('51247143b01445c9348afa9edfae31bf7c5d86b1'): hash_to_bytes('e226e7e4ad03b4fc1403d69a18ebdd6f2edd2b3a'), # noqa
}
assert list(loader.storage.revision_missing(expected_revs)) == []
expected_branches = {
'releases/1.1.0': {
'target': '4c99891f93b81450385777235a37b5e966dd1571',
'target_type': 'revision',
},
'releases/1.2.0': {
'target': 'e445da4da22b31bfebb6ffc4383dbf839a074d21',
'target_type': 'revision',
},
'releases/1.3.0': {
'target': '51247143b01445c9348afa9edfae31bf7c5d86b1',
'target_type': 'revision',
},
'HEAD': {
'target': 'releases/1.3.0',
'target_type': 'alias',
},
}
expected_snapshot = {
'id': '2e5149a7b0725d18231a37b342e9b7c4e121f283',
'branches': expected_branches,
}
check_snapshot(expected_snapshot, loader.storage)
origin_visit = list(loader.storage.origin_visit_get(url))[-1]
assert origin_visit['status'] == 'full'
urls = [
- m.url for m in local_get_visits.request_history
+ m.url for m in requests_mock_datadir_visits.request_history
if m.url.startswith('https://files.pythonhosted.org')
]
# visited each artifact once across 2 visits
assert len(urls) == len(set(urls))
# release artifact, no new artifact
# {visit full, status uneventful, same snapshot as before}
# release artifact, old artifact with different checksums
# {visit full, status full, new snapshot with shared history and some new
# different history}
# release with multiple sdist artifacts per pypi "version"
# snapshot branch output is different
-def test_visit_1_release_with_2_artifacts(swh_config, local_get):
+def test_visit_1_release_with_2_artifacts(swh_config, requests_mock_datadir):
"""With no prior visit, load a pypi project ends up with 1 snapshot
"""
url = 'https://pypi.org/project/nexter'
loader = PyPILoader(url)
actual_load_status = loader.load()
assert actual_load_status['status'] == 'eventful'
expected_branches = {
'releases/1.1.0/nexter-1.1.0.zip': {
'target': '4c99891f93b81450385777235a37b5e966dd1571',
'target_type': 'revision',
},
'releases/1.1.0/nexter-1.1.0.tar.gz': {
'target': '0bf88f5760cca7665d0af4d6575d9301134fe11a',
'target_type': 'revision',
},
}
expected_snapshot = {
'id': 'a27e638a4dad6fbfa273c6ebec1c4bf320fb84c6',
'branches': expected_branches,
}
check_snapshot(expected_snapshot, loader.storage)
origin_visit = next(loader.storage.origin_visit_get(url))
assert origin_visit['status'] == 'full'
diff --git a/tox.ini b/tox.ini
index 0fb07c6..b5c9956 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,16 +1,17 @@
[tox]
envlist=flake8,py3
[testenv:py3]
deps =
.[testing]
pytest-cov
commands =
- pytest --cov=swh --cov-branch {posargs}
+ pytest --cov={envsitepackagesdir}/swh/loader/ --cov-branch \
+ {envsitepackagesdir}/swh/loader/ {posargs}
[testenv:flake8]
skip_install = true
deps =
flake8
commands =
{envpython} -m flake8

File Metadata

Mime Type
text/x-diff
Expires
Mon, Aug 25, 6:03 PM (3 d, 1 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3247444

Event Timeline