diff --git a/MANIFEST.in b/MANIFEST.in
index f601d81..4c91126 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,7 +1,7 @@
include Makefile
include requirements.txt
include requirements-swh.txt
include version.txt
include README.md
recursive-include swh/loader/package/tests/ *.tar.gz
-recursive-include swh/loader/package/tests/resources/ *
+recursive-include swh/loader/package/tests/data/ *
diff --git a/requirements-swh.txt b/requirements-swh.txt
index f18315a..84de3de 100644
--- a/requirements-swh.txt
+++ b/requirements-swh.txt
@@ -1,3 +1,4 @@
+swh.core >= 0.0.72
swh.model >= 0.0.18
swh.storage >= 0.0.152
swh.deposit
diff --git a/requirements-test.txt b/requirements-test.txt
index 5821059..5847ac3 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1,2 +1,3 @@
pytest
requests_mock
+swh-core[testing]
diff --git a/swh/loader/package/tests/conftest.py b/swh/loader/package/tests/conftest.py
index d3b2b65..f6e787f 100644
--- a/swh/loader/package/tests/conftest.py
+++ b/swh/loader/package/tests/conftest.py
@@ -1,133 +1,15 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-import logging
-import os
-import re
import pytest
-from functools import partial
from os import path
-from urllib.parse import urlparse
-
-
-logger = logging.getLogger(__name__)
-
-
-# Check get_local_factory function
-# Maximum number of iteration checks to generate requests responses
-MAX_VISIT_FILES = 10
@pytest.fixture
def swh_config(monkeypatch, datadir):
- conffile = os.path.join(datadir, 'loader.yml')
+ conffile = path.join(datadir, 'loader.yml')
monkeypatch.setenv('SWH_CONFIG_FILENAME', conffile)
return conffile
-
-
-def get_response_cb(request, context, datadir, ignore_urls=[], visits=None):
- """Mount point callback to fetch on disk the content of a request
-
- This is meant to be used as 'body' argument of the requests_mock.get()
- method.
-
- It will look for files on the local filesystem based on the requested URL,
- using the following rules:
-
- - files are searched in the datadir/<hostname> directory
-
- - the local file name is the path part of the URL with path hierarchy
- markers (aka '/') replaced by '_'
-
- Eg. if you use the requests_mock fixture in your test file as:
-
- requests_mock.get('https://nowhere.com', body=get_response_cb)
- # or even
- requests_mock.get(re.compile('https://'), body=get_response_cb)
-
- then a call requests.get like:
-
- requests.get('https://nowhere.com/path/to/resource')
-
- will look the content of the response in:
-
- datadir/nowhere.com/path_to_resource
-
- Args:
- request (requests.Request): Object requests
- context (requests.Context): Object holding response metadata
- information (status_code, headers, etc...)
- ignore_urls (List): urls whose status response should be 404 even if
- the local file exists
- visits (Optional[Dict]): Map of url, number of visits. If None, disable
- multi visit support (default)
-
- Returns:
- Optional[FileDescriptor] on the on disk file to read from the test
- context
-
- """
- logger.debug('get_response_cb(%s, %s)', request, context)
- logger.debug('url: %s', request.url)
- logger.debug('ignore_urls: %s', ignore_urls)
- if request.url in ignore_urls:
- context.status_code = 404
- return None
- url = urlparse(request.url)
- dirname = url.hostname # pypi.org | files.pythonhosted.org
- # url.path: pypi/<project>/json -> local file: pypi_<project>_json
- filename = url.path[1:]
- if filename.endswith('/'):
- filename = filename[:-1]
- filename = filename.replace('/', '_')
- filepath = path.join(datadir, dirname, filename)
- if visits is not None:
- visit = visits.get(url, 0)
- visits[url] = visit + 1
- if visit:
- filepath = filepath + '_visit%s' % visit
-
- if not path.isfile(filepath):
- logger.debug('not found filepath: %s', filepath)
- context.status_code = 404
- return None
- fd = open(filepath, 'rb')
- context.headers['content-length'] = str(path.getsize(filepath))
- return fd
-
-
-@pytest.fixture
-def datadir(request):
- """By default, returns the test directory
-
- """
- return path.join(path.dirname(request.fspath), 'data')
-
-
-def local_get_factory(ignore_urls=[],
- has_multi_visit=False):
- @pytest.fixture
- def local_get(requests_mock, datadir):
- if not has_multi_visit:
- cb = partial(get_response_cb,
- ignore_urls=ignore_urls,
- datadir=datadir)
- requests_mock.get(re.compile('https://'), body=cb)
- else:
- visits = {}
- requests_mock.get(re.compile('https://'), body=partial(
- get_response_cb, ignore_urls=ignore_urls, visits=visits,
- datadir=datadir)
- )
-
- return requests_mock
-
- return local_get
-
-
-local_get = local_get_factory([])
-
-local_get_visits = local_get_factory(has_multi_visit=True)
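Note: the local_get fixtures removed above now come from swh.core.pytest_plugin (hence the swh.core >= 0.0.72 and swh-core[testing] requirement bumps earlier in this diff), under the names requests_mock_datadir, requests_mock_datadir_visits and requests_mock_datadir_factory. A minimal sketch of the replacement usage follows; the URL and data file are illustrative, assuming a hypothetical data/example.com/path_to_resource file laid out per the hostname/underscored-path convention documented in the removed get_response_cb:

import requests

def test_fetch_from_datadir(requests_mock_datadir):
    # served from data/example.com/path_to_resource ('/' replaced by '_'),
    # assuming such a file exists next to the test module
    response = requests.get('https://example.com/path/to/resource')
    assert response.ok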
diff --git a/swh/loader/package/tests/fixture/__init__.py b/swh/loader/package/tests/fixture/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/swh/loader/package/tests/fixture/conftest.py b/swh/loader/package/tests/fixture/conftest.py
deleted file mode 100644
index 399adac..0000000
--- a/swh/loader/package/tests/fixture/conftest.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# Copyright (C) 2019 The Software Heritage developers
-# See the AUTHORS file at the top-level directory of this distribution
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-import pytest
-
-from os import path
-
-
-DATADIR = path.join(path.abspath(path.dirname(__file__)), 'data')
-
-
-@pytest.fixture
-def datadir():
- return DATADIR
diff --git a/swh/loader/package/tests/fixture/data/example.com/file.json b/swh/loader/package/tests/fixture/data/example.com/file.json
deleted file mode 100644
index 000a8dd..0000000
--- a/swh/loader/package/tests/fixture/data/example.com/file.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{
- "welcome": "you"
-}
diff --git a/swh/loader/package/tests/fixture/test_conftest.py b/swh/loader/package/tests/fixture/test_conftest.py
deleted file mode 100644
index 3cdcbba..0000000
--- a/swh/loader/package/tests/fixture/test_conftest.py
+++ /dev/null
@@ -1,24 +0,0 @@
-# Copyright (C) 2019 The Software Heritage developers
-# See the AUTHORS file at the top-level directory of this distribution
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-import requests
-
-from .conftest import DATADIR
-
-# In this arborescence, we override in the local conftest.py module the
-# "datadir" fixture to specify where to retrieve the data files from.
-
-
-def test_local_get_with_datadir_fixture_override(local_get):
- """Override datadir fixture should retrieve data from elsewhere
-
- """
- response = requests.get('https://example.com/file.json')
- assert response.ok
- assert response.json() == {'welcome': 'you'}
-
-
-def test_data_dir_override(datadir):
- assert datadir == DATADIR
diff --git a/swh/loader/package/tests/test_conftest.py b/swh/loader/package/tests/test_conftest.py
deleted file mode 100644
index 8683ee3..0000000
--- a/swh/loader/package/tests/test_conftest.py
+++ /dev/null
@@ -1,89 +0,0 @@
-# Copyright (C) 2019 The Software Heritage developers
-# See the AUTHORS file at the top-level directory of this distribution
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-import requests
-
-from os import path
-
-from swh.loader.package.tests.conftest import local_get_factory
-
-
-def test_get_response_cb_with_visits_nominal(local_get_visits):
- response = requests.get('https://example.com/file.json')
- assert response.ok
- assert response.json() == {'hello': 'you'}
-
- response = requests.get('https://example.com/file.json')
- assert response.ok
- assert response.json() == {'hello': 'world'}
-
- response = requests.get('https://example.com/file.json')
- assert not response.ok
- assert response.status_code == 404
-
-
-def test_get_response_cb_with_visits(local_get_visits):
- response = requests.get('https://example.com/file.json')
- assert response.ok
- assert response.json() == {'hello': 'you'}
-
- response = requests.get('https://example.com/other.json')
- assert response.ok
- assert response.json() == "foobar"
-
- response = requests.get('https://example.com/file.json')
- assert response.ok
- assert response.json() == {'hello': 'world'}
-
- response = requests.get('https://example.com/other.json')
- assert not response.ok
- assert response.status_code == 404
-
- response = requests.get('https://example.com/file.json')
- assert not response.ok
- assert response.status_code == 404
-
-
-def test_get_response_cb_no_visit(local_get):
- response = requests.get('https://example.com/file.json')
- assert response.ok
- assert response.json() == {'hello': 'you'}
-
- response = requests.get('https://example.com/file.json')
- assert response.ok
- assert response.json() == {'hello': 'you'}
-
-
-local_get_ignore = local_get_factory(
- ignore_urls=['https://example.com/file.json'],
- has_multi_visit=False,
-)
-
-
-def test_get_response_cb_ignore_url(local_get_ignore):
- response = requests.get('https://example.com/file.json')
- assert not response.ok
- assert response.status_code == 404
-
-
-local_get_ignore_and_visit = local_get_factory(
- ignore_urls=['https://example.com/file.json'],
- has_multi_visit=True,
-)
-
-
-def test_get_response_cb_ignore_url_with_visit(local_get_ignore_and_visit):
- response = requests.get('https://example.com/file.json')
- assert not response.ok
- assert response.status_code == 404
-
- response = requests.get('https://example.com/file.json')
- assert not response.ok
- assert response.status_code == 404
-
-
-def test_data_dir(datadir):
- expected_datadir = path.join(path.abspath(path.dirname(__file__)), 'data')
- assert datadir == expected_datadir
diff --git a/swh/loader/package/tests/test_deposit.py b/swh/loader/package/tests/test_deposit.py
index 9b6c222..8cc5723 100644
--- a/swh/loader/package/tests/test_deposit.py
+++ b/swh/loader/package/tests/test_deposit.py
@@ -1,199 +1,199 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import re
from swh.model.hashutil import hash_to_bytes
from swh.loader.package.deposit import DepositLoader
from swh.loader.package.tests.common import (
check_snapshot, check_metadata_paths
)
-from swh.loader.package.tests.conftest import local_get_factory
+from swh.core.pytest_plugin import requests_mock_datadir_factory
def test_deposit_init_ok(swh_config):
url = 'some-url'
deposit_id = 999
loader = DepositLoader(url, deposit_id) # Something that does not exist
assert loader.url == url
assert loader.archive_url == '/%s/raw/' % deposit_id
assert loader.metadata_url == '/%s/meta/' % deposit_id
assert loader.deposit_update_url == '/%s/update/' % deposit_id
assert loader.client is not None
def test_deposit_loading_failure_to_fetch_metadata(swh_config):
"""Error during fetching artifact ends us with failed/partial visit
"""
# private api url form: 'https://deposit.s.o/1/private/hal/666/raw/'
url = 'some-url'
unknown_deposit_id = 666
loader = DepositLoader(url, unknown_deposit_id) # does not exist
actual_load_status = loader.load()
assert actual_load_status['status'] == 'failed'
stats = loader.storage.stat_counters()
assert {
'content': 0,
'directory': 0,
'origin': 1,
'origin_visit': 1,
'person': 0,
'release': 0,
'revision': 0,
'skipped_content': 0,
'snapshot': 0,
} == stats
origin_visit = next(loader.storage.origin_visit_get(url))
assert origin_visit['status'] == 'partial'
-local_get_missing_one = local_get_factory(ignore_urls=[
+requests_mock_datadir_missing_one = requests_mock_datadir_factory(ignore_urls=[
'https://deposit.softwareheritage.org/1/private/666/raw/',
])
def test_deposit_loading_failure_to_retrieve_1_artifact(
- swh_config, local_get_missing_one):
+ swh_config, requests_mock_datadir_missing_one):
"""Deposit with missing artifact ends up with an uneventful/partial visit
"""
# private api url form: 'https://deposit.s.o/1/private/hal/666/raw/'
url = 'some-url-2'
deposit_id = 666
loader = DepositLoader(url, deposit_id)
assert loader.archive_url
actual_load_status = loader.load()
assert actual_load_status['status'] == 'uneventful'
stats = loader.storage.stat_counters()
assert {
'content': 0,
'directory': 0,
'origin': 1,
'origin_visit': 1,
'person': 0,
'release': 0,
'revision': 0,
'skipped_content': 0,
'snapshot': 1,
} == stats
origin_visit = next(loader.storage.origin_visit_get(url))
assert origin_visit['status'] == 'partial'
-def test_revision_metadata_structure(swh_config, local_get, requests_mock):
+def test_revision_metadata_structure(swh_config, requests_mock_datadir):
# do not care for deposit update query
- requests_mock.put(re.compile('https'))
+ requests_mock_datadir.put(re.compile('https'))
url = 'https://hal-test.archives-ouvertes.fr/some-external-id'
deposit_id = 666
loader = DepositLoader(url, deposit_id)
assert loader.archive_url
actual_load_status = loader.load()
assert actual_load_status['status'] == 'eventful'
expected_revision_id = hash_to_bytes(
'9471c606239bccb1f269564c9ea114e1eeab9eb4')
revision = list(loader.storage.revision_get([expected_revision_id]))[0]
assert revision is not None
check_metadata_paths(revision['metadata'], paths=[
('extrinsic.provider', str),
('extrinsic.when', str),
('extrinsic.raw', dict),
('original_artifact.filename', str),
('original_artifact.length', int),
('original_artifact.checksums', dict),
])
-def test_deposit_loading_ok(swh_config, local_get, requests_mock):
- requests_mock.put(re.compile('https')) # do not care for put
+def test_deposit_loading_ok(swh_config, requests_mock_datadir):
+ requests_mock_datadir.put(re.compile('https')) # do not care for put
url = 'https://hal-test.archives-ouvertes.fr/some-external-id'
deposit_id = 666
loader = DepositLoader(url, deposit_id)
assert loader.archive_url
actual_load_status = loader.load()
assert actual_load_status['status'] == 'eventful'
stats = loader.storage.stat_counters()
assert {
'content': 303,
'directory': 12,
'origin': 1,
'origin_visit': 1,
'person': 1,
'release': 0,
'revision': 1,
'skipped_content': 0,
'snapshot': 1,
} == stats
origin_visit = next(loader.storage.origin_visit_get(url))
assert origin_visit['status'] == 'full'
expected_branches = {
'HEAD': {
'target': '9471c606239bccb1f269564c9ea114e1eeab9eb4',
'target_type': 'revision',
},
}
expected_snapshot = {
'id': '453f455d0efb69586143cd6b6e5897f9906b53a7',
'branches': expected_branches,
}
check_snapshot(expected_snapshot, storage=loader.storage)
# check metadata
tool = {
"name": "swh-deposit",
"version": "0.0.1",
"configuration": {
"sword_version": "2",
}
}
tool = loader.storage.tool_get(tool)
assert tool is not None
assert tool['id'] is not None
provider = {
"provider_name": "hal",
"provider_type": "deposit_client",
"provider_url": "https://hal-test.archives-ouvertes.fr/",
"metadata": None,
}
provider = loader.storage.metadata_provider_get_by(provider)
assert provider is not None
assert provider['id'] is not None
metadata = loader.storage.origin_metadata_get_by(
url, provider_type='deposit_client')
assert metadata is not None
assert isinstance(metadata, list)
assert len(metadata) == 1
metadata0 = metadata[0]
assert metadata0['provider_id'] == provider['id']
assert metadata0['provider_type'] == 'deposit_client'
assert metadata0['tool_id'] == tool['id']
diff --git a/swh/loader/package/tests/test_gnu.py b/swh/loader/package/tests/test_gnu.py
index 7b0fc63..ea70a83 100644
--- a/swh/loader/package/tests/test_gnu.py
+++ b/swh/loader/package/tests/test_gnu.py
@@ -1,348 +1,349 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
import re
from swh.model.hashutil import hash_to_bytes
from swh.loader.package.gnu import GNULoader, get_version
from swh.loader.package.tests.common import (
check_snapshot, check_metadata_paths
)
def test_get_version():
"""From url to branch name should yield something relevant
"""
for url, expected_branchname in [
('https://gnu.org/sthg/info-2.1.0.tar.gz', '2.1.0'),
('https://gnu.org/sthg/info-2.1.2.zip', '2.1.2'),
('https://sthg.org/gnu/sthg.tar.gz', 'sthg'),
('https://sthg.org/gnu/DLDF-1.1.4.tar.gz', '1.1.4'),
('https://sthg.org/gnu/anubis-latest.tar.bz2', 'latest'),
('https://ftp.org/gnu/aris-w32.zip', 'w32'),
('https://ftp.org/gnu/aris-w32-2.2.zip', 'w32-2.2'),
('https://ftp.org/gnu/autogen.info.tar.gz', 'autogen.info'),
('https://ftp.org/gnu/crypto-build-demo.tar.gz',
'crypto-build-demo'),
('https://ftp.org/gnu/clue+clio+xit.clisp.tar.gz',
'clue+clio+xit.clisp'),
('https://ftp.org/gnu/clue+clio.for-pcl.tar.gz',
'clue+clio.for-pcl'),
('https://ftp.org/gnu/clisp-hppa2.0-hp-hpux10.20.tar.gz',
'hppa2.0-hp-hpux10.20'),
('clisp-i386-solaris2.6.tar.gz', 'i386-solaris2.6'),
('clisp-mips-sgi-irix6.5.tar.gz', 'mips-sgi-irix6.5'),
('clisp-powerpc-apple-macos.tar.gz', 'powerpc-apple-macos'),
('clisp-powerpc-unknown-linuxlibc6.tar.gz',
'powerpc-unknown-linuxlibc6'),
('clisp-rs6000-ibm-aix3.2.5.tar.gz', 'rs6000-ibm-aix3.2.5'),
('clisp-sparc-redhat51-linux.tar.gz', 'sparc-redhat51-linux'),
('clisp-sparc-sun-solaris2.4.tar.gz', 'sparc-sun-solaris2.4'),
('clisp-sparc-sun-sunos4.1.3_U1.tar.gz',
'sparc-sun-sunos4.1.3_U1'),
('clisp-2.25.1-powerpc-apple-MacOSX.tar.gz',
'2.25.1-powerpc-apple-MacOSX'),
('clisp-2.27-PowerMacintosh-powerpc-Darwin-1.3.7.tar.gz',
'2.27-PowerMacintosh-powerpc-Darwin-1.3.7'),
('clisp-2.27-i686-unknown-Linux-2.2.19.tar.gz',
'2.27-i686-unknown-Linux-2.2.19'),
('clisp-2.28-i386-i386-freebsd-4.3-RELEASE.tar.gz',
'2.28-i386-i386-freebsd-4.3-RELEASE'),
('clisp-2.28-i686-unknown-cygwin_me-4.90-1.3.10.tar.gz',
'2.28-i686-unknown-cygwin_me-4.90-1.3.10'),
('clisp-2.29-i386-i386-freebsd-4.6-STABLE.tar.gz',
'2.29-i386-i386-freebsd-4.6-STABLE'),
('clisp-2.29-i686-unknown-cygwin_nt-5.0-1.3.12.tar.gz',
'2.29-i686-unknown-cygwin_nt-5.0-1.3.12'),
('gcl-2.5.3-ansi-japi-xdr.20030701_mingw32.zip',
'2.5.3-ansi-japi-xdr.20030701_mingw32'),
('gettext-runtime-0.13.1.bin.woe32.zip', '0.13.1.bin.woe32'),
('sather-logo_images.tar.gz', 'sather-logo_images'),
('sather-specification-000328.html.tar.gz', '000328.html')
]:
actual_branchname = get_version(url)
assert actual_branchname == expected_branchname
_expected_new_contents_first_visit = [
'e9258d81faf5881a2f96a77ba609396f82cb97ad',
'1170cf105b04b7e2822a0e09d2acf71da7b9a130',
'fbd27c3f41f2668624ffc80b7ba5db9b92ff27ac',
'0057bec9b5422aff9256af240b177ac0e3ac2608',
'2b8d0d0b43a1078fc708930c8ddc2956a86c566e',
'27de3b3bc6545d2a797aeeb4657c0e215a0c2e55',
'2e6db43f5cd764e677f416ff0d0c78c7a82ef19b',
'ae9be03bd2a06ed8f4f118d3fe76330bb1d77f62',
'edeb33282b2bffa0e608e9d2fd960fd08093c0ea',
'd64e64d4c73679323f8d4cde2643331ba6c20af9',
'7a756602914be889c0a2d3952c710144b3e64cb0',
'84fb589b554fcb7f32b806951dcf19518d67b08f',
'8624bcdae55baeef00cd11d5dfcfa60f68710a02',
'e08441aeab02704cfbd435d6445f7c072f8f524e',
'f67935bc3a83a67259cda4b2d43373bd56703844',
'809788434b433eb2e3cfabd5d591c9a659d5e3d8',
'7d7c6c8c5ebaeff879f61f37083a3854184f6c41',
'b99fec102eb24bffd53ab61fc30d59e810f116a2',
'7d149b28eaa228b3871c91f0d5a95a2fa7cb0c68',
'f0c97052e567948adf03e641301e9983c478ccff',
'7fb724242e2b62b85ca64190c31dcae5303e19b3',
'4f9709e64a9134fe8aefb36fd827b84d8b617ab5',
'7350628ccf194c2c3afba4ac588c33e3f3ac778d',
'0bb892d9391aa706dc2c3b1906567df43cbe06a2',
'49d4c0ce1a16601f1e265d446b6c5ea6b512f27c',
'6b5cc594ac466351450f7f64a0b79fdaf4435ad3',
'3046e5d1f70297e2a507b98224b6222c9688d610',
'1572607d456d7f633bc6065a2b3048496d679a31',
]
_expected_new_directories_first_visit = [
'daabc65ec75d487b1335ffc101c0ac11c803f8fc',
'263be23b4a8101d3ad0d9831319a3e0f2b065f36',
'7f6e63ba6eb3e2236f65892cd822041f1a01dd5c',
'4db0a3ecbc976083e2dac01a62f93729698429a3',
'dfef1c80e1098dd5deda664bb44a9ab1f738af13',
'eca971d346ea54d95a6e19d5051f900237fafdaa',
'3aebc29ed1fccc4a6f2f2010fb8e57882406b528',
]
_expected_new_revisions_first_visit = {
'44183488c0774ce3c957fa19ba695cf18a4a42b3':
'3aebc29ed1fccc4a6f2f2010fb8e57882406b528'
}
_expected_branches_first_visit = {
'HEAD': {
'target_type': 'alias',
'target': 'releases/0.1.0',
},
'releases/0.1.0': {
'target_type': 'revision',
'target': '44183488c0774ce3c957fa19ba695cf18a4a42b3',
},
}
# hash is different than before as we changed the snapshot
# gnu used to use `release/` (singular) instead of plural
_expected_new_snapshot_first_visit_id = 'c419397fd912039825ebdbea378bc6283f006bf5' # noqa
def test_visit_with_no_artifact_found(swh_config, requests_mock):
package_url = 'https://ftp.gnu.org/gnu/8sync/'
tarballs = [{
'time': '944729610',
'archive': 'https://ftp.gnu.org/gnu/8sync/8sync-0.1.0.tar.gz',
'length': 221837,
}]
loader = GNULoader(package_url, tarballs)
requests_mock.get(re.compile('https://'), status_code=404)
actual_load_status = loader.load()
assert actual_load_status['status'] == 'uneventful'
stats = loader.storage.stat_counters()
assert {
'content': 0,
'directory': 0,
'origin': 1,
'origin_visit': 1,
'person': 0,
'release': 0,
'revision': 0,
'skipped_content': 0,
'snapshot': 1,
} == stats
origin_visit = next(loader.storage.origin_visit_get(package_url))
assert origin_visit['status'] == 'partial'
-def test_check_revision_metadata_structure(swh_config, local_get):
+def test_check_revision_metadata_structure(swh_config, requests_mock_datadir):
package_url = 'https://ftp.gnu.org/gnu/8sync/'
tarballs = [{
'time': '944729610',
'archive': 'https://ftp.gnu.org/gnu/8sync/8sync-0.1.0.tar.gz',
'length': 221837,
}]
loader = GNULoader(package_url, tarballs)
actual_load_status = loader.load()
assert actual_load_status['status'] == 'eventful'
expected_revision_id = hash_to_bytes(
'44183488c0774ce3c957fa19ba695cf18a4a42b3')
revision = list(loader.storage.revision_get([expected_revision_id]))[0]
assert revision is not None
check_metadata_paths(revision['metadata'], paths=[
('intrinsic', dict),
('extrinsic.provider', str),
('extrinsic.when', str),
('extrinsic.raw', dict),
('original_artifact.filename', str),
('original_artifact.length', int),
('original_artifact.checksums', dict),
])
-def test_visit_with_release_artifact_no_prior_visit(swh_config, local_get):
+def test_visit_with_release_artifact_no_prior_visit(
+ swh_config, requests_mock_datadir):
"""With no prior visit, load a gnu project ends up with 1 snapshot
"""
assert 'SWH_CONFIG_FILENAME' in os.environ # cf. tox.ini
package_url = 'https://ftp.gnu.org/gnu/8sync/'
tarballs = [{
'time': 944729610,
'archive': 'https://ftp.gnu.org/gnu/8sync/8sync-0.1.0.tar.gz',
'length': 221837,
}]
loader = GNULoader(package_url, tarballs)
actual_load_status = loader.load()
assert actual_load_status['status'] == 'eventful'
stats = loader.storage.stat_counters()
assert {
'content': len(_expected_new_contents_first_visit),
'directory': len(_expected_new_directories_first_visit),
'origin': 1,
'origin_visit': 1,
'person': 1,
'release': 0,
'revision': len(_expected_new_revisions_first_visit),
'skipped_content': 0,
'snapshot': 1
} == stats
expected_contents = map(hash_to_bytes, _expected_new_contents_first_visit)
assert list(loader.storage.content_missing_per_sha1(expected_contents)) \
== []
expected_dirs = map(hash_to_bytes, _expected_new_directories_first_visit)
assert list(loader.storage.directory_missing(expected_dirs)) == []
expected_revs = map(hash_to_bytes, _expected_new_revisions_first_visit)
assert list(loader.storage.revision_missing(expected_revs)) == []
expected_snapshot = {
'id': _expected_new_snapshot_first_visit_id,
'branches': _expected_branches_first_visit,
}
check_snapshot(expected_snapshot, loader.storage)
-def test_2_visits_without_change(swh_config, local_get):
+def test_2_visits_without_change(swh_config, requests_mock_datadir):
"""With no prior visit, load a gnu project ends up with 1 snapshot
"""
assert 'SWH_CONFIG_FILENAME' in os.environ # cf. tox.ini
url = 'https://ftp.gnu.org/gnu/8sync/'
tarballs = [{
'time': 944729610,
'archive': 'https://ftp.gnu.org/gnu/8sync/8sync-0.1.0.tar.gz',
'length': 221837,
}]
loader = GNULoader(url, tarballs)
actual_load_status = loader.load()
assert actual_load_status['status'] == 'eventful'
origin_visit = list(loader.storage.origin_visit_get(url))[-1]
assert origin_visit['status'] == 'full'
actual_load_status2 = loader.load()
assert actual_load_status2['status'] == 'uneventful'
origin_visit2 = list(loader.storage.origin_visit_get(url))[-1]
assert origin_visit2['status'] == 'full'
urls = [
- m.url for m in local_get.request_history
+ m.url for m in requests_mock_datadir.request_history
if m.url.startswith('https://ftp.gnu.org')
]
assert len(urls) == 1
-def test_2_visits_with_new_artifact(swh_config, local_get):
+def test_2_visits_with_new_artifact(swh_config, requests_mock_datadir):
"""With no prior visit, load a gnu project ends up with 1 snapshot
"""
assert 'SWH_CONFIG_FILENAME' in os.environ # cf. tox.ini
url = 'https://ftp.gnu.org/gnu/8sync/'
tarball1 = {
'time': 944729610,
'archive': 'https://ftp.gnu.org/gnu/8sync/8sync-0.1.0.tar.gz',
'length': 221837,
}
loader = GNULoader(url, [tarball1])
actual_load_status = loader.load()
assert actual_load_status['status'] == 'eventful'
origin_visit = list(loader.storage.origin_visit_get(url))[-1]
assert origin_visit['status'] == 'full'
stats = loader.storage.stat_counters()
assert {
'content': len(_expected_new_contents_first_visit),
'directory': len(_expected_new_directories_first_visit),
'origin': 1,
'origin_visit': 1,
'person': 1,
'release': 0,
'revision': len(_expected_new_revisions_first_visit),
'skipped_content': 0,
'snapshot': 1
} == stats
urls = [
- m.url for m in local_get.request_history
+ m.url for m in requests_mock_datadir.request_history
if m.url.startswith('https://ftp.gnu.org')
]
assert len(urls) == 1
tarball2 = {
'time': 1480991830,
'archive': 'https://ftp.gnu.org/gnu/8sync/8sync-0.2.0.tar.gz',
'length': 238466,
}
loader2 = GNULoader(url, [tarball1, tarball2])
# implementation detail: share the storage in between visits
loader2.storage = loader.storage
stats2 = loader2.storage.stat_counters()
assert stats == stats2 # ensure we share the storage
actual_load_status2 = loader2.load()
assert actual_load_status2['status'] == 'eventful'
stats2 = loader.storage.stat_counters()
assert {
'content': len(_expected_new_contents_first_visit) + 14,
'directory': len(_expected_new_directories_first_visit) + 8,
'origin': 1,
'origin_visit': 1 + 1,
'person': 1,
'release': 0,
'revision': len(_expected_new_revisions_first_visit) + 1,
'skipped_content': 0,
'snapshot': 1 + 1,
} == stats2
origin_visit2 = list(loader.storage.origin_visit_get(url))[-1]
assert origin_visit2['status'] == 'full'
urls = [
- m.url for m in local_get.request_history
+ m.url for m in requests_mock_datadir.request_history
if m.url.startswith('https://ftp.gnu.org')
]
# 1 artifact (2nd time no modification) + 1 new artifact
assert len(urls) == 2
diff --git a/swh/loader/package/tests/test_npm.py b/swh/loader/package/tests/test_npm.py
index 301d67c..bce41b0 100644
--- a/swh/loader/package/tests/test_npm.py
+++ b/swh/loader/package/tests/test_npm.py
@@ -1,525 +1,526 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import json
import os
from swh.model.hashutil import hash_to_bytes
from swh.loader.package.npm import (
parse_npm_package_author, extract_npm_package_author
)
from swh.loader.package.tests.common import (
check_snapshot, check_metadata_paths
)
from swh.loader.package.npm import NpmLoader
def _parse_author_string_test(author_str, expected_result):
assert parse_npm_package_author(author_str) == expected_result
assert parse_npm_package_author(' %s' % author_str) == expected_result
assert parse_npm_package_author('%s ' % author_str) == expected_result
def test_parse_npm_package_author():
_parse_author_string_test(
'John Doe',
{
'name': 'John Doe'
}
)
_parse_author_string_test(
'<john.doe@foo.bar>',
{
'email': 'john.doe@foo.bar'
}
)
_parse_author_string_test(
'(https://john.doe)',
{
'url': 'https://john.doe'
}
)
_parse_author_string_test(
'John Doe <john.doe@foo.bar>',
{
'name': 'John Doe',
'email': 'john.doe@foo.bar'
}
)
_parse_author_string_test(
'John Doe<john.doe@foo.bar>',
{
'name': 'John Doe',
'email': 'john.doe@foo.bar'
}
)
_parse_author_string_test(
'John Doe (https://john.doe)',
{
'name': 'John Doe',
'url': 'https://john.doe'
}
)
_parse_author_string_test(
'John Doe(https://john.doe)',
{
'name': 'John Doe',
'url': 'https://john.doe'
}
)
_parse_author_string_test(
'<john.doe@foo.bar> (https://john.doe)',
{
'email': 'john.doe@foo.bar',
'url': 'https://john.doe'
}
)
_parse_author_string_test(
'(https://john.doe) <john.doe@foo.bar>',
{
'email': 'john.doe@foo.bar',
'url': 'https://john.doe'
}
)
_parse_author_string_test(
'John Doe <john.doe@foo.bar> (https://john.doe)',
{
'name': 'John Doe',
'email': 'john.doe@foo.bar',
'url': 'https://john.doe'
}
)
_parse_author_string_test(
'John Doe (https://john.doe) <john.doe@foo.bar>',
{
'name': 'John Doe',
'email': 'john.doe@foo.bar',
'url': 'https://john.doe'
}
)
_parse_author_string_test(
'John Doe<john.doe@foo.bar> (https://john.doe)',
{
'name': 'John Doe',
'email': 'john.doe@foo.bar',
'url': 'https://john.doe'
}
)
_parse_author_string_test(
'John Doe<john.doe@foo.bar>(https://john.doe)',
{
'name': 'John Doe',
'email': 'john.doe@foo.bar',
'url': 'https://john.doe'
}
)
_parse_author_string_test('', {})
_parse_author_string_test('<>', {})
_parse_author_string_test(' <>', {})
_parse_author_string_test('<>()', {})
_parse_author_string_test('<> ()', {})
_parse_author_string_test('()', {})
_parse_author_string_test(' ()', {})
_parse_author_string_test(
'John Doe <> ()',
{
'name': 'John Doe'
}
)
_parse_author_string_test(
'John Doe <>',
{
'name': 'John Doe'
}
)
_parse_author_string_test(
'John Doe ()',
{
'name': 'John Doe'
}
)
def test_extract_npm_package_author(datadir):
package_metadata_filepath = os.path.join(
datadir, 'replicate.npmjs.com', 'org_visit1')
with open(package_metadata_filepath) as json_file:
package_metadata = json.load(json_file)
assert extract_npm_package_author(package_metadata['versions']['0.0.2']) == \
{
'fullname': b'mooz <stillpedant@gmail.com>',
'name': b'mooz',
'email': b'stillpedant@gmail.com'
}
assert (
extract_npm_package_author(package_metadata['versions']['0.0.3']) ==
{
'fullname': b'Masafumi Oyamada <stillpedant@gmail.com>',
'name': b'Masafumi Oyamada',
'email': b'stillpedant@gmail.com'
}
)
package_json = json.loads('''
{
"name": "highlightjs-line-numbers.js",
"version": "2.7.0",
"description": "Highlight.js line numbers plugin.",
"main": "src/highlightjs-line-numbers.js",
"dependencies": {},
"devDependencies": {
"gulp": "^4.0.0",
"gulp-rename": "^1.4.0",
"gulp-replace": "^0.6.1",
"gulp-uglify": "^1.2.0"
},
"repository": {
"type": "git",
"url": "https://github.com/wcoder/highlightjs-line-numbers.js.git"
},
"author": "Yauheni Pakala <evgeniy.pakalo@gmail.com>",
"license": "MIT",
"bugs": {
"url": "https://github.com/wcoder/highlightjs-line-numbers.js/issues"
},
"homepage": "http://wcoder.github.io/highlightjs-line-numbers.js/"
}''') # noqa
assert extract_npm_package_author(package_json) == \
{
'fullname': b'Yauheni Pakala <evgeniy.pakalo@gmail.com>',
'name': b'Yauheni Pakala',
'email': b'evgeniy.pakalo@gmail.com'
}
package_json = json.loads('''
{
"name": "3-way-diff",
"version": "0.0.1",
"description": "3-way diffing of JavaScript objects",
"main": "index.js",
"authors": [
{
"name": "Shawn Walsh",
"url": "https://github.com/shawnpwalsh"
},
{
"name": "Markham F Rollins IV",
"url": "https://github.com/mrollinsiv"
}
],
"keywords": [
"3-way diff",
"3 way diff",
"three-way diff",
"three way diff"
],
"devDependencies": {
"babel-core": "^6.20.0",
"babel-preset-es2015": "^6.18.0",
"mocha": "^3.0.2"
},
"dependencies": {
"lodash": "^4.15.0"
}
}''')
assert extract_npm_package_author(package_json) == \
{
'fullname': b'Shawn Walsh',
'name': b'Shawn Walsh',
'email': None
}
package_json = json.loads('''
{
"name": "yfe-ynpm",
"version": "1.0.0",
"homepage": "http://gitlab.ywwl.com/yfe/yfe-ynpm",
"repository": {
"type": "git",
"url": "git@gitlab.ywwl.com:yfe/yfe-ynpm.git"
},
"author": [
"fengmk2 <fengmk2@gmail.com> (https://fengmk2.com)",
"xufuzi <xufuzi@ywwl.com> (https://7993.org)"
],
"license": "MIT"
}''')
assert extract_npm_package_author(package_json) == \
{
'fullname': b'fengmk2 <fengmk2@gmail.com>',
'name': b'fengmk2',
'email': b'fengmk2@gmail.com'
}
package_json = json.loads('''
{
"name": "umi-plugin-whale",
"version": "0.0.8",
"description": "Internal contract component",
"authors": {
"name": "xiaohuoni",
"email": "448627663@qq.com"
},
"repository": "alitajs/whale",
"devDependencies": {
"np": "^3.0.4",
"umi-tools": "*"
},
"license": "MIT"
}''')
assert extract_npm_package_author(package_json) == \
{
'fullname': b'xiaohuoni <448627663@qq.com>',
'name': b'xiaohuoni',
'email': b'448627663@qq.com'
}
def normalize_hashes(hashes):
if isinstance(hashes, str):
return hash_to_bytes(hashes)
if isinstance(hashes, list):
return [hash_to_bytes(x) for x in hashes]
return {hash_to_bytes(k): hash_to_bytes(v) for k, v in hashes.items()}
_expected_new_contents_first_visit = normalize_hashes([
'4ce3058e16ab3d7e077f65aabf855c34895bf17c',
'858c3ceee84c8311adc808f8cdb30d233ddc9d18',
'0fa33b4f5a4e0496da6843a38ff1af8b61541996',
'85a410f8ef8eb8920f2c384a9555566ad4a2e21b',
'9163ac8025923d5a45aaac482262893955c9b37b',
'692cf623b8dd2c5df2c2998fd95ae4ec99882fb4',
'18c03aac6d3e910efb20039c15d70ab5e0297101',
'41265c42446aac17ca769e67d1704f99e5a1394d',
'783ff33f5882813dca9239452c4a7cadd4dba778',
'b029cfb85107aee4590c2434a3329bfcf36f8fa1',
'112d1900b4c2e3e9351050d1b542c9744f9793f3',
'5439bbc4bd9a996f1a38244e6892b71850bc98fd',
'd83097a2f994b503185adf4e719d154123150159',
'd0939b4898e83090ee55fd9d8a60e312cfadfbaf',
'b3523a26f7147e4af40d9d462adaae6d49eda13e',
'cd065fb435d6fb204a8871bcd623d0d0e673088c',
'2854a40855ad839a54f4b08f5cff0cf52fca4399',
'b8a53bbaac34ebb8c6169d11a4b9f13b05c583fe',
'0f73d56e1cf480bded8a1ecf20ec6fc53c574713',
'0d9882b2dfafdce31f4e77fe307d41a44a74cefe',
'585fc5caab9ead178a327d3660d35851db713df1',
'e8cd41a48d79101977e3036a87aeb1aac730686f',
'5414efaef33cceb9f3c9eb5c4cc1682cd62d14f7',
'9c3cc2763bf9e9e37067d3607302c4776502df98',
'3649a68410e354c83cd4a38b66bd314de4c8f5c9',
'e96ed0c091de1ebdf587104eaf63400d1974a1fe',
'078ca03d2f99e4e6eab16f7b75fbb7afb699c86c',
'38de737da99514de6559ff163c988198bc91367a',
])
_expected_new_directories_first_visit = normalize_hashes([
'3370d20d6f96dc1c9e50f083e2134881db110f4f',
'42753c0c2ab00c4501b552ac4671c68f3cf5aece',
'd7895533ef5edbcffdea3f057d9fef3a1ef845ce',
'80579be563e2ef3e385226fe7a3f079b377f142c',
'3b0ddc6a9e58b4b53c222da4e27b280b6cda591c',
'bcad03ce58ac136f26f000990fc9064e559fe1c0',
'5fc7e82a1bc72e074665c6078c6d3fad2f13d7ca',
'e3cd26beba9b1e02f6762ef54bd9ac80cc5f25fd',
'584b5b4b6cf7f038095e820b99386a9c232de931',
'184c8d6d0d242f2b1792ef9d3bf396a5434b7f7a',
'bb5f4ee143c970367eb409f2e4c1104898048b9d',
'1b95491047add1103db0dfdfa84a9735dcb11e88',
'a00c6de13471a2d66e64aca140ddb21ef5521e62',
'5ce6c1cd5cda2d546db513aaad8c72a44c7771e2',
'c337091e349b6ac10d38a49cdf8c2401ef9bb0f2',
'202fafcd7c0f8230e89d5496ad7f44ab12b807bf',
'775cc516543be86c15c1dc172f49c0d4e6e78235',
'ff3d1ead85a14f891e8b3fa3a89de39db1b8de2e',
])
_expected_new_revisions_first_visit = normalize_hashes({
'd8a1c7474d2956ac598a19f0f27d52f7015f117e':
'42753c0c2ab00c4501b552ac4671c68f3cf5aece',
'5f9eb78af37ffd12949f235e86fac04898f9f72a':
'3370d20d6f96dc1c9e50f083e2134881db110f4f',
'ba019b192bdb94bd0b5bd68b3a5f92b5acc2239a':
'd7895533ef5edbcffdea3f057d9fef3a1ef845ce'}
)
_expected_new_snapshot_first_visit_id = normalize_hashes(
'd0587e1195aed5a8800411a008f2f2d627f18e2d')
_expected_branches_first_visit = {
'HEAD': {
'target': 'releases/0.0.4',
'target_type': 'alias'
},
'releases/0.0.2': {
'target': 'd8a1c7474d2956ac598a19f0f27d52f7015f117e',
'target_type': 'revision'
},
'releases/0.0.3': {
'target': '5f9eb78af37ffd12949f235e86fac04898f9f72a',
'target_type': 'revision'
},
'releases/0.0.4': {
'target': 'ba019b192bdb94bd0b5bd68b3a5f92b5acc2239a',
'target_type': 'revision'
}
}
def package_url(package):
return 'https://www.npmjs.com/package/%s' % package
def package_metadata_url(package):
return 'https://replicate.npmjs.com/%s/' % package
-def test_revision_metadata_structure(swh_config, local_get):
+def test_revision_metadata_structure(swh_config, requests_mock_datadir):
package = 'org'
loader = NpmLoader(package,
package_url(package),
package_metadata_url(package))
actual_load_status = loader.load()
assert actual_load_status['status'] == 'eventful'
expected_revision_id = hash_to_bytes(
'd8a1c7474d2956ac598a19f0f27d52f7015f117e')
revision = list(loader.storage.revision_get([expected_revision_id]))[0]
assert revision is not None
check_metadata_paths(revision['metadata'], paths=[
('intrinsic.tool', str),
('intrinsic.raw', dict),
('extrinsic.provider', str),
('extrinsic.when', str),
('extrinsic.raw', dict),
('original_artifact.filename', str),
('original_artifact.length', int),
('original_artifact.checksums', dict),
])
-def test_npm_loader_first_visit(swh_config, local_get):
+def test_npm_loader_first_visit(swh_config, requests_mock_datadir):
package = 'org'
loader = NpmLoader(package,
package_url(package),
package_metadata_url(package))
actual_load_status = loader.load()
assert actual_load_status['status'] == 'eventful'
stats = loader.storage.stat_counters()
assert {
'content': len(_expected_new_contents_first_visit),
'directory': len(_expected_new_directories_first_visit),
'origin': 1,
'origin_visit': 1,
'person': 2,
'release': 0,
'revision': len(_expected_new_revisions_first_visit),
'skipped_content': 0,
'snapshot': 1,
} == stats
assert len(list(loader.storage.content_get(
_expected_new_contents_first_visit))) == len(
_expected_new_contents_first_visit)
assert list(loader.storage.directory_missing(
_expected_new_directories_first_visit)) == []
assert list(loader.storage.revision_missing(
_expected_new_revisions_first_visit)) == []
expected_snapshot = {
'id': _expected_new_snapshot_first_visit_id,
'branches': _expected_branches_first_visit,
}
check_snapshot(expected_snapshot, loader.storage)
-def test_npm_loader_incremental_visit(swh_config, local_get_visits):
+def test_npm_loader_incremental_visit(
+ swh_config, requests_mock_datadir_visits):
package = 'org'
url = package_url(package)
metadata_url = package_metadata_url(package)
loader = NpmLoader(package, url, metadata_url)
actual_load_status = loader.load()
assert actual_load_status['status'] == 'eventful'
origin_visit = list(loader.storage.origin_visit_get(url))[-1]
assert origin_visit['status'] == 'full'
stats = loader.storage.stat_counters()
assert {
'content': len(_expected_new_contents_first_visit),
'directory': len(_expected_new_directories_first_visit),
'origin': 1,
'origin_visit': 1,
'person': 2,
'release': 0,
'revision': len(_expected_new_revisions_first_visit),
'skipped_content': 0,
'snapshot': 1,
} == stats
loader._info = None # reset loader internal state
actual_load_status2 = loader.load()
assert actual_load_status2['status'] == 'eventful'
origin_visit2 = list(loader.storage.origin_visit_get(url))[-1]
assert origin_visit2['status'] == 'full'
stats = loader.storage.stat_counters()
assert { # 3 new release artifacts
'content': len(_expected_new_contents_first_visit) + 14,
'directory': len(_expected_new_directories_first_visit) + 15,
'origin': 1,
'origin_visit': 2,
'person': 2,
'release': 0,
'revision': len(_expected_new_revisions_first_visit) + 3,
'skipped_content': 0,
'snapshot': 2,
} == stats
urls = [
- m.url for m in local_get_visits.request_history
+ m.url for m in requests_mock_datadir_visits.request_history
if m.url.startswith('https://registry.npmjs.org')
]
assert len(urls) == len(set(urls)) # we visited each artifact once across 2 visits
diff --git a/swh/loader/package/tests/test_pypi.py b/swh/loader/package/tests/test_pypi.py
index 014f301..9cac2dc 100644
--- a/swh/loader/package/tests/test_pypi.py
+++ b/swh/loader/package/tests/test_pypi.py
@@ -1,652 +1,653 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
from os import path
import pytest
from unittest.mock import patch
from swh.core.tarball import uncompress
+from swh.core.pytest_plugin import requests_mock_datadir_factory
from swh.model.hashutil import hash_to_bytes
+
from swh.loader.package.pypi import (
PyPILoader, pypi_api_url, author, extract_intrinsic_metadata
)
from swh.loader.package.tests.common import (
check_snapshot, check_metadata_paths
)
-from swh.loader.package.tests.conftest import local_get_factory
-
def test_author_basic():
data = {
'author': "i-am-groot",
'author_email': 'iam@groot.org',
}
actual_author = author(data)
expected_author = {
'fullname': b'i-am-groot <iam@groot.org>',
'name': b'i-am-groot',
'email': b'iam@groot.org',
}
assert actual_author == expected_author
def test_author_empty_email():
data = {
'author': 'i-am-groot',
'author_email': '',
}
actual_author = author(data)
expected_author = {
'fullname': b'i-am-groot',
'name': b'i-am-groot',
'email': b'',
}
assert actual_author == expected_author
def test_author_empty_name():
data = {
'author': "",
'author_email': 'iam@groot.org',
}
actual_author = author(data)
expected_author = {
'fullname': b' <iam@groot.org>',
'name': b'',
'email': b'iam@groot.org',
}
assert actual_author == expected_author
def test_author_malformed():
data = {
'author': "['pierre', 'paul', 'jacques']",
'author_email': None,
}
actual_author = author(data)
expected_author = {
'fullname': b"['pierre', 'paul', 'jacques']",
'name': b"['pierre', 'paul', 'jacques']",
'email': None,
}
assert actual_author == expected_author
def test_author_malformed_2():
data = {
'author': '[marie, jeanne]',
'author_email': '[marie@some, jeanne@thing]',
}
actual_author = author(data)
expected_author = {
'fullname': b'[marie, jeanne] <[marie@some, jeanne@thing]>',
'name': b'[marie, jeanne]',
'email': b'[marie@some, jeanne@thing]',
}
assert actual_author == expected_author
def test_author_malformed_3():
data = {
'author': '[marie, jeanne, pierre]',
'author_email': '[marie@somewhere.org, jeanne@somewhere.org]',
}
actual_author = author(data)
expected_author = {
'fullname': b'[marie, jeanne, pierre] <[marie@somewhere.org, jeanne@somewhere.org]>', # noqa
'name': b'[marie, jeanne, pierre]',
'email': b'[marie@somewhere.org, jeanne@somewhere.org]',
}
assert actual_author == expected_author
# configuration error #
def test_badly_configured_loader_raise(monkeypatch):
"""Badly configured loader should raise"""
monkeypatch.delenv('SWH_CONFIG_FILENAME', raising=False)
with pytest.raises(ValueError) as e:
PyPILoader(url='some-url')
assert 'Misconfiguration' in e.value.args[0]
def test_pypi_api_url():
"""Compute pypi api url from the pypi project url should be ok"""
url = pypi_api_url('https://pypi.org/project/requests')
assert url == 'https://pypi.org/pypi/requests/json'
@pytest.mark.fs
def test_extract_intrinsic_metadata(tmp_path, datadir):
"""Parsing existing archive's PKG-INFO should yield results"""
uncompressed_archive_path = str(tmp_path)
archive_path = path.join(
datadir, 'files.pythonhosted.org', '0805nexter-1.1.0.zip')
uncompress(archive_path, dest=uncompressed_archive_path)
actual_metadata = extract_intrinsic_metadata(uncompressed_archive_path)
expected_metadata = {
'metadata_version': '1.0',
'name': '0805nexter',
'version': '1.1.0',
'summary': 'a simple printer of nested lest',
'home_page': 'http://www.hp.com',
'author': 'hgtkpython',
'author_email': '2868989685@qq.com',
'platforms': ['UNKNOWN'],
}
assert actual_metadata == expected_metadata
@pytest.mark.fs
def test_extract_intrinsic_metadata_failures(tmp_path):
"""Parsing inexistant path/archive/PKG-INFO yield None"""
# inexistant first level path
assert extract_intrinsic_metadata('/something-inexistant') == {}
# inexistant second level path (as expected by pypi archives)
assert extract_intrinsic_metadata(tmp_path) == {}
# inexistant PKG-INFO within second level path
existing_path_no_pkginfo = str(tmp_path / 'something')
os.mkdir(existing_path_no_pkginfo)
assert extract_intrinsic_metadata(tmp_path) == {}
# LOADER SCENARIO #
# "edge" cases (for the same origin) #
# no release artifact:
# {visit full, status: uneventful, no contents, etc...}
-local_get_missing_all = local_get_factory(ignore_urls=[
+requests_mock_datadir_missing_all = requests_mock_datadir_factory(ignore_urls=[
'https://files.pythonhosted.org/packages/ec/65/c0116953c9a3f47de89e71964d6c7b0c783b01f29fa3390584dbf3046b4d/0805nexter-1.1.0.zip', # noqa
'https://files.pythonhosted.org/packages/c4/a0/4562cda161dc4ecbbe9e2a11eb365400c0461845c5be70d73869786809c4/0805nexter-1.2.0.zip', # noqa
])
-def test_no_release_artifact(swh_config, local_get_missing_all):
+def test_no_release_artifact(swh_config, requests_mock_datadir_missing_all):
"""Load a pypi project with all artifacts missing ends up with no snapshot
"""
url = 'https://pypi.org/project/0805nexter'
loader = PyPILoader(url)
actual_load_status = loader.load()
assert actual_load_status['status'] == 'uneventful'
stats = loader.storage.stat_counters()
assert {
'content': 0,
'directory': 0,
'origin': 1,
'origin_visit': 1,
'person': 0,
'release': 0,
'revision': 0,
'skipped_content': 0,
'snapshot': 1,
} == stats
origin_visit = next(loader.storage.origin_visit_get(url))
assert origin_visit['status'] == 'partial'
# problem during loading:
# {visit: partial, status: uneventful, no snapshot}
def test_release_with_traceback(swh_config):
url = 'https://pypi.org/project/0805nexter'
with patch('swh.loader.package.pypi.PyPILoader.get_default_release',
side_effect=ValueError('Problem')):
loader = PyPILoader(url)
actual_load_status = loader.load()
assert actual_load_status['status'] == 'failed'
stats = loader.storage.stat_counters()
assert {
'content': 0,
'directory': 0,
'origin': 1,
'origin_visit': 1,
'person': 0,
'release': 0,
'revision': 0,
'skipped_content': 0,
'snapshot': 0,
} == stats
origin_visit = next(loader.storage.origin_visit_get(url))
assert origin_visit['status'] == 'partial'
# problem during loading: failure early enough in between swh contents...
# some contents (contents, directories, etc...) have been written in storage
# {visit: partial, status: eventful, no snapshot}
# problem during loading: failure late enough we can have snapshots (some
# revisions are written in storage already)
# {visit: partial, status: eventful, snapshot}
# "normal" cases (for the same origin) #
-local_get_missing_one = local_get_factory(ignore_urls=[
+requests_mock_datadir_missing_one = requests_mock_datadir_factory(ignore_urls=[
'https://files.pythonhosted.org/packages/ec/65/c0116953c9a3f47de89e71964d6c7b0c783b01f29fa3390584dbf3046b4d/0805nexter-1.1.0.zip', # noqa
])
# some missing release artifacts:
# {visit partial, status: eventful, 1 snapshot}
-def test_revision_metadata_structure(swh_config, local_get):
+def test_revision_metadata_structure(swh_config, requests_mock_datadir):
url = 'https://pypi.org/project/0805nexter'
loader = PyPILoader(url)
actual_load_status = loader.load()
assert actual_load_status['status'] == 'eventful'
expected_revision_id = hash_to_bytes(
'e445da4da22b31bfebb6ffc4383dbf839a074d21')
revision = list(loader.storage.revision_get([expected_revision_id]))[0]
assert revision is not None
check_metadata_paths(revision['metadata'], paths=[
('intrinsic.tool', str),
('intrinsic.raw', dict),
('extrinsic.provider', str),
('extrinsic.when', str),
('extrinsic.raw', dict),
('original_artifact.filename', str),
('original_artifact.length', int),
('original_artifact.checksums', dict),
])
-def test_visit_with_missing_artifact(swh_config, local_get_missing_one):
+def test_visit_with_missing_artifact(
+ swh_config, requests_mock_datadir_missing_one):
"""Load a pypi project with some missing artifacts ends up with 1 snapshot
"""
url = 'https://pypi.org/project/0805nexter'
loader = PyPILoader(url)
actual_load_status = loader.load()
assert actual_load_status['status'] == 'eventful'
stats = loader.storage.stat_counters()
assert {
'content': 3,
'directory': 2,
'origin': 1,
'origin_visit': 1,
'person': 1,
'release': 0,
'revision': 1,
'skipped_content': 0,
'snapshot': 1
} == stats
expected_contents = map(hash_to_bytes, [
'405859113963cb7a797642b45f171d6360425d16',
'e5686aa568fdb1d19d7f1329267082fe40482d31',
'83ecf6ec1114fd260ca7a833a2d165e71258c338',
])
assert list(loader.storage.content_missing_per_sha1(expected_contents))\
== []
expected_dirs = map(hash_to_bytes, [
'b178b66bd22383d5f16f4f5c923d39ca798861b4',
'c3a58f8b57433a4b56caaa5033ae2e0931405338',
])
assert list(loader.storage.directory_missing(expected_dirs)) == []
# {revision hash: directory hash}
expected_revs = {
hash_to_bytes('e445da4da22b31bfebb6ffc4383dbf839a074d21'): hash_to_bytes('b178b66bd22383d5f16f4f5c923d39ca798861b4'), # noqa
}
assert list(loader.storage.revision_missing(expected_revs)) == []
expected_branches = {
'releases/1.2.0': {
'target': 'e445da4da22b31bfebb6ffc4383dbf839a074d21',
'target_type': 'revision',
},
'HEAD': {
'target': 'releases/1.2.0',
'target_type': 'alias',
},
}
expected_snapshot = {
'id': 'dd0e4201a232b1c104433741dbf45895b8ac9355',
'branches': expected_branches,
}
check_snapshot(expected_snapshot, storage=loader.storage)
origin_visit = next(loader.storage.origin_visit_get(url))
assert origin_visit['status'] == 'partial'
-def test_visit_with_1_release_artifact(swh_config, local_get):
+def test_visit_with_1_release_artifact(swh_config, requests_mock_datadir):
"""With no prior visit, load a pypi project ends up with 1 snapshot
"""
url = 'https://pypi.org/project/0805nexter'
loader = PyPILoader(url)
actual_load_status = loader.load()
assert actual_load_status['status'] == 'eventful'
stats = loader.storage.stat_counters()
assert {
'content': 6,
'directory': 4,
'origin': 1,
'origin_visit': 1,
'person': 1,
'release': 0,
'revision': 2,
'skipped_content': 0,
'snapshot': 1
} == stats
expected_contents = map(hash_to_bytes, [
'a61e24cdfdab3bb7817f6be85d37a3e666b34566',
'938c33483285fd8ad57f15497f538320df82aeb8',
'a27576d60e08c94a05006d2e6d540c0fdb5f38c8',
'405859113963cb7a797642b45f171d6360425d16',
'e5686aa568fdb1d19d7f1329267082fe40482d31',
'83ecf6ec1114fd260ca7a833a2d165e71258c338',
])
assert list(loader.storage.content_missing_per_sha1(expected_contents))\
== []
expected_dirs = map(hash_to_bytes, [
'05219ba38bc542d4345d5638af1ed56c7d43ca7d',
'cf019eb456cf6f78d8c4674596f1c9a97ece8f44',
'b178b66bd22383d5f16f4f5c923d39ca798861b4',
'c3a58f8b57433a4b56caaa5033ae2e0931405338',
])
assert list(loader.storage.directory_missing(expected_dirs)) == []
# {revision hash: directory hash}
expected_revs = {
hash_to_bytes('4c99891f93b81450385777235a37b5e966dd1571'): hash_to_bytes('05219ba38bc542d4345d5638af1ed56c7d43ca7d'), # noqa
hash_to_bytes('e445da4da22b31bfebb6ffc4383dbf839a074d21'): hash_to_bytes('b178b66bd22383d5f16f4f5c923d39ca798861b4'), # noqa
}
assert list(loader.storage.revision_missing(expected_revs)) == []
expected_branches = {
'releases/1.1.0': {
'target': '4c99891f93b81450385777235a37b5e966dd1571',
'target_type': 'revision',
},
'releases/1.2.0': {
'target': 'e445da4da22b31bfebb6ffc4383dbf839a074d21',
'target_type': 'revision',
},
'HEAD': {
'target': 'releases/1.2.0',
'target_type': 'alias',
},
}
expected_snapshot = {
'id': 'ba6e158ada75d0b3cfb209ffdf6daa4ed34a227a',
'branches': expected_branches,
}
check_snapshot(expected_snapshot, loader.storage)
origin_visit = next(loader.storage.origin_visit_get(url))
assert origin_visit['status'] == 'full'
-def test_multiple_visits_with_no_change(swh_config, local_get):
+def test_multiple_visits_with_no_change(swh_config, requests_mock_datadir):
"""Multiple visits with no changes results in 1 same snapshot
"""
url = 'https://pypi.org/project/0805nexter'
loader = PyPILoader(url)
actual_load_status = loader.load()
assert actual_load_status['status'] == 'eventful'
stats = loader.storage.stat_counters()
assert {
'content': 6,
'directory': 4,
'origin': 1,
'origin_visit': 1,
'person': 1,
'release': 0,
'revision': 2,
'skipped_content': 0,
'snapshot': 1
} == stats
expected_branches = {
'releases/1.1.0': {
'target': '4c99891f93b81450385777235a37b5e966dd1571',
'target_type': 'revision',
},
'releases/1.2.0': {
'target': 'e445da4da22b31bfebb6ffc4383dbf839a074d21',
'target_type': 'revision',
},
'HEAD': {
'target': 'releases/1.2.0',
'target_type': 'alias',
},
}
snapshot_id = 'ba6e158ada75d0b3cfb209ffdf6daa4ed34a227a'
expected_snapshot = {
'id': snapshot_id,
'branches': expected_branches,
}
check_snapshot(expected_snapshot, loader.storage)
origin_visit = next(loader.storage.origin_visit_get(url))
assert origin_visit['status'] == 'full'
actual_load_status2 = loader.load()
assert actual_load_status2['status'] == 'uneventful'
stats2 = loader.storage.stat_counters()
expected_stats2 = stats.copy()
expected_stats2['origin_visit'] = 1 + 1
assert expected_stats2 == stats2
# same snapshot
actual_snapshot_id = origin_visit['snapshot']['id']
assert actual_snapshot_id == hash_to_bytes(snapshot_id)
-def test_incremental_visit(swh_config, local_get_visits):
+def test_incremental_visit(swh_config, requests_mock_datadir_visits):
"""With prior visit, 2nd load will result with a different snapshot
"""
url = 'https://pypi.org/project/0805nexter'
loader = PyPILoader(url)
visit1_actual_load_status = loader.load()
visit1_stats = loader.storage.stat_counters()
assert visit1_actual_load_status['status'] == 'eventful'
origin_visit1 = next(loader.storage.origin_visit_get(url))
assert origin_visit1['status'] == 'full'
assert {
'content': 6,
'directory': 4,
'origin': 1,
'origin_visit': 1,
'person': 1,
'release': 0,
'revision': 2,
'skipped_content': 0,
'snapshot': 1
} == visit1_stats
# Reset internal state
loader._info = None
visit2_actual_load_status = loader.load()
visit2_stats = loader.storage.stat_counters()
assert visit2_actual_load_status['status'] == 'eventful'
visits = list(loader.storage.origin_visit_get(url))
assert len(visits) == 2
assert visits[1]['status'] == 'full'
assert {
'content': 6 + 1, # 1 more content
'directory': 4 + 2, # 2 more directories
'origin': 1,
'origin_visit': 1 + 1,
'person': 1,
'release': 0,
'revision': 2 + 1, # 1 more revision
'skipped_content': 0,
'snapshot': 1 + 1, # 1 more snapshot
} == visit2_stats
expected_contents = map(hash_to_bytes, [
'a61e24cdfdab3bb7817f6be85d37a3e666b34566',
'938c33483285fd8ad57f15497f538320df82aeb8',
'a27576d60e08c94a05006d2e6d540c0fdb5f38c8',
'405859113963cb7a797642b45f171d6360425d16',
'e5686aa568fdb1d19d7f1329267082fe40482d31',
'83ecf6ec1114fd260ca7a833a2d165e71258c338',
'92689fa2b7fb4d4fc6fb195bf73a50c87c030639'
])
assert list(loader.storage.content_missing_per_sha1(expected_contents))\
== []
expected_dirs = map(hash_to_bytes, [
'05219ba38bc542d4345d5638af1ed56c7d43ca7d',
'cf019eb456cf6f78d8c4674596f1c9a97ece8f44',
'b178b66bd22383d5f16f4f5c923d39ca798861b4',
'c3a58f8b57433a4b56caaa5033ae2e0931405338',
'e226e7e4ad03b4fc1403d69a18ebdd6f2edd2b3a',
'52604d46843b898f5a43208045d09fcf8731631b',
])
assert list(loader.storage.directory_missing(expected_dirs)) == []
# {revision hash: directory hash}
expected_revs = {
hash_to_bytes('4c99891f93b81450385777235a37b5e966dd1571'): hash_to_bytes('05219ba38bc542d4345d5638af1ed56c7d43ca7d'), # noqa
hash_to_bytes('e445da4da22b31bfebb6ffc4383dbf839a074d21'): hash_to_bytes('b178b66bd22383d5f16f4f5c923d39ca798861b4'), # noqa
hash_to_bytes('51247143b01445c9348afa9edfae31bf7c5d86b1'): hash_to_bytes('e226e7e4ad03b4fc1403d69a18ebdd6f2edd2b3a'), # noqa
}
assert list(loader.storage.revision_missing(expected_revs)) == []
expected_branches = {
'releases/1.1.0': {
'target': '4c99891f93b81450385777235a37b5e966dd1571',
'target_type': 'revision',
},
'releases/1.2.0': {
'target': 'e445da4da22b31bfebb6ffc4383dbf839a074d21',
'target_type': 'revision',
},
'releases/1.3.0': {
'target': '51247143b01445c9348afa9edfae31bf7c5d86b1',
'target_type': 'revision',
},
'HEAD': {
'target': 'releases/1.3.0',
'target_type': 'alias',
},
}
expected_snapshot = {
'id': '2e5149a7b0725d18231a37b342e9b7c4e121f283',
'branches': expected_branches,
}
check_snapshot(expected_snapshot, loader.storage)
origin_visit = list(loader.storage.origin_visit_get(url))[-1]
assert origin_visit['status'] == 'full'
urls = [
- m.url for m in local_get_visits.request_history
+ m.url for m in requests_mock_datadir_visits.request_history
if m.url.startswith('https://files.pythonhosted.org')
]
# visited each artifact once across 2 visits
assert len(urls) == len(set(urls))
# release artifact, no new artifact
# {visit full, status uneventful, same snapshot as before}
# release artifact, old artifact with different checksums
# {visit full, status full, new snapshot with shared history and some new
# different history}
# release with multiple sdist artifacts per pypi "version"
# snapshot branch output is different
-def test_visit_1_release_with_2_artifacts(swh_config, local_get):
+def test_visit_1_release_with_2_artifacts(swh_config, requests_mock_datadir):
"""With no prior visit, load a pypi project ends up with 1 snapshot
"""
url = 'https://pypi.org/project/nexter'
loader = PyPILoader(url)
actual_load_status = loader.load()
assert actual_load_status['status'] == 'eventful'
expected_branches = {
'releases/1.1.0/nexter-1.1.0.zip': {
'target': '4c99891f93b81450385777235a37b5e966dd1571',
'target_type': 'revision',
},
'releases/1.1.0/nexter-1.1.0.tar.gz': {
'target': '0bf88f5760cca7665d0af4d6575d9301134fe11a',
'target_type': 'revision',
},
}
expected_snapshot = {
'id': 'a27e638a4dad6fbfa273c6ebec1c4bf320fb84c6',
'branches': expected_branches,
}
check_snapshot(expected_snapshot, loader.storage)
origin_visit = next(loader.storage.origin_visit_get(url))
assert origin_visit['status'] == 'full'
diff --git a/tox.ini b/tox.ini
index 0fb07c6..b5c9956 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,16 +1,17 @@
[tox]
envlist=flake8,py3
[testenv:py3]
deps =
.[testing]
pytest-cov
commands =
- pytest --cov=swh --cov-branch {posargs}
+ pytest --cov={envsitepackagesdir}/swh/loader/ --cov-branch \
+ {envsitepackagesdir}/swh/loader/ {posargs}
[testenv:flake8]
skip_install = true
deps =
flake8
commands =
{envpython} -m flake8
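For the multi-visit fixture exercised in the npm and pypi tests above (requests_mock_datadir_visits), here is a hedged sketch of the behaviour it inherits from the removed get_response_cb, whose visit counter appended a '_visit%s' suffix to the looked-up file; the URL and data files are illustrative:

import requests

def test_second_visit_serves_next_file(requests_mock_datadir_visits):
    # 1st call served from data/example.com/file.json,
    # 2nd from data/example.com/file.json_visit1 (404 if absent)
    first = requests.get('https://example.com/file.json')
    second = requests.get('https://example.com/file.json')
    assert first.ok and second.ok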