diff --git a/swh/loader/package/tests/test_pypi.py b/swh/loader/package/tests/test_pypi.py
index 778ed7b..e625690 100644
--- a/swh/loader/package/tests/test_pypi.py
+++ b/swh/loader/package/tests/test_pypi.py
@@ -1,541 +1,541 @@
# Copyright (C) 2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import os
from os import path
import pytest
from unittest.mock import patch
from swh.core.tarball import uncompress
from swh.model.hashutil import hash_to_bytes
from swh.loader.package.pypi import (
PyPILoader, pypi_api_url, author, extract_intrinsic_metadata
)
from swh.loader.package.tests.common import (
check_snapshot, DATADIR
)
from swh.loader.package.tests.conftest import local_get_factory
def test_author_basic():
data = {
'author': "i-am-groot",
'author_email': 'iam@groot.org',
}
actual_author = author(data)
expected_author = {
'fullname': b'i-am-groot <iam@groot.org>',
'name': b'i-am-groot',
'email': b'iam@groot.org',
}
assert actual_author == expected_author
def test_author_empty_email():
data = {
'author': 'i-am-groot',
'author_email': '',
}
actual_author = author(data)
expected_author = {
'fullname': b'i-am-groot',
'name': b'i-am-groot',
'email': b'',
}
assert actual_author == expected_author
def test_author_empty_name():
data = {
'author': "",
'author_email': 'iam@groot.org',
}
actual_author = author(data)
expected_author = {
'fullname': b' <iam@groot.org>',
'name': b'',
'email': b'iam@groot.org',
}
assert actual_author == expected_author
def test_author_malformed():
data = {
'author': "['pierre', 'paul', 'jacques']",
'author_email': None,
}
actual_author = author(data)
expected_author = {
'fullname': b"['pierre', 'paul', 'jacques']",
'name': b"['pierre', 'paul', 'jacques']",
'email': None,
}
assert actual_author == expected_author
def test_author_malformed_2():
data = {
'author': '[marie, jeanne]',
'author_email': '[marie@some, jeanne@thing]',
}
actual_author = author(data)
expected_author = {
'fullname': b'[marie, jeanne] <[marie@some, jeanne@thing]>',
'name': b'[marie, jeanne]',
'email': b'[marie@some, jeanne@thing]',
}
assert actual_author == expected_author
def test_author_malformed_3():
data = {
'author': '[marie, jeanne, pierre]',
'author_email': '[marie@somewhere.org, jeanne@somewhere.org]',
}
actual_author = author(data)
expected_author = {
'fullname': b'[marie, jeanne, pierre] <[marie@somewhere.org, jeanne@somewhere.org]>', # noqa
'name': b'[marie, jeanne, pierre]',
'email': b'[marie@somewhere.org, jeanne@somewhere.org]',
}
assert actual_author == expected_author
# configuration error #
def test_badly_configured_loader_raise(monkeypatch):
"""Badly configured loader should raise"""
monkeypatch.delenv('SWH_CONFIG_FILENAME', raising=False)
with pytest.raises(ValueError) as e:
PyPILoader(url='some-url')
assert 'Misconfiguration' in e.value.args[0]
def test_pypi_api_url():
"""Compute pypi api url from the pypi project url should be ok"""
url = pypi_api_url('https://pypi.org/project/requests')
assert url == 'https://pypi.org/pypi/requests/json'
@pytest.mark.fs
def test_extract_intrinsic_metadata(tmp_path):
"""Parsing existing archive's PKG-INFO should yield results"""
uncompressed_archive_path = str(tmp_path)
archive_path = path.join(
DATADIR, 'files.pythonhosted.org', '0805nexter-1.1.0.zip')
uncompress(archive_path, dest=uncompressed_archive_path)
- actual_sdist = extract_intrinsic_metadata(uncompressed_archive_path)
- expected_sdist = {
+ actual_metadata = extract_intrinsic_metadata(uncompressed_archive_path)
+ expected_metadata = {
'metadata_version': '1.0',
'name': '0805nexter',
'version': '1.1.0',
'summary': 'a simple printer of nested lest',
'home_page': 'http://www.hp.com',
'author': 'hgtkpython',
'author_email': '2868989685@qq.com',
'platforms': ['UNKNOWN'],
}
- assert actual_sdist == expected_sdist
+ assert actual_metadata == expected_metadata
@pytest.mark.fs
def test_extract_intrinsic_metadata_failures(tmp_path):
"""Parsing inexistant path/archive/PKG-INFO yield None"""
# inexistant first level path
assert extract_intrinsic_metadata('/something-inexistant') == {}
# inexistant second level path (as expected by pypi archives)
assert extract_intrinsic_metadata(tmp_path) == {}
# inexistant PKG-INFO within second level path
existing_path_no_pkginfo = str(tmp_path / 'something')
os.mkdir(existing_path_no_pkginfo)
assert extract_intrinsic_metadata(tmp_path) == {}
# LOADER SCENARIO #
# "edge" cases (for the same origin) #
# no release artifact:
# {visit: partial, status: uneventful, no contents, etc...}
local_get_missing_all = local_get_factory(ignore_urls=[
'https://files.pythonhosted.org/packages/ec/65/c0116953c9a3f47de89e71964d6c7b0c783b01f29fa3390584dbf3046b4d/0805nexter-1.1.0.zip', # noqa
'https://files.pythonhosted.org/packages/c4/a0/4562cda161dc4ecbbe9e2a11eb365400c0461845c5be70d73869786809c4/0805nexter-1.2.0.zip', # noqa
])
def test_no_release_artifact(swh_config, local_get_missing_all):
"""Load a pypi project with all artifacts missing ends up with no snapshot
"""
url = 'https://pypi.org/project/0805nexter'
loader = PyPILoader(url)
actual_load_status = loader.load()
assert actual_load_status == {'status': 'uneventful'}
stats = loader.storage.stat_counters()
assert {
'content': 0,
'directory': 0,
'origin': 1,
'origin_visit': 1,
'person': 0,
'release': 0,
'revision': 0,
'skipped_content': 0,
'snapshot': 1,
} == stats
origin_visit = next(loader.storage.origin_visit_get(url))
assert origin_visit['status'] == 'partial'
# problem during loading:
# {visit: partial, status: uneventful, no snapshot}
def test_release_with_traceback(swh_config):
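    """An error raised during loading yields an uneventful load status, a
    partial visit and no snapshot
    """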
url = 'https://pypi.org/project/0805nexter'
with patch('swh.loader.package.pypi.PyPILoader.get_default_release',
side_effect=ValueError('Problem')):
loader = PyPILoader(url)
actual_load_status = loader.load()
assert actual_load_status == {'status': 'uneventful'}
stats = loader.storage.stat_counters()
assert {
'content': 0,
'directory': 0,
'origin': 1,
'origin_visit': 1,
'person': 0,
'release': 0,
'revision': 0,
'skipped_content': 0,
'snapshot': 0,
} == stats
origin_visit = next(loader.storage.origin_visit_get(url))
assert origin_visit['status'] == 'partial'
# problem during loading: failure early enough that only some objects
# (contents, directories, etc.) have been written to storage
# {visit: partial, status: eventful, no snapshot} (sketched below)
# problem during loading: failure late enough we can have snapshots (some
# revisions are written in storage already)
# {visit: partial, status: eventful, snapshot}
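# The two scenarios above are not yet implemented. Below is a minimal
# sketch of the first one; the test name is ours, and it assumes that
# making the storage's snapshot_add raise aborts the load after contents,
# directories and revisions were written but before a snapshot exists.
# The asserted statuses mirror the expectations stated in the comments;
# they have not been verified against the loader.
def test_release_artifact_mid_load_failure(swh_config, local_get):
    url = 'https://pypi.org/project/0805nexter'
    loader = PyPILoader(url)
    # fail at snapshot creation time, once earlier objects are stored
    with patch.object(loader.storage, 'snapshot_add',
                      side_effect=ValueError('Problem')):
        actual_load_status = loader.load()
    assert actual_load_status == {'status': 'eventful'}
    stats = loader.storage.stat_counters()
    assert stats['content'] > 0    # some objects were written...
    assert stats['snapshot'] == 0  # ...but no snapshot
    origin_visit = next(loader.storage.origin_visit_get(url))
    assert origin_visit['status'] == 'partial'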
# "normal" cases (for the same origin) #
local_get_missing_one = local_get_factory(ignore_urls=[
'https://files.pythonhosted.org/packages/ec/65/c0116953c9a3f47de89e71964d6c7b0c783b01f29fa3390584dbf3046b4d/0805nexter-1.1.0.zip', # noqa
])
# some missing release artifacts:
# {visit partial, status: eventful, 1 snapshot}
def test_release_with_missing_artifact(swh_config, local_get_missing_one):
"""Load a pypi project with some missing artifacts ends up with 1 snapshot
"""
url = 'https://pypi.org/project/0805nexter'
loader = PyPILoader(url)
actual_load_status = loader.load()
assert actual_load_status == {'status': 'eventful'}
stats = loader.storage.stat_counters()
assert {
'content': 3,
'directory': 2,
'origin': 1,
'origin_visit': 1,
'person': 1,
'release': 0,
'revision': 1,
'skipped_content': 0,
'snapshot': 1
} == stats
expected_contents = map(hash_to_bytes, [
'405859113963cb7a797642b45f171d6360425d16',
'e5686aa568fdb1d19d7f1329267082fe40482d31',
'83ecf6ec1114fd260ca7a833a2d165e71258c338',
])
assert list(loader.storage.content_missing_per_sha1(expected_contents))\
== []
expected_dirs = map(hash_to_bytes, [
'b178b66bd22383d5f16f4f5c923d39ca798861b4',
'c3a58f8b57433a4b56caaa5033ae2e0931405338',
])
assert list(loader.storage.directory_missing(expected_dirs)) == []
# {revision hash: directory hash}
expected_revs = {
hash_to_bytes('e445da4da22b31bfebb6ffc4383dbf839a074d21'): hash_to_bytes('b178b66bd22383d5f16f4f5c923d39ca798861b4'), # noqa
}
assert list(loader.storage.revision_missing(expected_revs)) == []
expected_branches = {
'releases/1.2.0': {
'target': 'e445da4da22b31bfebb6ffc4383dbf839a074d21',
'target_type': 'revision',
},
'HEAD': {
'target': 'releases/1.2.0',
'target_type': 'alias',
},
}
expected_snapshot = {
'id': 'dd0e4201a232b1c104433741dbf45895b8ac9355',
'branches': expected_branches,
}
check_snapshot(expected_snapshot, storage=loader.storage)
origin_visit = next(loader.storage.origin_visit_get(url))
assert origin_visit['status'] == 'partial'
def test_release_artifact_no_prior_visit(swh_config, local_get):
"""With no prior visit, load a pypi project ends up with 1 snapshot
"""
url = 'https://pypi.org/project/0805nexter'
loader = PyPILoader(url)
actual_load_status = loader.load()
assert actual_load_status == {'status': 'eventful'}
stats = loader.storage.stat_counters()
assert {
'content': 6,
'directory': 4,
'origin': 1,
'origin_visit': 1,
'person': 1,
'release': 0,
'revision': 2,
'skipped_content': 0,
'snapshot': 1
} == stats
expected_contents = map(hash_to_bytes, [
'a61e24cdfdab3bb7817f6be85d37a3e666b34566',
'938c33483285fd8ad57f15497f538320df82aeb8',
'a27576d60e08c94a05006d2e6d540c0fdb5f38c8',
'405859113963cb7a797642b45f171d6360425d16',
'e5686aa568fdb1d19d7f1329267082fe40482d31',
'83ecf6ec1114fd260ca7a833a2d165e71258c338',
])
assert list(loader.storage.content_missing_per_sha1(expected_contents))\
== []
expected_dirs = map(hash_to_bytes, [
'05219ba38bc542d4345d5638af1ed56c7d43ca7d',
'cf019eb456cf6f78d8c4674596f1c9a97ece8f44',
'b178b66bd22383d5f16f4f5c923d39ca798861b4',
'c3a58f8b57433a4b56caaa5033ae2e0931405338',
])
assert list(loader.storage.directory_missing(expected_dirs)) == []
# {revision hash: directory hash}
expected_revs = {
hash_to_bytes('4c99891f93b81450385777235a37b5e966dd1571'): hash_to_bytes('05219ba38bc542d4345d5638af1ed56c7d43ca7d'), # noqa
hash_to_bytes('e445da4da22b31bfebb6ffc4383dbf839a074d21'): hash_to_bytes('b178b66bd22383d5f16f4f5c923d39ca798861b4'), # noqa
}
assert list(loader.storage.revision_missing(expected_revs)) == []
expected_branches = {
'releases/1.1.0': {
'target': '4c99891f93b81450385777235a37b5e966dd1571',
'target_type': 'revision',
},
'releases/1.2.0': {
'target': 'e445da4da22b31bfebb6ffc4383dbf839a074d21',
'target_type': 'revision',
},
'HEAD': {
'target': 'releases/1.2.0',
'target_type': 'alias',
},
}
expected_snapshot = {
'id': 'ba6e158ada75d0b3cfb209ffdf6daa4ed34a227a',
'branches': expected_branches,
}
check_snapshot(expected_snapshot, loader.storage)
origin_visit = next(loader.storage.origin_visit_get(url))
assert origin_visit['status'] == 'full'
# release artifact, new artifact
# {visit: full, status: eventful, new snapshot with shared history with the
# prior snapshot}
def test_release_artifact_incremental_visit(swh_config, local_get_visits):
"""With prior visit, 2nd load will result with a different snapshot
with some shared history
"""
url = 'https://pypi.org/project/0805nexter'
loader = PyPILoader(url)
visit1_actual_load_status = loader.load()
visit1_stats = loader.storage.stat_counters()
assert visit1_actual_load_status == {'status': 'eventful'}
origin_visit1 = next(loader.storage.origin_visit_get(url))
assert origin_visit1['status'] == 'full'
assert {
'content': 6,
'directory': 4,
'origin': 1,
'origin_visit': 1,
'person': 1,
'release': 0,
'revision': 2,
'skipped_content': 0,
'snapshot': 1
} == visit1_stats
# Reset internal (cached) state so the 2nd load starts fresh
loader._info = None
visit2_actual_load_status = loader.load()
visit2_stats = loader.storage.stat_counters()
assert visit2_actual_load_status == {'status': 'eventful'}
visits = list(loader.storage.origin_visit_get(url))
assert len(visits) == 2
assert visits[1]['status'] == 'full'
assert {
'content': 6 + 1, # 1 more content
'directory': 4 + 2, # 2 more directories
'origin': 1,
'origin_visit': 1 + 1,
'person': 1,
'release': 0,
'revision': 2 + 1, # 1 more revision
'skipped_content': 0,
'snapshot': 1 + 1, # 1 more snapshot
} == visit2_stats
expected_contents = map(hash_to_bytes, [
'a61e24cdfdab3bb7817f6be85d37a3e666b34566',
'938c33483285fd8ad57f15497f538320df82aeb8',
'a27576d60e08c94a05006d2e6d540c0fdb5f38c8',
'405859113963cb7a797642b45f171d6360425d16',
'e5686aa568fdb1d19d7f1329267082fe40482d31',
'83ecf6ec1114fd260ca7a833a2d165e71258c338',
'92689fa2b7fb4d4fc6fb195bf73a50c87c030639'
])
assert list(loader.storage.content_missing_per_sha1(expected_contents))\
== []
expected_dirs = map(hash_to_bytes, [
'05219ba38bc542d4345d5638af1ed56c7d43ca7d',
'cf019eb456cf6f78d8c4674596f1c9a97ece8f44',
'b178b66bd22383d5f16f4f5c923d39ca798861b4',
'c3a58f8b57433a4b56caaa5033ae2e0931405338',
'e226e7e4ad03b4fc1403d69a18ebdd6f2edd2b3a',
'52604d46843b898f5a43208045d09fcf8731631b',
])
assert list(loader.storage.directory_missing(expected_dirs)) == []
# {revision hash: directory hash}
expected_revs = {
hash_to_bytes('4c99891f93b81450385777235a37b5e966dd1571'): hash_to_bytes('05219ba38bc542d4345d5638af1ed56c7d43ca7d'), # noqa
hash_to_bytes('e445da4da22b31bfebb6ffc4383dbf839a074d21'): hash_to_bytes('b178b66bd22383d5f16f4f5c923d39ca798861b4'), # noqa
hash_to_bytes('51247143b01445c9348afa9edfae31bf7c5d86b1'): hash_to_bytes('e226e7e4ad03b4fc1403d69a18ebdd6f2edd2b3a'), # noqa
}
assert list(loader.storage.revision_missing(expected_revs)) == []
expected_branches = {
'releases/1.1.0': {
'target': '4c99891f93b81450385777235a37b5e966dd1571',
'target_type': 'revision',
},
'releases/1.2.0': {
'target': 'e445da4da22b31bfebb6ffc4383dbf839a074d21',
'target_type': 'revision',
},
'releases/1.3.0': {
'target': '51247143b01445c9348afa9edfae31bf7c5d86b1',
'target_type': 'revision',
},
'HEAD': {
'target': 'releases/1.3.0',
'target_type': 'alias',
},
}
expected_snapshot = {
'id': '2e5149a7b0725d18231a37b342e9b7c4e121f283',
'branches': expected_branches,
}
check_snapshot(expected_snapshot, loader.storage)
origin_visit = list(loader.storage.origin_visit_get(url))[-1]
assert origin_visit['status'] == 'full'
urls = [
m.url for m in local_get_visits.request_history
if m.url.startswith('https://files.pythonhosted.org')
]
# each artifact was fetched exactly once across the 2 visits
assert len(urls) == len(set(urls))
# release artifact, no new artifact
# {visit: full, status: uneventful, same snapshot as before} (sketched below)
# release artifact, old artifact with different checksums
# {visit: full, status: eventful, new snapshot with shared history and some
# new, different history}