# swh/loader/package/functional/tests/test_functional.py
# (new file added in this changeset)
# Copyright (C) 2020 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from json.decoder import JSONDecodeError

import pytest

from swh.loader.package.functional.loader import (
    FunctionalLoader,
    retrieve_sources,
)
from swh.loader.package.tests.common import (
    check_snapshot,
    get_stats,
)

# URL of the sources.json manifest ingested by the functional (nixpkgs) loader
# in the tests below; served by the requests_mock_datadir fixture.
sources_url = 'https://nix-community.github.io/nixpkgs-swh/sources.json'
def test_retrieve_sources(swh_config, requests_mock_datadir):
    """The sources.json manifest is fetched and parsed into a dict
    holding the expected number of source entries.

    """
    manifest = retrieve_sources(sources_url)

    assert "sources" in manifest
    assert len(manifest["sources"]) == 2
def test_retrieve_non_existing(swh_config, requests_mock_datadir):
    """Instantiating the loader on an unreachable url raises ValueError."""
    with pytest.raises(ValueError):
        FunctionalLoader('https://non-existing-url')
def test_retrieve_non_json(swh_config, requests_mock_datadir):
    """Instantiating the loader on a url serving something that is not
    json raises JSONDecodeError.

    """
    with pytest.raises(JSONDecodeError):
        FunctionalLoader('https://example.com/file.txt')
def test_loader_one_visit(swh_config, requests_mock_datadir):
    """A single visit is eventful and archives the expected objects."""
    loader = FunctionalLoader(sources_url)
    load_status = loader.load()
    assert load_status['status'] == 'eventful'

    expected_stats = {
        'content': 1,
        'directory': 3,
        'origin': 1,
        'origin_visit': 1,
        'person': 1,
        'release': 0,
        'revision': 1,
        'skipped_content': 0,
        'snapshot': 1,
    }
    assert get_stats(loader.storage) == expected_stats

    origin_visit = next(loader.storage.origin_visit_get(sources_url))
    # The visit is partial because urls pointing to non tarball file
    # are not handled yet
    assert origin_visit['status'] == 'partial'
    assert origin_visit['type'] == 'functional'
def test_uncompress_failure(swh_config, requests_mock_datadir):
    """Non tarball files are currently not supported and the uncompress
    function fails on such kind of files.

    However, even in this case of failure (because of the url
    https://example.com/file.txt), a snapshot and a visit has to be
    created (with a status partial since all files are not archived).

    """
    loader = FunctionalLoader(sources_url)
    load_status = loader.load()

    # The manifest does reference the non-tarball url...
    source_urls = [source['url'][0] for source in loader.sources]
    assert "https://example.com/file.txt" in source_urls

    # ...yet the load is still eventful,
    assert load_status['status'] == 'eventful'

    # and the visit exists, partial because urls pointing to non tarball
    # files are not handled yet.
    origin_visit = next(loader.storage.origin_visit_get(sources_url))
    assert origin_visit['status'] == 'partial'
def test_loader_incremental(swh_config, requests_mock_datadir):
    """Ensure a second visit does not download artifacts already
    downloaded by the previous visit.

    """
    loader = FunctionalLoader(sources_url)
    load_status = loader.load()

    # Second visit with a fresh loader instance: artifacts archived by the
    # first visit must not be fetched again.  (No internal-state reset is
    # needed here: a new loader is instantiated anyway.)
    loader = FunctionalLoader(sources_url)
    loader.load()

    expected_snapshot_id = '2c7f01ef3115f7999a013979fa27bfa12dcb63eb'
    assert load_status == {
        'status': 'eventful',
        'snapshot_id': expected_snapshot_id
    }

    expected_branches = {
        'https://github.com/owner-1/repository-1/revision-1.tgz': {
            'target': '488ad4e7b8e2511258725063cf43a2b897c503b4',
            'target_type': 'revision'
        },
    }
    expected_snapshot = {
        'id': expected_snapshot_id,
        'branches': expected_branches,
    }
    check_snapshot(expected_snapshot, storage=loader.storage)

    urls = [
        m.url for m in requests_mock_datadir.request_history
        if m.url == 'https://github.com/owner-1/repository-1/revision-1.tgz'
    ]
    # The artifact
    # 'https://github.com/owner-1/repository-1/revision-1.tgz' is only
    # visited one time across the two loads
    assert len(urls) == 1
def test_loader_two_visits(swh_config, requests_mock_datadir_visits):
    """To ensure there is only one origin, but two visits, two revisions and
    two snapshots are created.

    The first visit archives a single artifact; the second visit (served a
    different manifest by the requests_mock_datadir_visits fixture) archives
    a second one, on top of the first.

    """
    loader = FunctionalLoader(sources_url, 'nixpkgs')
    load_status = loader.load()
    expected_snapshot_id = '2c7f01ef3115f7999a013979fa27bfa12dcb63eb'
    assert load_status == {
        'status': 'eventful',
        'snapshot_id': expected_snapshot_id
    }

    expected_branches = {
        'https://github.com/owner-1/repository-1/revision-1.tgz': {
            'target': '488ad4e7b8e2511258725063cf43a2b897c503b4',
            'target_type': 'revision'
        }
    }
    expected_snapshot = {
        'id': expected_snapshot_id,
        'branches': expected_branches,
    }
    check_snapshot(expected_snapshot, storage=loader.storage)

    stats = get_stats(loader.storage)
    assert {
        'content': 1,
        'directory': 3,
        'origin': 1,
        'origin_visit': 1,
        'person': 1,
        'release': 0,
        'revision': 1,
        'skipped_content': 0,
        'snapshot': 1
    } == stats

    # Second visit with a fresh loader instance (no internal-state reset is
    # needed: a new loader is instantiated anyway).
    loader = FunctionalLoader(sources_url, 'nixpkgs')
    load_status = loader.load()
    expected_snapshot_id = '9c4fbfd991b35c7de876cd66bcda2967a8f476ac'
    assert load_status == {
        'status': 'eventful',
        'snapshot_id': expected_snapshot_id
    }

    # This ensures visits are incremental. Indeed, if we request a
    # second time an url, because of the requests_mock_datadir_visits
    # fixture, the file has to end with `_visit1`.
    expected_branches = {
        'https://github.com/owner-1/repository-1/revision-1.tgz': {
            'target': '488ad4e7b8e2511258725063cf43a2b897c503b4',
            'target_type': 'revision'
        },
        'https://github.com/owner-2/repository-1/revision-1.tgz': {
            'target': '85e0bad74e33e390aaeb74f139853ae3863ee544',
            'target_type': 'revision'
        }
    }
    expected_snapshot = {
        'id': expected_snapshot_id,
        'branches': expected_branches,
    }
    check_snapshot(expected_snapshot, storage=loader.storage)

    stats = get_stats(loader.storage)
    assert {
        'content': 2,
        'directory': 5,
        'origin': 1,
        'origin_visit': 2,
        'person': 1,
        'release': 0,
        'revision': 2,
        'skipped_content': 0,
        'snapshot': 2
    } == stats
def test_resolve_revision_from(swh_config, requests_mock_datadir):
    """resolve_revision_from looks up a known artifact by its origin url and
    returns the matching revision id, or None when no artifact matches.

    """
    loader = FunctionalLoader(sources_url)

    known_artifacts = {
        'id1': {'extrinsic': {'raw': {'url': "url1"}}},
        'id2': {'extrinsic': {'raw': {'url': "url2"}}},
    }

    # A known url resolves to its revision id.
    metadata = {'url': 'url1'}
    assert loader.resolve_revision_from(known_artifacts, metadata) == 'id1'

    # An unknown url resolves to nothing (use `is None`, not `== None`,
    # per PEP 8 — this also drops the noqa the old comparison required).
    metadata = {'url': 'url3'}
    assert loader.resolve_revision_from(known_artifacts, metadata) is None
# Review note (ardumont): resetting ``loader._info`` between visits is not
# useful — another loader is instantiated anyway — and it is PyPI-specific.