Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/nixguix/tests/test_nixguix.py
Show First 20 Lines • Show All 94 Lines • ▼ Show 20 Lines | for rev in revisions: | ||||
assert rev is not None | assert rev is not None | ||||
metadata = rev.metadata | metadata = rev.metadata | ||||
assert metadata is not None | assert metadata is not None | ||||
raw = metadata["extrinsic"]["raw"] | raw = metadata["extrinsic"]["raw"] | ||||
assert "url" in raw | assert "url" in raw | ||||
assert "integrity" in raw | assert "integrity" in raw | ||||
def test_retrieve_sources(swh_config, requests_mock_datadir): | def test_retrieve_sources(swh_storage, requests_mock_datadir): | ||||
j = parse_sources(retrieve_sources(sources_url)) | j = parse_sources(retrieve_sources(sources_url)) | ||||
assert "sources" in j.keys() | assert "sources" in j.keys() | ||||
assert len(j["sources"]) == 2 | assert len(j["sources"]) == 2 | ||||
def test_nixguix_url_not_found(swh_config, requests_mock_datadir): | def test_nixguix_url_not_found(swh_storage, requests_mock_datadir): | ||||
"""When failing to read from the url, the visit is marked as not_found. | """When failing to read from the url, the visit is marked as not_found. | ||||
Here the sources url does not exist, so requests_mock_datadir returns a 404. | Here the sources url does not exist, so requests_mock_datadir returns a 404. | ||||
Resulting in a NotFound raised within the package loader's main loop. | Resulting in a NotFound raised within the package loader's main loop. | ||||
This results in the task with status failed and a visit_status with status | This results in the task with status failed and a visit_status with status | ||||
"not_found". | "not_found". | ||||
""" | """ | ||||
unknown_url = "https://non-existing-url/" | unknown_url = "https://non-existing-url/" | ||||
loader = NixGuixLoader(unknown_url) | loader = NixGuixLoader(swh_storage, unknown_url) | ||||
# during the retrieval step | # during the retrieval step | ||||
load_status = loader.load() | load_status = loader.load() | ||||
assert load_status == {"status": "failed"} | assert load_status == {"status": "failed"} | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader.storage, unknown_url, status="not_found", type="nixguix", snapshot=None | swh_storage, unknown_url, status="not_found", type="nixguix", snapshot=None | ||||
) | ) | ||||
assert len(requests_mock_datadir.request_history) == 1 | assert len(requests_mock_datadir.request_history) == 1 | ||||
assert requests_mock_datadir.request_history[0].url == unknown_url | assert requests_mock_datadir.request_history[0].url == unknown_url | ||||
def test_nixguix_url_with_decoding_error(swh_config, requests_mock_datadir): | def test_nixguix_url_with_decoding_error(swh_storage, requests_mock_datadir): | ||||
"""Other errors during communication with the url, the visit is marked as failed | """Other errors during communication with the url, the visit is marked as failed | ||||
requests_mock_datadir will intercept the requests to sources_url. Since the file | requests_mock_datadir will intercept the requests to sources_url. Since the file | ||||
exists, it returns a 200 with the requested content of the query. As file.txt is not | exists, it returns a 200 with the requested content of the query. As file.txt is not | ||||
json, it fails to decode and raises a JSONDecodeError, in effect failing the visit. | json, it fails to decode and raises a JSONDecodeError, in effect failing the visit. | ||||
""" | """ | ||||
sources_url = "https://example.com/file.txt" | sources_url = "https://example.com/file.txt" | ||||
loader = NixGuixLoader(sources_url) | loader = NixGuixLoader(swh_storage, sources_url) | ||||
load_status = loader.load() | load_status = loader.load() | ||||
assert load_status == {"status": "failed"} | assert load_status == {"status": "failed"} | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader.storage, sources_url, status="failed", type="nixguix", snapshot=None | swh_storage, sources_url, status="failed", type="nixguix", snapshot=None | ||||
) | ) | ||||
assert len(requests_mock_datadir.request_history) == 1 | assert len(requests_mock_datadir.request_history) == 1 | ||||
assert requests_mock_datadir.request_history[0].url == sources_url | assert requests_mock_datadir.request_history[0].url == sources_url | ||||
def test_clean_sources_invalid_schema(swh_config, requests_mock_datadir): | def test_clean_sources_invalid_schema(swh_storage, requests_mock_datadir): | ||||
sources = {} | sources = {} | ||||
with pytest.raises(ValueError, match="sources structure invalid, missing: .*"): | with pytest.raises(ValueError, match="sources structure invalid, missing: .*"): | ||||
clean_sources(sources) | clean_sources(sources) | ||||
def test_clean_sources_invalid_version(swh_config, requests_mock_datadir): | def test_clean_sources_invalid_version(swh_storage, requests_mock_datadir): | ||||
for version_ok in [1, "1"]: # Check those versions are fine | for version_ok in [1, "1"]: # Check those versions are fine | ||||
clean_sources({"version": version_ok, "sources": [], "revision": "my-revision"}) | clean_sources({"version": version_ok, "sources": [], "revision": "my-revision"}) | ||||
for version_ko in [0, "0", 2, "2"]: # Check version != 1 raise an error | for version_ko in [0, "0", 2, "2"]: # Check version != 1 raise an error | ||||
with pytest.raises( | with pytest.raises( | ||||
ValueError, match="sources structure version .* is not supported" | ValueError, match="sources structure version .* is not supported" | ||||
): | ): | ||||
clean_sources( | clean_sources( | ||||
{"version": version_ko, "sources": [], "revision": "my-revision"} | {"version": version_ko, "sources": [], "revision": "my-revision"} | ||||
) | ) | ||||
def test_clean_sources_invalid_sources(swh_config, requests_mock_datadir): | def test_clean_sources_invalid_sources(swh_storage, requests_mock_datadir): | ||||
valid_sources = [ | valid_sources = [ | ||||
# 1 valid source | # 1 valid source | ||||
{"type": "url", "urls": ["my-url.tar.gz"], "integrity": "my-integrity"}, | {"type": "url", "urls": ["my-url.tar.gz"], "integrity": "my-integrity"}, | ||||
] | ] | ||||
sources = { | sources = { | ||||
"version": 1, | "version": 1, | ||||
"sources": valid_sources | "sources": valid_sources | ||||
+ [ | + [ | ||||
Show All 29 Lines | for supported_ext in supported_extensions: | ||||
assert not actual_match | assert not actual_match | ||||
for unsupported_ext in unsupported_extensions: | for unsupported_ext in unsupported_extensions: | ||||
unsupported_filepath = f"something.{unsupported_ext}" | unsupported_filepath = f"something.{unsupported_ext}" | ||||
actual_match = actual_unsupported_pattern.match(unsupported_filepath) | actual_match = actual_unsupported_pattern.match(unsupported_filepath) | ||||
assert actual_match | assert actual_match | ||||
def test_clean_sources_unsupported_artifacts(swh_config, requests_mock_datadir): | def test_clean_sources_unsupported_artifacts(swh_storage, requests_mock_datadir): | ||||
unsupported_file_extensions = [ | unsupported_file_extensions = [ | ||||
"iso", | "iso", | ||||
"whl", | "whl", | ||||
"gem", | "gem", | ||||
"pom", | "pom", | ||||
"msi", | "msi", | ||||
"pod", | "pod", | ||||
"png", | "png", | ||||
▲ Show 20 Lines • Show All 43 Lines • ▼ Show 20 Lines | sources = { | ||||
"revision": "my-revision", | "revision": "my-revision", | ||||
} | } | ||||
clean = clean_sources(sources, unsupported_file_extensions) | clean = clean_sources(sources, unsupported_file_extensions) | ||||
assert len(clean["sources"]) == len(supported_sources) | assert len(clean["sources"]) == len(supported_sources) | ||||
def test_loader_one_visit(swh_config, requests_mock_datadir, raw_sources): | def test_loader_one_visit(swh_storage, requests_mock_datadir, raw_sources): | ||||
loader = NixGuixLoader(sources_url) | loader = NixGuixLoader(swh_storage, sources_url) | ||||
res = loader.load() | res = loader.load() | ||||
assert res["status"] == "eventful" | assert res["status"] == "eventful" | ||||
stats = get_stats(loader.storage) | stats = get_stats(swh_storage) | ||||
assert { | assert { | ||||
"content": 1, | "content": 1, | ||||
"directory": 3, | "directory": 3, | ||||
"origin": 1, | "origin": 1, | ||||
"origin_visit": 1, | "origin_visit": 1, | ||||
"release": 0, | "release": 0, | ||||
"revision": 1, | "revision": 1, | ||||
"skipped_content": 0, | "skipped_content": 0, | ||||
"snapshot": 1, | "snapshot": 1, | ||||
} == stats | } == stats | ||||
# The visit is partial because urls pointing to non tarball files | # The visit is partial because urls pointing to non tarball files | ||||
# are not handled yet | # are not handled yet | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader.storage, sources_url, status="partial", type="nixguix" | swh_storage, sources_url, status="partial", type="nixguix" | ||||
) | ) | ||||
visit_status = origin_get_latest_visit_status(loader.storage, sources_url) | visit_status = origin_get_latest_visit_status(swh_storage, sources_url) | ||||
snapshot_swhid = SWHID( | snapshot_swhid = SWHID( | ||||
object_type="snapshot", object_id=hash_to_hex(visit_status.snapshot) | object_type="snapshot", object_id=hash_to_hex(visit_status.snapshot) | ||||
) | ) | ||||
metadata_authority = MetadataAuthority( | metadata_authority = MetadataAuthority( | ||||
type=MetadataAuthorityType.FORGE, url=sources_url, | type=MetadataAuthorityType.FORGE, url=sources_url, | ||||
) | ) | ||||
expected_metadata = [ | expected_metadata = [ | ||||
RawExtrinsicMetadata( | RawExtrinsicMetadata( | ||||
type=MetadataTargetType.SNAPSHOT, | type=MetadataTargetType.SNAPSHOT, | ||||
target=snapshot_swhid, | target=snapshot_swhid, | ||||
authority=metadata_authority, | authority=metadata_authority, | ||||
fetcher=MetadataFetcher( | fetcher=MetadataFetcher( | ||||
name="swh.loader.package.nixguix.loader.NixGuixLoader", | name="swh.loader.package.nixguix.loader.NixGuixLoader", | ||||
version=__version__, | version=__version__, | ||||
), | ), | ||||
discovery_date=loader.visit_date, | discovery_date=loader.visit_date, | ||||
format="nixguix-sources-json", | format="nixguix-sources-json", | ||||
metadata=raw_sources, | metadata=raw_sources, | ||||
origin=sources_url, | origin=sources_url, | ||||
) | ) | ||||
] | ] | ||||
assert loader.storage.raw_extrinsic_metadata_get( | assert swh_storage.raw_extrinsic_metadata_get( | ||||
MetadataTargetType.SNAPSHOT, snapshot_swhid, metadata_authority, | MetadataTargetType.SNAPSHOT, snapshot_swhid, metadata_authority, | ||||
) == PagedResult(next_page_token=None, results=expected_metadata,) | ) == PagedResult(next_page_token=None, results=expected_metadata,) | ||||
def test_uncompress_failure(swh_config, requests_mock_datadir): | def test_uncompress_failure(swh_storage, requests_mock_datadir): | ||||
"""Non tarball files are currently not supported and the uncompress | """Non tarball files are currently not supported and the uncompress | ||||
function fails on such kind of files. | function fails on such kind of files. | ||||
However, even in this case of failure (because of the url | However, even in this case of failure (because of the url | ||||
https://example.com/file.txt), a snapshot and a visit have to be | https://example.com/file.txt), a snapshot and a visit have to be | ||||
created (with a status partial since not all files are archived). | created (with a status partial since not all files are archived). | ||||
""" | """ | ||||
loader = NixGuixLoader(sources_url) | loader = NixGuixLoader(swh_storage, sources_url) | ||||
loader_status = loader.load() | loader_status = loader.load() | ||||
sources = loader.supported_sources()["sources"] | sources = loader.supported_sources()["sources"] | ||||
urls = [s["urls"][0] for s in sources] | urls = [s["urls"][0] for s in sources] | ||||
assert "https://example.com/file.txt" in urls | assert "https://example.com/file.txt" in urls | ||||
assert loader_status["status"] == "eventful" | assert loader_status["status"] == "eventful" | ||||
# The visit is partial because urls pointing to non tarball files | # The visit is partial because urls pointing to non tarball files | ||||
# are not handled yet | # are not handled yet | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader.storage, sources_url, status="partial", type="nixguix" | swh_storage, sources_url, status="partial", type="nixguix" | ||||
) | ) | ||||
def test_loader_incremental(swh_config, requests_mock_datadir): | def test_loader_incremental(swh_storage, requests_mock_datadir): | ||||
"""Ensure a second visit do not download artifact already | """Ensure a second visit do not download artifact already | ||||
downloaded by the previous visit. | downloaded by the previous visit. | ||||
""" | """ | ||||
loader = NixGuixLoader(sources_url) | loader = NixGuixLoader(swh_storage, sources_url) | ||||
load_status = loader.load() | load_status = loader.load() | ||||
loader.load() | loader.load() | ||||
assert load_status == {"status": "eventful", "snapshot_id": SNAPSHOT1.id.hex()} | assert load_status == {"status": "eventful", "snapshot_id": SNAPSHOT1.id.hex()} | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader.storage, | swh_storage, | ||||
sources_url, | sources_url, | ||||
status="partial", | status="partial", | ||||
type="nixguix", | type="nixguix", | ||||
snapshot=SNAPSHOT1.id, | snapshot=SNAPSHOT1.id, | ||||
) | ) | ||||
check_snapshot(SNAPSHOT1, storage=loader.storage) | check_snapshot(SNAPSHOT1, storage=swh_storage) | ||||
urls = [ | urls = [ | ||||
m.url | m.url | ||||
for m in requests_mock_datadir.request_history | for m in requests_mock_datadir.request_history | ||||
if m.url == ("https://github.com/owner-1/repository-1/revision-1.tgz") | if m.url == ("https://github.com/owner-1/repository-1/revision-1.tgz") | ||||
] | ] | ||||
# The artifact | # The artifact | ||||
# 'https://github.com/owner-1/repository-1/revision-1.tgz' is only | # 'https://github.com/owner-1/repository-1/revision-1.tgz' is only | ||||
# visited one time | # visited one time | ||||
assert len(urls) == 1 | assert len(urls) == 1 | ||||
def test_loader_two_visits(swh_config, requests_mock_datadir_visits): | def test_loader_two_visits(swh_storage, requests_mock_datadir_visits): | ||||
"""To ensure there is only one origin, but two visits, two revisions | """To ensure there is only one origin, but two visits, two revisions | ||||
and two snapshots are created. | and two snapshots are created. | ||||
The first visit creates a snapshot containing one tarball. The | The first visit creates a snapshot containing one tarball. The | ||||
second visit creates a snapshot containing the same tarball and | second visit creates a snapshot containing the same tarball and | ||||
another tarball. | another tarball. | ||||
""" | """ | ||||
loader = NixGuixLoader(sources_url) | loader = NixGuixLoader(swh_storage, sources_url) | ||||
load_status = loader.load() | load_status = loader.load() | ||||
assert load_status == {"status": "eventful", "snapshot_id": SNAPSHOT1.id.hex()} | assert load_status == {"status": "eventful", "snapshot_id": SNAPSHOT1.id.hex()} | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader.storage, | swh_storage, | ||||
sources_url, | sources_url, | ||||
status="partial", | status="partial", | ||||
type="nixguix", | type="nixguix", | ||||
snapshot=SNAPSHOT1.id, | snapshot=SNAPSHOT1.id, | ||||
) | ) | ||||
check_snapshot(SNAPSHOT1, storage=loader.storage) | check_snapshot(SNAPSHOT1, storage=swh_storage) | ||||
stats = get_stats(loader.storage) | stats = get_stats(swh_storage) | ||||
assert { | assert { | ||||
"content": 1, | "content": 1, | ||||
"directory": 3, | "directory": 3, | ||||
"origin": 1, | "origin": 1, | ||||
"origin_visit": 1, | "origin_visit": 1, | ||||
"release": 0, | "release": 0, | ||||
"revision": 1, | "revision": 1, | ||||
"skipped_content": 0, | "skipped_content": 0, | ||||
"snapshot": 1, | "snapshot": 1, | ||||
} == stats | } == stats | ||||
loader = NixGuixLoader(sources_url) | loader = NixGuixLoader(swh_storage, sources_url) | ||||
load_status = loader.load() | load_status = loader.load() | ||||
expected_snapshot_id_hex = "b0bfa75cbd0cc90aac3b9e95fb0f59c731176d97" | expected_snapshot_id_hex = "b0bfa75cbd0cc90aac3b9e95fb0f59c731176d97" | ||||
expected_snapshot_id = hash_to_bytes(expected_snapshot_id_hex) | expected_snapshot_id = hash_to_bytes(expected_snapshot_id_hex) | ||||
assert load_status == { | assert load_status == { | ||||
"status": "eventful", | "status": "eventful", | ||||
"snapshot_id": expected_snapshot_id_hex, | "snapshot_id": expected_snapshot_id_hex, | ||||
} | } | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader.storage, | swh_storage, | ||||
sources_url, | sources_url, | ||||
status="partial", | status="partial", | ||||
type="nixguix", | type="nixguix", | ||||
snapshot=expected_snapshot_id, | snapshot=expected_snapshot_id, | ||||
) | ) | ||||
# This ensures visits are incremental. Indeed, if we request a | # This ensures visits are incremental. Indeed, if we request a | ||||
# second time an url, because of the requests_mock_datadir_visits | # second time an url, because of the requests_mock_datadir_visits | ||||
Show All 10 Lines | expected_snapshot = Snapshot( | ||||
target_type=TargetType.REVISION, | target_type=TargetType.REVISION, | ||||
), | ), | ||||
b"https://github.com/owner-2/repository-1/revision-1.tgz": SnapshotBranch( | b"https://github.com/owner-2/repository-1/revision-1.tgz": SnapshotBranch( | ||||
target=hash_to_bytes("85e0bad74e33e390aaeb74f139853ae3863ee544"), | target=hash_to_bytes("85e0bad74e33e390aaeb74f139853ae3863ee544"), | ||||
target_type=TargetType.REVISION, | target_type=TargetType.REVISION, | ||||
), | ), | ||||
}, | }, | ||||
) | ) | ||||
check_snapshot(expected_snapshot, storage=loader.storage) | check_snapshot(expected_snapshot, storage=swh_storage) | ||||
stats = get_stats(loader.storage) | stats = get_stats(swh_storage) | ||||
assert { | assert { | ||||
"content": 2, | "content": 2, | ||||
"directory": 5, | "directory": 5, | ||||
"origin": 1, | "origin": 1, | ||||
"origin_visit": 2, | "origin_visit": 2, | ||||
"release": 0, | "release": 0, | ||||
"revision": 2, | "revision": 2, | ||||
"skipped_content": 0, | "skipped_content": 0, | ||||
"snapshot": 2, | "snapshot": 2, | ||||
} == stats | } == stats | ||||
def test_resolve_revision_from(swh_config, requests_mock_datadir, datadir): | def test_resolve_revision_from(swh_storage, requests_mock_datadir, datadir): | ||||
loader = NixGuixLoader(sources_url) | loader = NixGuixLoader(swh_storage, sources_url) | ||||
known_artifacts = { | known_artifacts = { | ||||
"id1": {"extrinsic": {"raw": {"url": "url1", "integrity": "integrity1"}}}, | "id1": {"extrinsic": {"raw": {"url": "url1", "integrity": "integrity1"}}}, | ||||
"id2": {"extrinsic": {"raw": {"url": "url2", "integrity": "integrity2"}}}, | "id2": {"extrinsic": {"raw": {"url": "url2", "integrity": "integrity2"}}}, | ||||
} | } | ||||
p_info = NixGuixPackageInfo.from_metadata( | p_info = NixGuixPackageInfo.from_metadata( | ||||
{"url": "url1", "integrity": "integrity1"} | {"url": "url1", "integrity": "integrity1"} | ||||
) | ) | ||||
assert loader.resolve_revision_from(known_artifacts, p_info) == "id1" | assert loader.resolve_revision_from(known_artifacts, p_info) == "id1" | ||||
p_info = NixGuixPackageInfo.from_metadata( | p_info = NixGuixPackageInfo.from_metadata( | ||||
{"url": "url3", "integrity": "integrity3"} | {"url": "url3", "integrity": "integrity3"} | ||||
) | ) | ||||
assert loader.resolve_revision_from(known_artifacts, p_info) == None # noqa | assert loader.resolve_revision_from(known_artifacts, p_info) == None # noqa | ||||
def test_evaluation_branch(swh_config, requests_mock_datadir): | def test_evaluation_branch(swh_storage, requests_mock_datadir): | ||||
loader = NixGuixLoader(sources_url) | loader = NixGuixLoader(swh_storage, sources_url) | ||||
res = loader.load() | res = loader.load() | ||||
assert res["status"] == "eventful" | assert res["status"] == "eventful" | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader.storage, | swh_storage, | ||||
sources_url, | sources_url, | ||||
status="partial", | status="partial", | ||||
type="nixguix", | type="nixguix", | ||||
snapshot=SNAPSHOT1.id, | snapshot=SNAPSHOT1.id, | ||||
) | ) | ||||
check_snapshot(SNAPSHOT1, storage=loader.storage) | check_snapshot(SNAPSHOT1, storage=swh_storage) | ||||
def test_eoferror(swh_config, requests_mock_datadir): | def test_eoferror(swh_storage, requests_mock_datadir): | ||||
"""Load a truncated archive which is invalid to make the uncompress | """Load a truncated archive which is invalid to make the uncompress | ||||
function raise the exception EOFError. We then check if a | function raise the exception EOFError. We then check if a | ||||
snapshot is created, meaning this error is well managed. | snapshot is created, meaning this error is well managed. | ||||
""" | """ | ||||
sources = ( | sources = ( | ||||
"https://nix-community.github.io/nixpkgs-swh/sources-EOFError.json" # noqa | "https://nix-community.github.io/nixpkgs-swh/sources-EOFError.json" # noqa | ||||
) | ) | ||||
loader = NixGuixLoader(sources) | loader = NixGuixLoader(swh_storage, sources) | ||||
loader.load() | loader.load() | ||||
expected_snapshot = Snapshot( | expected_snapshot = Snapshot( | ||||
id=hash_to_bytes("4257fa2350168c6bfec726a06452ea27a2c0cb33"), | id=hash_to_bytes("4257fa2350168c6bfec726a06452ea27a2c0cb33"), | ||||
branches={ | branches={ | ||||
b"evaluation": SnapshotBranch( | b"evaluation": SnapshotBranch( | ||||
target=hash_to_bytes("cc4e04c26672dd74e5fd0fecb78b435fb55368f7"), | target=hash_to_bytes("cc4e04c26672dd74e5fd0fecb78b435fb55368f7"), | ||||
target_type=TargetType.REVISION, | target_type=TargetType.REVISION, | ||||
), | ), | ||||
}, | }, | ||||
) | ) | ||||
check_snapshot(expected_snapshot, storage=loader.storage) | check_snapshot(expected_snapshot, storage=swh_storage) | ||||
def fake_download( | def fake_download( | ||||
url: str, | url: str, | ||||
dest: str, | dest: str, | ||||
hashes: Dict = {}, | hashes: Dict = {}, | ||||
filename: Optional[str] = None, | filename: Optional[str] = None, | ||||
auth: Optional[Tuple[str, str]] = None, | auth: Optional[Tuple[str, str]] = None, | ||||
) -> Tuple[str, Dict]: | ) -> Tuple[str, Dict]: | ||||
"""Fake download which raises HashCollision (for the sake of test simpliciy, | """Fake download which raises HashCollision (for the sake of test simpliciy, | ||||
let's accept that makes sense) | let's accept that makes sense) | ||||
For tests purpose only. | For tests purpose only. | ||||
""" | """ | ||||
if url == "https://example.com/file.txt": | if url == "https://example.com/file.txt": | ||||
# instead of failing because it's a file not dealt with by the nix guix | # instead of failing because it's a file not dealt with by the nix guix | ||||
# loader, make it raise a hash collision | # loader, make it raise a hash collision | ||||
raise HashCollision("sha1", "f92d74e3874587aaf443d1db961d4e26dde13e9c", []) | raise HashCollision("sha1", "f92d74e3874587aaf443d1db961d4e26dde13e9c", []) | ||||
return download(url, dest, hashes, filename, auth) | return download(url, dest, hashes, filename, auth) | ||||
def test_raise_exception(swh_config, requests_mock_datadir, mocker): | def test_raise_exception(swh_storage, requests_mock_datadir, mocker): | ||||
mock_download = mocker.patch("swh.loader.package.loader.download") | mock_download = mocker.patch("swh.loader.package.loader.download") | ||||
mock_download.side_effect = fake_download | mock_download.side_effect = fake_download | ||||
loader = NixGuixLoader(sources_url) | loader = NixGuixLoader(swh_storage, sources_url) | ||||
res = loader.load() | res = loader.load() | ||||
assert res == { | assert res == { | ||||
"status": "eventful", | "status": "eventful", | ||||
"snapshot_id": SNAPSHOT1.id.hex(), | "snapshot_id": SNAPSHOT1.id.hex(), | ||||
} | } | ||||
check_snapshot(SNAPSHOT1, storage=loader.storage) | check_snapshot(SNAPSHOT1, storage=swh_storage) | ||||
assert len(mock_download.mock_calls) == 2 | assert len(mock_download.mock_calls) == 2 | ||||
# The visit is partial because some artifact downloads failed | # The visit is partial because some artifact downloads failed | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader.storage, sources_url, status="partial", type="nixguix" | swh_storage, sources_url, status="partial", type="nixguix" | ||||
) | ) | ||||
def test_load_nixguix_one_common_artifact_from_other_loader( | def test_load_nixguix_one_common_artifact_from_other_loader( | ||||
swh_config, datadir, requests_mock_datadir_visits, caplog | swh_storage, datadir, requests_mock_datadir_visits, caplog | ||||
): | ): | ||||
"""Misformatted revision should be caught and logged, then loading continues | """Misformatted revision should be caught and logged, then loading continues | ||||
""" | """ | ||||
caplog.set_level(logging.ERROR, "swh.loader.package.nixguix.loader") | caplog.set_level(logging.ERROR, "swh.loader.package.nixguix.loader") | ||||
# 1. first ingest with for example the archive loader | # 1. first ingest with for example the archive loader | ||||
gnu_url = "https://ftp.gnu.org/gnu/8sync/" | gnu_url = "https://ftp.gnu.org/gnu/8sync/" | ||||
release = "0.1.0" | release = "0.1.0" | ||||
artifact_url = f"https://ftp.gnu.org/gnu/8sync/8sync-{release}.tar.gz" | artifact_url = f"https://ftp.gnu.org/gnu/8sync/8sync-{release}.tar.gz" | ||||
gnu_artifacts = [ | gnu_artifacts = [ | ||||
{ | { | ||||
"time": 944729610, | "time": 944729610, | ||||
"url": artifact_url, | "url": artifact_url, | ||||
"length": 221837, | "length": 221837, | ||||
"filename": f"8sync-{release}.tar.gz", | "filename": f"8sync-{release}.tar.gz", | ||||
"version": release, | "version": release, | ||||
} | } | ||||
] | ] | ||||
archive_loader = ArchiveLoader(url=gnu_url, artifacts=gnu_artifacts) | archive_loader = ArchiveLoader(swh_storage, url=gnu_url, artifacts=gnu_artifacts) | ||||
actual_load_status = archive_loader.load() | actual_load_status = archive_loader.load() | ||||
expected_snapshot_id = "c419397fd912039825ebdbea378bc6283f006bf5" | expected_snapshot_id = "c419397fd912039825ebdbea378bc6283f006bf5" | ||||
assert actual_load_status["status"] == "eventful" | assert actual_load_status["status"] == "eventful" | ||||
assert actual_load_status["snapshot_id"] == expected_snapshot_id # noqa | assert actual_load_status["snapshot_id"] == expected_snapshot_id # noqa | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
archive_loader.storage, gnu_url, status="full", type="tar" | archive_loader.storage, gnu_url, status="full", type="tar" | ||||
) | ) | ||||
Show All 17 Lines | for source in all_sources["sources"]: | ||||
if source["urls"][0] == artifact_url: | if source["urls"][0] == artifact_url: | ||||
found = True | found = True | ||||
assert ( | assert ( | ||||
found is True | found is True | ||||
), f"test setup error: {artifact_url} must be in {data_sources}" | ), f"test setup error: {artifact_url} must be in {data_sources}" | ||||
# first visit with a snapshot, ok | # first visit with a snapshot, ok | ||||
sources_url = "https://nix-community.github.io/nixpkgs-swh/sources_special.json" | sources_url = "https://nix-community.github.io/nixpkgs-swh/sources_special.json" | ||||
loader = NixGuixLoader(sources_url) | loader = NixGuixLoader(swh_storage, sources_url) | ||||
actual_load_status2 = loader.load() | actual_load_status2 = loader.load() | ||||
assert actual_load_status2["status"] == "eventful" | assert actual_load_status2["status"] == "eventful" | ||||
assert_last_visit_matches( | assert_last_visit_matches(swh_storage, sources_url, status="full", type="nixguix") | ||||
loader.storage, sources_url, status="full", type="nixguix" | |||||
) | |||||
snapshot_id = actual_load_status2["snapshot_id"] | snapshot_id = actual_load_status2["snapshot_id"] | ||||
snapshot = snapshot_get_all_branches(loader.storage, hash_to_bytes(snapshot_id)) | snapshot = snapshot_get_all_branches(swh_storage, hash_to_bytes(snapshot_id)) | ||||
assert snapshot | assert snapshot | ||||
# simulate a snapshot already seen with a revision with the wrong metadata structure | # simulate a snapshot already seen with a revision with the wrong metadata structure | ||||
# This revision should be skipped, thus making the artifact be ingested again. | # This revision should be skipped, thus making the artifact be ingested again. | ||||
with patch( | with patch( | ||||
"swh.loader.package.loader.PackageLoader.last_snapshot" | "swh.loader.package.loader.PackageLoader.last_snapshot" | ||||
) as last_snapshot: | ) as last_snapshot: | ||||
# mutate the snapshot to target a revision with the wrong metadata structure | # mutate the snapshot to target a revision with the wrong metadata structure | ||||
# snapshot["branches"][artifact_url.encode("utf-8")] = first_revision | # snapshot["branches"][artifact_url.encode("utf-8")] = first_revision | ||||
old_revision = loader.storage.revision_get([first_revision.target])[0] | old_revision = swh_storage.revision_get([first_revision.target])[0] | ||||
# assert that revision is not in the right format | # assert that revision is not in the right format | ||||
assert old_revision.metadata["extrinsic"]["raw"].get("integrity", {}) == {} | assert old_revision.metadata["extrinsic"]["raw"].get("integrity", {}) == {} | ||||
# mutate snapshot to create a clash | # mutate snapshot to create a clash | ||||
snapshot = attr.evolve( | snapshot = attr.evolve( | ||||
snapshot, | snapshot, | ||||
branches={ | branches={ | ||||
**snapshot.branches, | **snapshot.branches, | ||||
artifact_url.encode("utf-8"): SnapshotBranch( | artifact_url.encode("utf-8"): SnapshotBranch( | ||||
target_type=TargetType.REVISION, | target_type=TargetType.REVISION, | ||||
target=hash_to_bytes(old_revision.id), | target=hash_to_bytes(old_revision.id), | ||||
), | ), | ||||
}, | }, | ||||
) | ) | ||||
# modify snapshot to actually change revision metadata structure so we simulate | # modify snapshot to actually change revision metadata structure so we simulate | ||||
# a revision written by somebody else (structure different) | # a revision written by somebody else (structure different) | ||||
last_snapshot.return_value = snapshot | last_snapshot.return_value = snapshot | ||||
loader = NixGuixLoader(sources_url) | loader = NixGuixLoader(swh_storage, sources_url) | ||||
actual_load_status3 = loader.load() | actual_load_status3 = loader.load() | ||||
assert last_snapshot.called | assert last_snapshot.called | ||||
assert actual_load_status3["status"] == "eventful" | assert actual_load_status3["status"] == "eventful" | ||||
assert_last_visit_matches( | assert_last_visit_matches( | ||||
loader.storage, sources_url, status="full", type="nixguix" | swh_storage, sources_url, status="full", type="nixguix" | ||||
) | ) | ||||
new_snapshot_id = "32ff641e510aceefc3a6d0dcbf208b2854d2e965" | new_snapshot_id = "32ff641e510aceefc3a6d0dcbf208b2854d2e965" | ||||
assert actual_load_status3["snapshot_id"] == new_snapshot_id | assert actual_load_status3["snapshot_id"] == new_snapshot_id | ||||
last_snapshot = snapshot_get_all_branches( | last_snapshot = snapshot_get_all_branches( | ||||
loader.storage, hash_to_bytes(new_snapshot_id) | swh_storage, hash_to_bytes(new_snapshot_id) | ||||
) | ) | ||||
new_revision_branch = last_snapshot.branches[artifact_url.encode("utf-8")] | new_revision_branch = last_snapshot.branches[artifact_url.encode("utf-8")] | ||||
assert new_revision_branch.target_type == TargetType.REVISION | assert new_revision_branch.target_type == TargetType.REVISION | ||||
new_revision = loader.storage.revision_get([new_revision_branch.target])[0] | new_revision = swh_storage.revision_get([new_revision_branch.target])[0] | ||||
# the new revision has the correct structure, so it got ingested alright by the | # the new revision has the correct structure, so it got ingested alright by the | ||||
# new run | # new run | ||||
assert new_revision.metadata["extrinsic"]["raw"]["integrity"] is not None | assert new_revision.metadata["extrinsic"]["raw"]["integrity"] is not None | ||||
nb_detections = 0 | nb_detections = 0 | ||||
actual_detection: Dict | actual_detection: Dict | ||||
for record in caplog.records: | for record in caplog.records: | ||||
Show All 14 Lines |