Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/debian/tests/test_debian.py
# Copyright (C) 2019-2020 The Software Heritage developers | # Copyright (C) 2019-2021 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import logging | import logging | ||||
from os import path | from os import path | ||||
import random | import random | ||||
▲ Show 20 Lines • Show All 84 Lines • ▼ Show 20 Lines | |||||
PACKAGES_PER_VERSION = { | PACKAGES_PER_VERSION = { | ||||
"stretch/contrib/0.7.2-3": PACKAGE_FILES, | "stretch/contrib/0.7.2-3": PACKAGE_FILES, | ||||
"buster/contrib/0.7.2-4": PACKAGE_FILES2, | "buster/contrib/0.7.2-4": PACKAGE_FILES2, | ||||
} | } | ||||
def test_debian_first_visit(swh_config, requests_mock_datadir): | def test_debian_first_visit(swh_storage, requests_mock_datadir): | ||||
"""With no prior visit, loading a debian project ends up with 1 snapshot | """With no prior visit, loading a debian project ends up with 1 snapshot | ||||
""" | """ | ||||
loader = DebianLoader( | loader = DebianLoader( | ||||
url=URL, date="2019-10-12T05:58:09.165557+00:00", packages=PACKAGE_PER_VERSION, | swh_storage, | ||||
URL, | |||||
date="2019-10-12T05:58:09.165557+00:00", | |||||
packages=PACKAGE_PER_VERSION, | |||||
) | ) | ||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
expected_snapshot_id = "3b6b66e6ee4e7d903a379a882684a2a50480c0b4" | expected_snapshot_id = "3b6b66e6ee4e7d903a379a882684a2a50480c0b4" | ||||
assert actual_load_status == { | assert actual_load_status == { | ||||
"status": "eventful", | "status": "eventful", | ||||
"snapshot_id": expected_snapshot_id, | "snapshot_id": expected_snapshot_id, | ||||
} | } | ||||
assert_last_visit_matches(loader.storage, URL, status="full", type="deb") | assert_last_visit_matches(swh_storage, URL, status="full", type="deb") | ||||
stats = get_stats(loader.storage) | stats = get_stats(swh_storage) | ||||
assert { | assert { | ||||
"content": 42, | "content": 42, | ||||
"directory": 2, | "directory": 2, | ||||
"origin": 1, | "origin": 1, | ||||
"origin_visit": 1, | "origin_visit": 1, | ||||
"release": 0, | "release": 0, | ||||
"revision": 1, # all artifacts under 1 revision | "revision": 1, # all artifacts under 1 revision | ||||
"skipped_content": 0, | "skipped_content": 0, | ||||
"snapshot": 1, | "snapshot": 1, | ||||
} == stats | } == stats | ||||
expected_snapshot = Snapshot( | expected_snapshot = Snapshot( | ||||
id=hash_to_bytes(expected_snapshot_id), | id=hash_to_bytes(expected_snapshot_id), | ||||
branches={ | branches={ | ||||
b"releases/stretch/contrib/0.7.2-3": SnapshotBranch( | b"releases/stretch/contrib/0.7.2-3": SnapshotBranch( | ||||
target_type=TargetType.REVISION, | target_type=TargetType.REVISION, | ||||
target=hash_to_bytes("2807f5b3f84368b4889a9ae827fe85854ffecf07"), | target=hash_to_bytes("2807f5b3f84368b4889a9ae827fe85854ffecf07"), | ||||
) | ) | ||||
}, | }, | ||||
) # different than the previous loader as no release is done | ) # different than the previous loader as no release is done | ||||
check_snapshot(expected_snapshot, loader.storage) | check_snapshot(expected_snapshot, swh_storage) | ||||
def test_debian_first_visit_then_another_visit(swh_config, requests_mock_datadir): | def test_debian_first_visit_then_another_visit(swh_storage, requests_mock_datadir): | ||||
"""With no prior visit, loading a debian project twice ends up with 1 snapshot | """With no prior visit, loading a debian project twice ends up with 1 snapshot | ||||
""" | """ | ||||
loader = DebianLoader( | loader = DebianLoader( | ||||
url=URL, date="2019-10-12T05:58:09.165557+00:00", packages=PACKAGE_PER_VERSION | swh_storage, | ||||
URL, | |||||
date="2019-10-12T05:58:09.165557+00:00", | |||||
packages=PACKAGE_PER_VERSION, | |||||
) | ) | ||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
expected_snapshot_id = "3b6b66e6ee4e7d903a379a882684a2a50480c0b4" | expected_snapshot_id = "3b6b66e6ee4e7d903a379a882684a2a50480c0b4" | ||||
assert actual_load_status == { | assert actual_load_status == { | ||||
"status": "eventful", | "status": "eventful", | ||||
"snapshot_id": expected_snapshot_id, | "snapshot_id": expected_snapshot_id, | ||||
} | } | ||||
assert_last_visit_matches(loader.storage, URL, status="full", type="deb") | assert_last_visit_matches(swh_storage, URL, status="full", type="deb") | ||||
stats = get_stats(loader.storage) | stats = get_stats(swh_storage) | ||||
assert { | assert { | ||||
"content": 42, | "content": 42, | ||||
"directory": 2, | "directory": 2, | ||||
"origin": 1, | "origin": 1, | ||||
"origin_visit": 1, | "origin_visit": 1, | ||||
"release": 0, | "release": 0, | ||||
"revision": 1, # all artifacts under 1 revision | "revision": 1, # all artifacts under 1 revision | ||||
"skipped_content": 0, | "skipped_content": 0, | ||||
"snapshot": 1, | "snapshot": 1, | ||||
} == stats | } == stats | ||||
expected_snapshot = Snapshot( | expected_snapshot = Snapshot( | ||||
id=hash_to_bytes(expected_snapshot_id), | id=hash_to_bytes(expected_snapshot_id), | ||||
branches={ | branches={ | ||||
b"releases/stretch/contrib/0.7.2-3": SnapshotBranch( | b"releases/stretch/contrib/0.7.2-3": SnapshotBranch( | ||||
target_type=TargetType.REVISION, | target_type=TargetType.REVISION, | ||||
target=hash_to_bytes("2807f5b3f84368b4889a9ae827fe85854ffecf07"), | target=hash_to_bytes("2807f5b3f84368b4889a9ae827fe85854ffecf07"), | ||||
) | ) | ||||
}, | }, | ||||
) # different than the previous loader as no release is done | ) # different than the previous loader as no release is done | ||||
check_snapshot(expected_snapshot, loader.storage) | check_snapshot(expected_snapshot, swh_storage) | ||||
# No change in between load | # No change in between load | ||||
actual_load_status2 = loader.load() | actual_load_status2 = loader.load() | ||||
assert actual_load_status2["status"] == "uneventful" | assert actual_load_status2["status"] == "uneventful" | ||||
assert_last_visit_matches(loader.storage, URL, status="full", type="deb") | assert_last_visit_matches(swh_storage, URL, status="full", type="deb") | ||||
stats2 = get_stats(loader.storage) | stats2 = get_stats(swh_storage) | ||||
assert { | assert { | ||||
"content": 42 + 0, | "content": 42 + 0, | ||||
"directory": 2 + 0, | "directory": 2 + 0, | ||||
"origin": 1, | "origin": 1, | ||||
"origin_visit": 1 + 1, # a new visit occurred | "origin_visit": 1 + 1, # a new visit occurred | ||||
"release": 0, | "release": 0, | ||||
"revision": 1, | "revision": 1, | ||||
"skipped_content": 0, | "skipped_content": 0, | ||||
"snapshot": 1, # same snapshot across 2 visits | "snapshot": 1, # same snapshot across 2 visits | ||||
} == stats2 | } == stats2 | ||||
urls = [ | urls = [ | ||||
m.url | m.url | ||||
for m in requests_mock_datadir.request_history | for m in requests_mock_datadir.request_history | ||||
if m.url.startswith("http://deb.debian.org") | if m.url.startswith("http://deb.debian.org") | ||||
] | ] | ||||
# no package artifact url was requested more than once across the 2 visits | # no package artifact url was requested more than once across the 2 visits | ||||
assert len(urls) == len(set(urls)) | assert len(urls) == len(set(urls)) | ||||
def test_uid_to_person(): | def test_debian_uid_to_person(): | ||||
uid = "Someone Name <someone@orga.org>" | uid = "Someone Name <someone@orga.org>" | ||||
actual_person = uid_to_person(uid) | actual_person = uid_to_person(uid) | ||||
assert actual_person == { | assert actual_person == { | ||||
"name": "Someone Name", | "name": "Someone Name", | ||||
"email": "someone@orga.org", | "email": "someone@orga.org", | ||||
"fullname": uid, | "fullname": uid, | ||||
} | } | ||||
def test_prepare_person(): | def test_debian_prepare_person(): | ||||
actual_author = prepare_person( | actual_author = prepare_person( | ||||
{ | { | ||||
"name": "Someone Name", | "name": "Someone Name", | ||||
"email": "someone@orga.org", | "email": "someone@orga.org", | ||||
"fullname": "Someone Name <someone@orga.org>", | "fullname": "Someone Name <someone@orga.org>", | ||||
} | } | ||||
) | ) | ||||
assert actual_author == Person( | assert actual_author == Person( | ||||
name=b"Someone Name", | name=b"Someone Name", | ||||
email=b"someone@orga.org", | email=b"someone@orga.org", | ||||
fullname=b"Someone Name <someone@orga.org>", | fullname=b"Someone Name <someone@orga.org>", | ||||
) | ) | ||||
def test_download_package(datadir, tmpdir, requests_mock_datadir): | def test_debian_download_package(datadir, tmpdir, requests_mock_datadir): | ||||
tmpdir = str(tmpdir) # py3.5 work around (LocalPath issue) | tmpdir = str(tmpdir) # py3.5 work around (LocalPath issue) | ||||
p_info = DebianPackageInfo.from_metadata(PACKAGE_FILES, url=URL) | p_info = DebianPackageInfo.from_metadata(PACKAGE_FILES, url=URL) | ||||
all_hashes = download_package(p_info, tmpdir) | all_hashes = download_package(p_info, tmpdir) | ||||
assert all_hashes == { | assert all_hashes == { | ||||
"cicero_0.7.2-3.diff.gz": { | "cicero_0.7.2-3.diff.gz": { | ||||
"checksums": { | "checksums": { | ||||
"sha1": "0815282053f21601b0ec4adf7a8fe47eace3c0bc", | "sha1": "0815282053f21601b0ec4adf7a8fe47eace3c0bc", | ||||
"sha256": "f039c9642fe15c75bed5254315e2a29f9f2700da0e29d9b0729b3ffc46c8971c", # noqa | "sha256": "f039c9642fe15c75bed5254315e2a29f9f2700da0e29d9b0729b3ffc46c8971c", # noqa | ||||
Show All 26 Lines | assert all_hashes == { | ||||
"url": ( | "url": ( | ||||
"http://deb.debian.org/debian/pool/contrib/c/cicero/" | "http://deb.debian.org/debian/pool/contrib/c/cicero/" | ||||
"cicero_0.7.2.orig.tar.gz" | "cicero_0.7.2.orig.tar.gz" | ||||
), | ), | ||||
}, | }, | ||||
} | } | ||||
def test_dsc_information_ok(): | def test_debian_dsc_information_ok(): | ||||
fname = "cicero_0.7.2-3.dsc" | fname = "cicero_0.7.2-3.dsc" | ||||
p_info = DebianPackageInfo.from_metadata(PACKAGE_FILES, url=URL) | p_info = DebianPackageInfo.from_metadata(PACKAGE_FILES, url=URL) | ||||
dsc_url, dsc_name = dsc_information(p_info) | dsc_url, dsc_name = dsc_information(p_info) | ||||
assert dsc_url == PACKAGE_FILES["files"][fname]["uri"] | assert dsc_url == PACKAGE_FILES["files"][fname]["uri"] | ||||
assert dsc_name == PACKAGE_FILES["files"][fname]["name"] | assert dsc_name == PACKAGE_FILES["files"][fname]["name"] | ||||
def test_dsc_information_not_found(): | def test_debian_dsc_information_not_found(): | ||||
fname = "cicero_0.7.2-3.dsc" | fname = "cicero_0.7.2-3.dsc" | ||||
p_info = DebianPackageInfo.from_metadata(PACKAGE_FILES, url=URL) | p_info = DebianPackageInfo.from_metadata(PACKAGE_FILES, url=URL) | ||||
p_info.files.pop(fname) | p_info.files.pop(fname) | ||||
dsc_url, dsc_name = dsc_information(p_info) | dsc_url, dsc_name = dsc_information(p_info) | ||||
assert dsc_url is None | assert dsc_url is None | ||||
assert dsc_name is None | assert dsc_name is None | ||||
def test_dsc_information_too_many_dsc_entries(): | def test_debian_dsc_information_too_many_dsc_entries(): | ||||
# craft an extra dsc file | # craft an extra dsc file | ||||
fname = "cicero_0.7.2-3.dsc" | fname = "cicero_0.7.2-3.dsc" | ||||
p_info = DebianPackageInfo.from_metadata(PACKAGE_FILES, url=URL) | p_info = DebianPackageInfo.from_metadata(PACKAGE_FILES, url=URL) | ||||
data = p_info.files[fname] | data = p_info.files[fname] | ||||
fname2 = fname.replace("cicero", "ciceroo") | fname2 = fname.replace("cicero", "ciceroo") | ||||
p_info.files[fname2] = data | p_info.files[fname2] = data | ||||
with pytest.raises( | with pytest.raises( | ||||
ValueError, | ValueError, | ||||
match="Package %s_%s references several dsc" | match="Package %s_%s references several dsc" | ||||
% (PACKAGE_FILES["name"], PACKAGE_FILES["version"]), | % (PACKAGE_FILES["name"], PACKAGE_FILES["version"]), | ||||
): | ): | ||||
dsc_information(p_info) | dsc_information(p_info) | ||||
def test_get_intrinsic_package_metadata(requests_mock_datadir, datadir, tmp_path): | def test_debian_get_intrinsic_package_metadata( | ||||
requests_mock_datadir, datadir, tmp_path | |||||
): | |||||
tmp_path = str(tmp_path) # py3.5 compat. | tmp_path = str(tmp_path) # py3.5 compat. | ||||
p_info = DebianPackageInfo.from_metadata(PACKAGE_FILES, url=URL) | p_info = DebianPackageInfo.from_metadata(PACKAGE_FILES, url=URL) | ||||
logger.debug("p_info: %s", p_info) | logger.debug("p_info: %s", p_info) | ||||
# download the packages | # download the packages | ||||
all_hashes = download_package(p_info, tmp_path) | all_hashes = download_package(p_info, tmp_path) | ||||
Show All 40 Lines | assert actual_package_info == IntrinsicPackageMetadata( | ||||
"name": "Samuel Thibault", | "name": "Samuel Thibault", | ||||
}, | }, | ||||
], | ], | ||||
name="cicero", | name="cicero", | ||||
version="0.7.2-3", | version="0.7.2-3", | ||||
) | ) | ||||
def test_debian_multiple_packages(swh_config, requests_mock_datadir): | def test_debian_multiple_packages(swh_storage, requests_mock_datadir): | ||||
loader = DebianLoader( | loader = DebianLoader( | ||||
url=URL, date="2019-10-12T05:58:09.165557+00:00", packages=PACKAGES_PER_VERSION | swh_storage, | ||||
URL, | |||||
date="2019-10-12T05:58:09.165557+00:00", | |||||
packages=PACKAGES_PER_VERSION, | |||||
) | ) | ||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
expected_snapshot_id = "defc19021187f3727293121fcf6c5c82cb923604" | expected_snapshot_id = "defc19021187f3727293121fcf6c5c82cb923604" | ||||
assert actual_load_status == { | assert actual_load_status == { | ||||
"status": "eventful", | "status": "eventful", | ||||
"snapshot_id": expected_snapshot_id, | "snapshot_id": expected_snapshot_id, | ||||
} | } | ||||
assert_last_visit_matches(loader.storage, URL, status="full", type="deb") | assert_last_visit_matches(swh_storage, URL, status="full", type="deb") | ||||
expected_snapshot = Snapshot( | expected_snapshot = Snapshot( | ||||
id=hash_to_bytes(expected_snapshot_id), | id=hash_to_bytes(expected_snapshot_id), | ||||
branches={ | branches={ | ||||
b"releases/stretch/contrib/0.7.2-3": SnapshotBranch( | b"releases/stretch/contrib/0.7.2-3": SnapshotBranch( | ||||
target_type=TargetType.REVISION, | target_type=TargetType.REVISION, | ||||
target=hash_to_bytes("2807f5b3f84368b4889a9ae827fe85854ffecf07"), | target=hash_to_bytes("2807f5b3f84368b4889a9ae827fe85854ffecf07"), | ||||
), | ), | ||||
b"releases/buster/contrib/0.7.2-4": SnapshotBranch( | b"releases/buster/contrib/0.7.2-4": SnapshotBranch( | ||||
target_type=TargetType.REVISION, | target_type=TargetType.REVISION, | ||||
target=hash_to_bytes("8224139c274c984147ef4b09aa0e462c55a10bd3"), | target=hash_to_bytes("8224139c274c984147ef4b09aa0e462c55a10bd3"), | ||||
), | ), | ||||
}, | }, | ||||
) | ) | ||||
check_snapshot(expected_snapshot, loader.storage) | check_snapshot(expected_snapshot, swh_storage) | ||||
def test_resolve_revision_from_edge_cases(): | def test_debian_resolve_revision_from_edge_cases(): | ||||
"""Solving revision with empty data will result in unknown revision | """Solving revision with empty data will result in unknown revision | ||||
""" | """ | ||||
empty_artifact = { | empty_artifact = { | ||||
"name": PACKAGE_FILES["name"], | "name": PACKAGE_FILES["name"], | ||||
"version": PACKAGE_FILES["version"], | "version": PACKAGE_FILES["version"], | ||||
} | } | ||||
for package_artifacts in [empty_artifact, PACKAGE_FILES]: | for package_artifacts in [empty_artifact, PACKAGE_FILES]: | ||||
Show All 15 Lines | known_package_artifacts = { | ||||
# ... removed the unnecessary intermediary data | # ... removed the unnecessary intermediary data | ||||
} | } | ||||
} | } | ||||
assert not resolve_revision_from( | assert not resolve_revision_from( | ||||
known_package_artifacts, DebianPackageInfo.from_metadata(PACKAGE_FILES, url=URL) | known_package_artifacts, DebianPackageInfo.from_metadata(PACKAGE_FILES, url=URL) | ||||
) | ) | ||||
def test_resolve_revision_from_edge_cases_hit_and_miss(): | def test_debian_resolve_revision_from_edge_cases_hit_and_miss(): | ||||
"""Solving revision with inconsistent data will result in unknown revision | """Solving revision with inconsistent data will result in unknown revision | ||||
""" | """ | ||||
artifact_metadata = PACKAGE_FILES2 | artifact_metadata = PACKAGE_FILES2 | ||||
p_info = DebianPackageInfo.from_metadata(artifact_metadata, url=URL) | p_info = DebianPackageInfo.from_metadata(artifact_metadata, url=URL) | ||||
expected_revision_id = ( | expected_revision_id = ( | ||||
b"(\x08\xf5\xb3\xf8Ch\xb4\x88\x9a\x9a\xe8'\xff\x85\x85O\xfe\xcf\x07" # noqa | b"(\x08\xf5\xb3\xf8Ch\xb4\x88\x9a\x9a\xe8'\xff\x85\x85O\xfe\xcf\x07" # noqa | ||||
) | ) | ||||
known_package_artifacts = { | known_package_artifacts = { | ||||
expected_revision_id: { | expected_revision_id: { | ||||
"extrinsic": {"raw": PACKAGE_FILES,}, | "extrinsic": {"raw": PACKAGE_FILES,}, | ||||
# ... removed the unnecessary intermediary data | # ... removed the unnecessary intermediary data | ||||
} | } | ||||
} | } | ||||
actual_revision = resolve_revision_from(known_package_artifacts, p_info) | actual_revision = resolve_revision_from(known_package_artifacts, p_info) | ||||
assert actual_revision is None | assert actual_revision is None | ||||
def test_resolve_revision_from(): | def test_debian_resolve_revision_from(): | ||||
"""Solving revision with consistent data will solve the revision | """Solving revision with consistent data will solve the revision | ||||
""" | """ | ||||
artifact_metadata = PACKAGE_FILES | artifact_metadata = PACKAGE_FILES | ||||
p_info = DebianPackageInfo.from_metadata(artifact_metadata, url=URL) | p_info = DebianPackageInfo.from_metadata(artifact_metadata, url=URL) | ||||
expected_revision_id = ( | expected_revision_id = ( | ||||
b"(\x07\xf5\xb3\xf8Ch\xb4\x88\x9a\x9a\xe8'\xfe\x85\x85O\xfe\xcf\x07" # noqa | b"(\x07\xf5\xb3\xf8Ch\xb4\x88\x9a\x9a\xe8'\xfe\x85\x85O\xfe\xcf\x07" # noqa | ||||
) | ) | ||||
Show All 21 Lines |