Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/debian/tests/test_debian.py
# Copyright (C) 2019-2020 The Software Heritage developers | # Copyright (C) 2019-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import copy | |||||
import logging | import logging | ||||
import pytest | import pytest | ||||
import random | import random | ||||
from os import path | from os import path | ||||
from swh.loader.package.debian.loader import ( | from swh.loader.package.debian.loader import ( | ||||
DebianLoader, | DebianLoader, | ||||
DebianPackageInfo, | |||||
DebianPackageChangelog, | |||||
IntrinsicPackageMetadata, | |||||
download_package, | download_package, | ||||
dsc_information, | dsc_information, | ||||
uid_to_person, | uid_to_person, | ||||
prepare_person, | prepare_person, | ||||
get_package_metadata, | get_intrinsic_package_metadata, | ||||
extract_package, | extract_package, | ||||
) | ) | ||||
from swh.loader.tests import ( | from swh.loader.tests import ( | ||||
assert_last_visit_matches, | assert_last_visit_matches, | ||||
check_snapshot, | check_snapshot, | ||||
get_stats, | get_stats, | ||||
) | ) | ||||
from swh.loader.package.debian.loader import resolve_revision_from | from swh.loader.package.debian.loader import resolve_revision_from | ||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||
from swh.model.model import Person, Snapshot, SnapshotBranch, TargetType | from swh.model.model import Person, Snapshot, SnapshotBranch, TargetType | ||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
URL = "deb://Debian/packages/cicero" | |||||
PACKAGE_FILES = { | PACKAGE_FILES = { | ||||
"name": "cicero", | "name": "cicero", | ||||
"version": "0.7.2-3", | "version": "0.7.2-3", | ||||
"files": { | "files": { | ||||
"cicero_0.7.2-3.diff.gz": { | "cicero_0.7.2-3.diff.gz": { | ||||
"md5sum": "a93661b6a48db48d59ba7d26796fc9ce", | "md5sum": "a93661b6a48db48d59ba7d26796fc9ce", | ||||
"name": "cicero_0.7.2-3.diff.gz", | "name": "cicero_0.7.2-3.diff.gz", | ||||
"sha256": "f039c9642fe15c75bed5254315e2a29f9f2700da0e29d9b0729b3ffc46c8971c", # noqa | "sha256": "f039c9642fe15c75bed5254315e2a29f9f2700da0e29d9b0729b3ffc46c8971c", # noqa | ||||
▲ Show 20 Lines • Show All 56 Lines • ▼ Show 20 Lines | PACKAGES_PER_VERSION = { | ||||
"buster/contrib/0.7.2-4": PACKAGE_FILES2, | "buster/contrib/0.7.2-4": PACKAGE_FILES2, | ||||
} | } | ||||
def test_debian_first_visit(swh_config, requests_mock_datadir): | def test_debian_first_visit(swh_config, requests_mock_datadir): | ||||
"""With no prior visit, loading a debian project ends up with 1 snapshot | """With no prior visit, loading a debian project ends up with 1 snapshot | ||||
""" | """ | ||||
url = "deb://Debian/packages/cicero" | |||||
loader = DebianLoader( | loader = DebianLoader( | ||||
url=url, date="2019-10-12T05:58:09.165557+00:00", packages=PACKAGE_PER_VERSION, | url=URL, date="2019-10-12T05:58:09.165557+00:00", packages=PACKAGE_PER_VERSION, | ||||
) | ) | ||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
expected_snapshot_id = "3b6b66e6ee4e7d903a379a882684a2a50480c0b4" | expected_snapshot_id = "3b6b66e6ee4e7d903a379a882684a2a50480c0b4" | ||||
assert actual_load_status == { | assert actual_load_status == { | ||||
"status": "eventful", | "status": "eventful", | ||||
"snapshot_id": expected_snapshot_id, | "snapshot_id": expected_snapshot_id, | ||||
} | } | ||||
assert_last_visit_matches(loader.storage, url, status="full", type="deb") | assert_last_visit_matches(loader.storage, URL, status="full", type="deb") | ||||
stats = get_stats(loader.storage) | stats = get_stats(loader.storage) | ||||
assert { | assert { | ||||
"content": 42, | "content": 42, | ||||
"directory": 2, | "directory": 2, | ||||
"origin": 1, | "origin": 1, | ||||
"origin_visit": 1, | "origin_visit": 1, | ||||
"person": 1, | "person": 1, | ||||
Show All 15 Lines | def test_debian_first_visit(swh_config, requests_mock_datadir): | ||||
check_snapshot(expected_snapshot, loader.storage) | check_snapshot(expected_snapshot, loader.storage) | ||||
def test_debian_first_visit_then_another_visit(swh_config, requests_mock_datadir): | def test_debian_first_visit_then_another_visit(swh_config, requests_mock_datadir): | ||||
"""With no prior visit, loading a debian project ends up with 1 snapshot; a second visit with no change in between is uneventful | """With no prior visit, loading a debian project ends up with 1 snapshot; a second visit with no change in between is uneventful | ||||
""" | """ | ||||
url = "deb://Debian/packages/cicero" | |||||
loader = DebianLoader( | loader = DebianLoader( | ||||
url=url, date="2019-10-12T05:58:09.165557+00:00", packages=PACKAGE_PER_VERSION | url=URL, date="2019-10-12T05:58:09.165557+00:00", packages=PACKAGE_PER_VERSION | ||||
) | ) | ||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
expected_snapshot_id = "3b6b66e6ee4e7d903a379a882684a2a50480c0b4" | expected_snapshot_id = "3b6b66e6ee4e7d903a379a882684a2a50480c0b4" | ||||
assert actual_load_status == { | assert actual_load_status == { | ||||
"status": "eventful", | "status": "eventful", | ||||
"snapshot_id": expected_snapshot_id, | "snapshot_id": expected_snapshot_id, | ||||
} | } | ||||
assert_last_visit_matches(loader.storage, url, status="full", type="deb") | assert_last_visit_matches(loader.storage, URL, status="full", type="deb") | ||||
stats = get_stats(loader.storage) | stats = get_stats(loader.storage) | ||||
assert { | assert { | ||||
"content": 42, | "content": 42, | ||||
"directory": 2, | "directory": 2, | ||||
"origin": 1, | "origin": 1, | ||||
"origin_visit": 1, | "origin_visit": 1, | ||||
"person": 1, | "person": 1, | ||||
Show All 13 Lines | expected_snapshot = Snapshot( | ||||
}, | }, | ||||
) # different than the previous loader as no release is done | ) # different than the previous loader as no release is done | ||||
check_snapshot(expected_snapshot, loader.storage) | check_snapshot(expected_snapshot, loader.storage) | ||||
# No change in between load | # No change in between load | ||||
actual_load_status2 = loader.load() | actual_load_status2 = loader.load() | ||||
assert actual_load_status2["status"] == "uneventful" | assert actual_load_status2["status"] == "uneventful" | ||||
assert_last_visit_matches(loader.storage, url, status="full", type="deb") | assert_last_visit_matches(loader.storage, URL, status="full", type="deb") | ||||
stats2 = get_stats(loader.storage) | stats2 = get_stats(loader.storage) | ||||
assert { | assert { | ||||
"content": 42 + 0, | "content": 42 + 0, | ||||
"directory": 2 + 0, | "directory": 2 + 0, | ||||
"origin": 1, | "origin": 1, | ||||
"origin_visit": 1 + 1, # a new visit occurred | "origin_visit": 1 + 1, # a new visit occurred | ||||
"person": 1, | "person": 1, | ||||
Show All 36 Lines | assert actual_author == Person( | ||||
name=b"Someone Name", | name=b"Someone Name", | ||||
email=b"someone@orga.org", | email=b"someone@orga.org", | ||||
fullname=b"Someone Name <someone@orga.org>", | fullname=b"Someone Name <someone@orga.org>", | ||||
) | ) | ||||
def test_download_package(datadir, tmpdir, requests_mock_datadir): | def test_download_package(datadir, tmpdir, requests_mock_datadir): | ||||
tmpdir = str(tmpdir) # py3.5 work around (LocalPath issue) | tmpdir = str(tmpdir) # py3.5 work around (LocalPath issue) | ||||
all_hashes = download_package(PACKAGE_FILES, tmpdir) | p_info = DebianPackageInfo.from_metadata(PACKAGE_FILES, url=URL) | ||||
all_hashes = download_package(p_info, tmpdir) | |||||
assert all_hashes == { | assert all_hashes == { | ||||
"cicero_0.7.2-3.diff.gz": { | "cicero_0.7.2-3.diff.gz": { | ||||
"checksums": { | "checksums": { | ||||
"sha1": "0815282053f21601b0ec4adf7a8fe47eace3c0bc", | "sha1": "0815282053f21601b0ec4adf7a8fe47eace3c0bc", | ||||
"sha256": "f039c9642fe15c75bed5254315e2a29f9f2700da0e29d9b0729b3ffc46c8971c", # noqa | "sha256": "f039c9642fe15c75bed5254315e2a29f9f2700da0e29d9b0729b3ffc46c8971c", # noqa | ||||
}, | }, | ||||
"filename": "cicero_0.7.2-3.diff.gz", | "filename": "cicero_0.7.2-3.diff.gz", | ||||
"length": 3964, | "length": 3964, | ||||
Show All 14 Lines | assert all_hashes == { | ||||
"filename": "cicero_0.7.2.orig.tar.gz", | "filename": "cicero_0.7.2.orig.tar.gz", | ||||
"length": 96527, | "length": 96527, | ||||
}, | }, | ||||
} | } | ||||
def test_dsc_information_ok(): | def test_dsc_information_ok(): | ||||
fname = "cicero_0.7.2-3.dsc" | fname = "cicero_0.7.2-3.dsc" | ||||
dsc_url, dsc_name = dsc_information(PACKAGE_FILES) | p_info = DebianPackageInfo.from_metadata(PACKAGE_FILES, url=URL) | ||||
dsc_url, dsc_name = dsc_information(p_info) | |||||
assert dsc_url == PACKAGE_FILES["files"][fname]["uri"] | assert dsc_url == PACKAGE_FILES["files"][fname]["uri"] | ||||
assert dsc_name == PACKAGE_FILES["files"][fname]["name"] | assert dsc_name == PACKAGE_FILES["files"][fname]["name"] | ||||
def test_dsc_information_not_found(): | def test_dsc_information_not_found(): | ||||
fname = "cicero_0.7.2-3.dsc" | fname = "cicero_0.7.2-3.dsc" | ||||
package_files = copy.deepcopy(PACKAGE_FILES) | p_info = DebianPackageInfo.from_metadata(PACKAGE_FILES, url=URL) | ||||
package_files["files"].pop(fname) | p_info.files.pop(fname) | ||||
dsc_url, dsc_name = dsc_information(package_files) | dsc_url, dsc_name = dsc_information(p_info) | ||||
assert dsc_url is None | assert dsc_url is None | ||||
assert dsc_name is None | assert dsc_name is None | ||||
def test_dsc_information_too_many_dsc_entries(): | def test_dsc_information_too_many_dsc_entries(): | ||||
# craft an extra dsc file | # craft an extra dsc file | ||||
fname = "cicero_0.7.2-3.dsc" | fname = "cicero_0.7.2-3.dsc" | ||||
package_files = copy.deepcopy(PACKAGE_FILES) | p_info = DebianPackageInfo.from_metadata(PACKAGE_FILES, url=URL) | ||||
data = package_files["files"][fname] | data = p_info.files[fname] | ||||
fname2 = fname.replace("cicero", "ciceroo") | fname2 = fname.replace("cicero", "ciceroo") | ||||
package_files["files"][fname2] = data | p_info.files[fname2] = data | ||||
with pytest.raises( | with pytest.raises( | ||||
ValueError, | ValueError, | ||||
match="Package %s_%s references several dsc" | match="Package %s_%s references several dsc" | ||||
% (package_files["name"], package_files["version"]), | % (PACKAGE_FILES["name"], PACKAGE_FILES["version"]), | ||||
): | ): | ||||
dsc_information(package_files) | dsc_information(p_info) | ||||
def test_get_package_metadata(requests_mock_datadir, datadir, tmp_path): | def test_get_intrinsic_package_metadata(requests_mock_datadir, datadir, tmp_path): | ||||
tmp_path = str(tmp_path) # py3.5 compat. | tmp_path = str(tmp_path) # py3.5 compat. | ||||
package = PACKAGE_FILES | p_info = DebianPackageInfo.from_metadata(PACKAGE_FILES, url=URL) | ||||
logger.debug("package: %s", package) | logger.debug("p_info: %s", p_info) | ||||
# download the packages | # download the packages | ||||
all_hashes = download_package(package, tmp_path) | all_hashes = download_package(p_info, tmp_path) | ||||
# Retrieve information from package | # Retrieve information from package | ||||
_, dsc_name = dsc_information(package) | _, dsc_name = dsc_information(p_info) | ||||
dl_artifacts = [(tmp_path, hashes) for hashes in all_hashes.values()] | dl_artifacts = [(tmp_path, hashes) for hashes in all_hashes.values()] | ||||
# Extract information from package | # Extract information from package | ||||
extracted_path = extract_package(dl_artifacts, tmp_path) | extracted_path = extract_package(dl_artifacts, tmp_path) | ||||
# Retrieve information on package | # Retrieve information on package | ||||
dsc_path = path.join(path.dirname(extracted_path), dsc_name) | dsc_path = path.join(path.dirname(extracted_path), dsc_name) | ||||
actual_package_info = get_package_metadata(package, dsc_path, extracted_path) | actual_package_info = get_intrinsic_package_metadata( | ||||
p_info, dsc_path, extracted_path | |||||
) | |||||
logger.debug("actual_package_info: %s", actual_package_info) | logger.debug("actual_package_info: %s", actual_package_info) | ||||
assert actual_package_info == { | assert actual_package_info == IntrinsicPackageMetadata( | ||||
"changelog": { | changelog=DebianPackageChangelog( | ||||
"date": "2014-10-19T16:52:35+02:00", | date="2014-10-19T16:52:35+02:00", | ||||
"history": [ | history=[ | ||||
("cicero", "0.7.2-2"), | ("cicero", "0.7.2-2"), | ||||
("cicero", "0.7.2-1"), | ("cicero", "0.7.2-1"), | ||||
("cicero", "0.7-1"), | ("cicero", "0.7-1"), | ||||
], | ], | ||||
"person": { | person={ | ||||
"email": "sthibault@debian.org", | "email": "sthibault@debian.org", | ||||
"fullname": "Samuel Thibault <sthibault@debian.org>", | "fullname": "Samuel Thibault <sthibault@debian.org>", | ||||
"name": "Samuel Thibault", | "name": "Samuel Thibault", | ||||
}, | }, | ||||
}, | ), | ||||
"maintainers": [ | maintainers=[ | ||||
{ | { | ||||
"email": "debian-accessibility@lists.debian.org", | "email": "debian-accessibility@lists.debian.org", | ||||
"fullname": "Debian Accessibility Team " | "fullname": "Debian Accessibility Team " | ||||
"<debian-accessibility@lists.debian.org>", | "<debian-accessibility@lists.debian.org>", | ||||
"name": "Debian Accessibility Team", | "name": "Debian Accessibility Team", | ||||
}, | }, | ||||
{ | { | ||||
"email": "sthibault@debian.org", | "email": "sthibault@debian.org", | ||||
"fullname": "Samuel Thibault <sthibault@debian.org>", | "fullname": "Samuel Thibault <sthibault@debian.org>", | ||||
"name": "Samuel Thibault", | "name": "Samuel Thibault", | ||||
}, | }, | ||||
], | ], | ||||
"name": "cicero", | name="cicero", | ||||
"version": "0.7.2-3", | version="0.7.2-3", | ||||
} | ) | ||||
def test_debian_multiple_packages(swh_config, requests_mock_datadir): | def test_debian_multiple_packages(swh_config, requests_mock_datadir): | ||||
url = "deb://Debian/packages/cicero" | |||||
loader = DebianLoader( | loader = DebianLoader( | ||||
url=url, date="2019-10-12T05:58:09.165557+00:00", packages=PACKAGES_PER_VERSION | url=URL, date="2019-10-12T05:58:09.165557+00:00", packages=PACKAGES_PER_VERSION | ||||
) | ) | ||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
expected_snapshot_id = "defc19021187f3727293121fcf6c5c82cb923604" | expected_snapshot_id = "defc19021187f3727293121fcf6c5c82cb923604" | ||||
assert actual_load_status == { | assert actual_load_status == { | ||||
"status": "eventful", | "status": "eventful", | ||||
"snapshot_id": expected_snapshot_id, | "snapshot_id": expected_snapshot_id, | ||||
} | } | ||||
assert_last_visit_matches(loader.storage, url, status="full", type="deb") | assert_last_visit_matches(loader.storage, URL, status="full", type="deb") | ||||
expected_snapshot = Snapshot( | expected_snapshot = Snapshot( | ||||
id=hash_to_bytes(expected_snapshot_id), | id=hash_to_bytes(expected_snapshot_id), | ||||
branches={ | branches={ | ||||
b"releases/stretch/contrib/0.7.2-3": SnapshotBranch( | b"releases/stretch/contrib/0.7.2-3": SnapshotBranch( | ||||
target_type=TargetType.REVISION, | target_type=TargetType.REVISION, | ||||
target=hash_to_bytes("2807f5b3f84368b4889a9ae827fe85854ffecf07"), | target=hash_to_bytes("2807f5b3f84368b4889a9ae827fe85854ffecf07"), | ||||
), | ), | ||||
b"releases/buster/contrib/0.7.2-4": SnapshotBranch( | b"releases/buster/contrib/0.7.2-4": SnapshotBranch( | ||||
target_type=TargetType.REVISION, | target_type=TargetType.REVISION, | ||||
target=hash_to_bytes("8224139c274c984147ef4b09aa0e462c55a10bd3"), | target=hash_to_bytes("8224139c274c984147ef4b09aa0e462c55a10bd3"), | ||||
), | ), | ||||
}, | }, | ||||
) | ) | ||||
check_snapshot(expected_snapshot, loader.storage) | check_snapshot(expected_snapshot, loader.storage) | ||||
def test_resolve_revision_from_edge_cases(): | def test_resolve_revision_from_edge_cases(): | ||||
"""Solving revision with empty data will result in unknown revision | """Solving revision with empty data will result in unknown revision | ||||
""" | """ | ||||
for package_artifacts in [{}, PACKAGE_FILES]: | empty_artifact = { | ||||
actual_revision = resolve_revision_from({}, package_artifacts) | "name": PACKAGE_FILES["name"], | ||||
"version": PACKAGE_FILES["version"], | |||||
} | |||||
for package_artifacts in [empty_artifact, PACKAGE_FILES]: | |||||
p_info = DebianPackageInfo.from_metadata(package_artifacts, url=URL) | |||||
actual_revision = resolve_revision_from({}, p_info) | |||||
assert actual_revision is None | assert actual_revision is None | ||||
for known_artifacts in [{}, PACKAGE_FILES]: | for known_artifacts in [{}, PACKAGE_FILES]: | ||||
actual_revision = resolve_revision_from(known_artifacts, {}) | actual_revision = resolve_revision_from( | ||||
known_artifacts, DebianPackageInfo.from_metadata(empty_artifact, url=URL) | |||||
) | |||||
assert actual_revision is None | assert actual_revision is None | ||||
known_package_artifacts = { | known_package_artifacts = { | ||||
b"(\x07\xf5\xb3\xf8Ch\xb4\x88\x9a\x9a\xe8'\xfe\x85\x85O\xfe\xcf\x07": { | b"(\x07\xf5\xb3\xf8Ch\xb4\x88\x9a\x9a\xe8'\xfe\x85\x85O\xfe\xcf\x07": { | ||||
"extrinsic": { | "extrinsic": { | ||||
# empty | # empty | ||||
}, | }, | ||||
# ... removed the unnecessary intermediary data | # ... removed the unnecessary intermediary data | ||||
} | } | ||||
} | } | ||||
assert not resolve_revision_from(known_package_artifacts, PACKAGE_FILES) | assert not resolve_revision_from( | ||||
known_package_artifacts, DebianPackageInfo.from_metadata(PACKAGE_FILES, url=URL) | |||||
) | |||||
def test_resolve_revision_from_edge_cases_hit_and_miss(): | def test_resolve_revision_from_edge_cases_hit_and_miss(): | ||||
"""Solving revision with inconsistent data will result in unknown revision | """Solving revision with inconsistent data will result in unknown revision | ||||
""" | """ | ||||
artifact_metadata = PACKAGE_FILES2 | artifact_metadata = PACKAGE_FILES2 | ||||
p_info = DebianPackageInfo.from_metadata(artifact_metadata, url=URL) | |||||
expected_revision_id = ( | expected_revision_id = ( | ||||
b"(\x08\xf5\xb3\xf8Ch\xb4\x88\x9a\x9a\xe8'\xff\x85\x85O\xfe\xcf\x07" # noqa | b"(\x08\xf5\xb3\xf8Ch\xb4\x88\x9a\x9a\xe8'\xff\x85\x85O\xfe\xcf\x07" # noqa | ||||
) | ) | ||||
known_package_artifacts = { | known_package_artifacts = { | ||||
expected_revision_id: { | expected_revision_id: { | ||||
"extrinsic": {"raw": PACKAGE_FILES,}, | "extrinsic": {"raw": PACKAGE_FILES,}, | ||||
# ... removed the unnecessary intermediary data | # ... removed the unnecessary intermediary data | ||||
} | } | ||||
} | } | ||||
actual_revision = resolve_revision_from(known_package_artifacts, artifact_metadata) | actual_revision = resolve_revision_from(known_package_artifacts, p_info) | ||||
assert actual_revision is None | assert actual_revision is None | ||||
def test_resolve_revision_from(): | def test_resolve_revision_from(): | ||||
"""Solving revision with consistent data will solve the revision | """Solving revision with consistent data will solve the revision | ||||
""" | """ | ||||
artifact_metadata = PACKAGE_FILES | artifact_metadata = PACKAGE_FILES | ||||
p_info = DebianPackageInfo.from_metadata(artifact_metadata, url=URL) | |||||
expected_revision_id = ( | expected_revision_id = ( | ||||
b"(\x07\xf5\xb3\xf8Ch\xb4\x88\x9a\x9a\xe8'\xfe\x85\x85O\xfe\xcf\x07" # noqa | b"(\x07\xf5\xb3\xf8Ch\xb4\x88\x9a\x9a\xe8'\xfe\x85\x85O\xfe\xcf\x07" # noqa | ||||
) | ) | ||||
files = artifact_metadata["files"] | files = artifact_metadata["files"] | ||||
# shuffling dict's keys | # shuffling dict's keys | ||||
keys = list(files.keys()) | keys = list(files.keys()) | ||||
random.shuffle(keys) | random.shuffle(keys) | ||||
package_files = {"files": {k: files[k] for k in keys}} | package_files = { | ||||
"name": PACKAGE_FILES["name"], | |||||
"version": PACKAGE_FILES["version"], | |||||
"files": {k: files[k] for k in keys}, | |||||
} | |||||
known_package_artifacts = { | known_package_artifacts = { | ||||
expected_revision_id: { | expected_revision_id: { | ||||
"extrinsic": {"raw": package_files,}, | "extrinsic": {"raw": package_files,}, | ||||
# ... removed the unnecessary intermediary data | # ... removed the unnecessary intermediary data | ||||
} | } | ||||
} | } | ||||
actual_revision = resolve_revision_from(known_package_artifacts, artifact_metadata) | actual_revision = resolve_revision_from(known_package_artifacts, p_info) | ||||
assert actual_revision == expected_revision_id | assert actual_revision == expected_revision_id |