Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/archive/tests/test_archive.py
# Copyright (C) 2019-2022 The Software Heritage developers | # Copyright (C) 2019-2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import copy | import copy | ||||
import datetime | import datetime | ||||
import hashlib | import hashlib | ||||
from io import BytesIO | from io import BytesIO | ||||
from pathlib import Path | from pathlib import Path | ||||
import string | import string | ||||
from typing import Optional | |||||
import attr | import attr | ||||
import pytest | import pytest | ||||
from requests.exceptions import ContentDecodingError | from requests.exceptions import ContentDecodingError | ||||
from swh.loader.package.archive.loader import ArchiveLoader, ArchivePackageInfo | from swh.loader.package.archive.loader import ArchiveLoader, ArchivePackageInfo | ||||
from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats | from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats | ||||
from swh.model.hashutil import hash_to_bytes, hash_to_hex | from swh.model.hashutil import hash_to_bytes, hash_to_hex | ||||
Show All 20 Lines | GNU_ARTIFACTS = [ | ||||
"time": 1480991830, | "time": 1480991830, | ||||
"url": "https://ftp.gnu.org/gnu/8sync/8sync-0.2.0.tar.gz", | "url": "https://ftp.gnu.org/gnu/8sync/8sync-0.2.0.tar.gz", | ||||
"length": 238466, | "length": 238466, | ||||
"filename": "8sync-0.2.0.tar.gz", | "filename": "8sync-0.2.0.tar.gz", | ||||
"version": "0.2.0", | "version": "0.2.0", | ||||
}, | }, | ||||
] | ] | ||||
NIXGUIX_URL_ARTIFACT = "https://downloads.sourceforge.net/cm-unicode/cm-unicode" | |||||
NIXGUIX_ARTIFACTS = [ | |||||
{ | |||||
"url": f"{NIXGUIX_URL_ARTIFACT}/0.7.0/cm-unicode-0.7.0-otf.tar.xz", | |||||
"integrity": "sha256-9vrgYyaJPU2yjLQY1N99OIHmvpOGvNWxl5QOjKE//+c=", | |||||
}, | |||||
] | |||||
_expected_new_contents_first_visit = [ | _expected_new_contents_first_visit = [ | ||||
"e9258d81faf5881a2f96a77ba609396f82cb97ad", | "e9258d81faf5881a2f96a77ba609396f82cb97ad", | ||||
"1170cf105b04b7e2822a0e09d2acf71da7b9a130", | "1170cf105b04b7e2822a0e09d2acf71da7b9a130", | ||||
"fbd27c3f41f2668624ffc80b7ba5db9b92ff27ac", | "fbd27c3f41f2668624ffc80b7ba5db9b92ff27ac", | ||||
"0057bec9b5422aff9256af240b177ac0e3ac2608", | "0057bec9b5422aff9256af240b177ac0e3ac2608", | ||||
"2b8d0d0b43a1078fc708930c8ddc2956a86c566e", | "2b8d0d0b43a1078fc708930c8ddc2956a86c566e", | ||||
"27de3b3bc6545d2a797aeeb4657c0e215a0c2e55", | "27de3b3bc6545d2a797aeeb4657c0e215a0c2e55", | ||||
"2e6db43f5cd764e677f416ff0d0c78c7a82ef19b", | "2e6db43f5cd764e677f416ff0d0c78c7a82ef19b", | ||||
▲ Show 20 Lines • Show All 401 Lines • ▼ Show 20 Lines | def test_archive_2_visits_without_change_not_gnu(swh_storage, requests_mock_datadir): | ||||
assert len(urls) == 1 | assert len(urls) == 1 | ||||
def test_archive_extid(): | def test_archive_extid(): | ||||
"""Compute primary key should return the right identity""" | """Compute primary key should return the right identity""" | ||||
@attr.s | @attr.s | ||||
class TestPackageInfo(ArchivePackageInfo): | class TestPackageInfo(ArchivePackageInfo): | ||||
a = attr.ib() | # mandatory change since ArchivePackageInfo defines optional values | ||||
b = attr.ib() | a = attr.ib(type=Optional[str], default=None) | ||||
b = attr.ib(type=Optional[str], default=None) | |||||
metadata = GNU_ARTIFACTS[0] | metadata = GNU_ARTIFACTS[0] | ||||
p_info = TestPackageInfo( | p_info = TestPackageInfo( | ||||
raw_info={**metadata, "a": 1, "b": 2}, | raw_info={**metadata, "a": 1, "b": 2}, | ||||
a=1, | a=1, | ||||
b=2, | b=2, | ||||
**metadata, | **metadata, | ||||
▲ Show 20 Lines • Show All 132 Lines • ▼ Show 20 Lines | ): | ||||
assert actual_load_status["status"] == "eventful" | assert actual_load_status["status"] == "eventful" | ||||
assert actual_load_status["snapshot_id"] is not None | assert actual_load_status["snapshot_id"] is not None | ||||
snapshot = loader.last_snapshot() | snapshot = loader.last_snapshot() | ||||
assert len(snapshot.branches) == 2 | assert len(snapshot.branches) == 2 | ||||
assert b"releases/0.1.0" in snapshot.branches | assert b"releases/0.1.0" in snapshot.branches | ||||
def test_archive_visit_no_time_for_tarball(swh_storage, requests_mock_datadir): | def test_archive_with_integrity(swh_storage, requests_mock_datadir): | ||||
artifacts = copy.deepcopy(GNU_ARTIFACTS) | artifacts = copy.deepcopy(NIXGUIX_ARTIFACTS) | ||||
for artifact in artifacts: | |||||
artifact["time"] = None | |||||
loader = ArchiveLoader(swh_storage, URL, artifacts=artifacts) | loader = ArchiveLoader(swh_storage, NIXGUIX_URL_ARTIFACT, artifacts=artifacts) | ||||
actual_load_status = loader.load() | actual_load_status = loader.load() | ||||
assert actual_load_status["status"] == "eventful" | assert actual_load_status["status"] == "eventful" | ||||
assert_last_visit_matches(swh_storage, URL, status="full", type="tar") | assert_last_visit_matches(swh_storage, URL, status="full", type="tar") |