Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/archive/tests/test_archive.py
# Copyright (C) 2019-2021 The Software Heritage developers | # Copyright (C) 2019-2021 The Software Heritage developers | ||||||||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||||||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||||||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||||||||
import hashlib | import hashlib | ||||||||||
from io import BytesIO | |||||||||||
from pathlib import Path | |||||||||||
import string | import string | ||||||||||
import attr | import attr | ||||||||||
import pytest | import pytest | ||||||||||
from requests.exceptions import ContentDecodingError | |||||||||||
from swh.loader.package.archive.loader import ArchiveLoader, ArchivePackageInfo | from swh.loader.package.archive.loader import ArchiveLoader, ArchivePackageInfo | ||||||||||
from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats | from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats | ||||||||||
from swh.model.hashutil import hash_to_bytes | from swh.model.hashutil import hash_to_bytes | ||||||||||
from swh.model.model import Snapshot, SnapshotBranch, TargetType | from swh.model.model import Snapshot, SnapshotBranch, TargetType | ||||||||||
URL = "https://ftp.gnu.org/gnu/8sync/" | URL = "https://ftp.gnu.org/gnu/8sync/" | ||||||||||
GNU_ARTIFACTS = [ | GNU_ARTIFACTS = [ | ||||||||||
▲ Show 20 Lines • Show All 392 Lines • ▼ Show 20 Lines | def test_archive_snapshot_append_branch_override(swh_storage, requests_mock_datadir): | ||||||||||
# check expected snapshot, should contain the same branch as previously | # check expected snapshot, should contain the same branch as previously | ||||||||||
# but with different target | # but with different target | ||||||||||
snapshot = loader.last_snapshot() | snapshot = loader.last_snapshot() | ||||||||||
assert len(snapshot.branches) == 2 | assert len(snapshot.branches) == 2 | ||||||||||
assert branch_artifact1_name in snapshot.branches | assert branch_artifact1_name in snapshot.branches | ||||||||||
branch_target_second_visit = snapshot.branches[branch_artifact1_name].target | branch_target_second_visit = snapshot.branches[branch_artifact1_name].target | ||||||||||
assert branch_target_first_visit != branch_target_second_visit | assert branch_target_first_visit != branch_target_second_visit | ||||||||||
@pytest.fixture
def not_gzipped_tarball_bytes(datadir):
    """Return the raw bytes of the ``not_gzipped_tarball.tar.gz`` test resource.

    The file is read from directly under the directory given by ``datadir``.
    """
    tarball_path = Path(datadir) / "not_gzipped_tarball.tar.gz"
    return tarball_path.read_bytes()
def test_archive_not_gzipped_tarball(
    swh_storage, requests_mock, not_gzipped_tarball_bytes
):
    """Ensure a tarball wrongly advertised as gzip-compressed is still loaded.

    Two responses are queued on the mocked URL: a ``ContentDecodingError``
    first, then the raw tarball bytes. The load is expected to be eventful
    and to produce a snapshot containing the ``releases/0.1.0`` branch.
    """
    filename = "not_gzipped_tarball.tar.gz"
    url = f"https://example.org/ftp/{filename}"

    # Responses given as a list are served in order, one per request.
    queued_responses = [
        {"exc": ContentDecodingError},
        {"body": BytesIO(not_gzipped_tarball_bytes)},
    ]
    requests_mock.get(url, queued_responses)

    artifact = {
        "time": 944729610,
        "url": url,
        "length": 221837,
        "filename": filename,
        "version": "0.1.0",
    }
    loader = ArchiveLoader(swh_storage, url, artifacts=[artifact])

    actual_load_status = loader.load()
    assert actual_load_status["status"] == "eventful"
    assert actual_load_status["snapshot_id"] is not None

    branches = loader.last_snapshot().branches
    assert len(branches) == 2
    assert b"releases/0.1.0" in branches