Page Menu | Home | Software Heritage

D6750.id24511.diff
No One | Temporary

D6750.id24511.diff

diff --git a/swh/loader/package/debian/loader.py b/swh/loader/package/debian/loader.py
--- a/swh/loader/package/debian/loader.py
+++ b/swh/loader/package/debian/loader.py
@@ -16,8 +16,8 @@
from debian.deb822 import Dsc
from swh.loader.package.loader import BasePackageInfo, PackageLoader, PartialExtID
-from swh.loader.package.utils import download, release_name
-from swh.model.hashutil import hash_to_bytes
+from swh.loader.package.utils import DOWNLOAD_HASHES, download, release_name
+from swh.model.hashutil import ALGORITHMS, hash_to_bytes
from swh.model.model import ObjectType, Person, Release, Sha1Git, TimestampWithTimezone
from swh.storage.interface import StorageInterface
@@ -37,15 +37,14 @@
class DebianFileMetadata:
name = attr.ib(type=str)
"""Filename"""
- sha256 = attr.ib(type=str)
+
size = attr.ib(type=int)
uri = attr.ib(type=str)
"""URL of this specific file"""
- # md5sum is not always available, make it optional
+ # not all checksums are always available, so make them optional
+ sha256 = attr.ib(type=str, default="")
md5sum = attr.ib(type=str, default="")
-
- # sha1 is not always available, make it optional
sha1 = attr.ib(type=str, default="")
# Some of the DSC files imported in swh apparently had a Checksums-SHA512
@@ -173,6 +172,9 @@
"""
super().__init__(storage=storage, url=url, max_content_size=max_content_size)
self.packages = packages
+ # add md5 support in swh.model.hashutil.MultiHash
+ ALGORITHMS.add("md5")
+ DOWNLOAD_HASHES.add("md5")
def get_versions(self) -> Sequence[str]:
"""Returns the keys of the packages input (e.g.
@@ -307,9 +309,14 @@
for filename, fileinfo in p_info.files.items():
uri = fileinfo.uri
logger.debug("fileinfo: %s", fileinfo)
- extrinsic_hashes = {"sha256": fileinfo.sha256}
+ if fileinfo.sha256:
+ extrinsic_hashes = {"sha256": fileinfo.sha256}
+ elif fileinfo.sha1:
+ extrinsic_hashes = {"sha1": fileinfo.sha1}
+ else:
+ extrinsic_hashes = {"md5": fileinfo.md5sum}
logger.debug("extrinsic_hashes(%s): %s", filename, extrinsic_hashes)
- filepath, hashes = download(
+ _, hashes = download(
uri, dest=tmpdir, filename=filename, hashes=extrinsic_hashes
)
all_hashes[filename] = hashes
diff --git a/swh/loader/package/debian/tests/test_debian.py b/swh/loader/package/debian/tests/test_debian.py
--- a/swh/loader/package/debian/tests/test_debian.py
+++ b/swh/loader/package/debian/tests/test_debian.py
@@ -287,6 +287,7 @@
assert all_hashes == {
"cicero_0.7.2-3.diff.gz": {
"checksums": {
+ "md5": "a93661b6a48db48d59ba7d26796fc9ce",
"sha1": "0815282053f21601b0ec4adf7a8fe47eace3c0bc",
"sha256": "f039c9642fe15c75bed5254315e2a29f9f2700da0e29d9b0729b3ffc46c8971c", # noqa
},
@@ -299,6 +300,7 @@
},
"cicero_0.7.2-3.dsc": {
"checksums": {
+ "md5": "d5dac83eb9cfc9bb52a15eb618b4670a",
"sha1": "abbec4e8efbbc80278236e1dd136831eac08accd",
"sha256": "35b7f1048010c67adfd8d70e4961aefd8800eb9a83a4d1cc68088da0009d9a03", # noqa
},
@@ -310,6 +312,7 @@
},
"cicero_0.7.2.orig.tar.gz": {
"checksums": {
+ "md5": "4353dede07c5728319ba7f5595a7230a",
"sha1": "a286efd63fe2c9c9f7bb30255c3d6fcdcf390b43",
"sha256": "63f40f2436ea9f67b44e2d4bd669dbabe90e2635a204526c20e0b3c8ee957786", # noqa
},
@@ -476,3 +479,44 @@
)
check_snapshot(expected_snapshot, swh_storage)
+
+
+def test_debian_loader_only_md5_sum_in_dsc(swh_storage, requests_mock_datadir):
+
+ packages_per_version = deepcopy(PACKAGES_PER_VERSION)
+ for package_files in packages_per_version.values():
+ for package_data in package_files["files"].values():
+ del package_data["sha256"]
+
+ loader = DebianLoader(swh_storage, URL, packages=packages_per_version)
+
+ actual_load_status = loader.load()
+ expected_snapshot_id = "a83fa5c089b048161f0677b9614a4aae96a6ca18"
+ assert actual_load_status == {
+ "status": "eventful",
+ "snapshot_id": expected_snapshot_id,
+ }
+
+ assert_last_visit_matches(
+ swh_storage,
+ URL,
+ status="full",
+ type="deb",
+ snapshot=hash_to_bytes(expected_snapshot_id),
+ )
+
+ expected_snapshot = Snapshot(
+ id=hash_to_bytes(expected_snapshot_id),
+ branches={
+ b"releases/stretch/contrib/0.7.2-3": SnapshotBranch(
+ target_type=TargetType.RELEASE,
+ target=hash_to_bytes("73e0ede9c21f7074ad1f9c81a774cfcb9e02addf"),
+ ),
+ b"releases/buster/contrib/0.7.2-4": SnapshotBranch(
+ target_type=TargetType.RELEASE,
+ target=hash_to_bytes("9f6d8d868514f991af0d9f5d7173aba1236a5a75"),
+ ),
+ },
+ )
+
+ check_snapshot(expected_snapshot, swh_storage)

File Metadata

Mime Type
text/plain
Expires
Dec 21 2024, 9:30 AM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3229718

Event Timeline