diff --git a/swh/loader/package/debian/loader.py b/swh/loader/package/debian/loader.py --- a/swh/loader/package/debian/loader.py +++ b/swh/loader/package/debian/loader.py @@ -45,6 +45,9 @@ # md5sum is not always available, make it optional md5sum = attr.ib(type=str, default="") + # sha1 is not always available, make it optional + sha1 = attr.ib(type=str, default="") + # Some of the DSC files imported in swh apparently had a Checksums-SHA512 # field which got recorded in the archive. Current versions of dpkg-source # don't seem to generate them, but keep the field available for @@ -126,7 +129,6 @@ self, storage: StorageInterface, url: str, - date: str, packages: Mapping[str, Any], max_content_size: Optional[int] = None, ): diff --git a/swh/loader/package/debian/tasks.py b/swh/loader/package/debian/tasks.py --- a/swh/loader/package/debian/tasks.py +++ b/swh/loader/package/debian/tasks.py @@ -1,4 +1,4 @@ -# Copyright (C) 2019 The Software Heritage developers +# Copyright (C) 2019-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -9,7 +9,7 @@ @shared_task(name=__name__ + ".LoadDebian") -def load_deb_package(*, url, date, packages): +def load_deb(*, url, packages): """Load Debian package""" - loader = DebianLoader.from_configfile(url=url, date=date, packages=packages) + loader = DebianLoader.from_configfile(url=url, packages=packages) return loader.load() diff --git a/swh/loader/package/debian/tests/test_debian.py b/swh/loader/package/debian/tests/test_debian.py --- a/swh/loader/package/debian/tests/test_debian.py +++ b/swh/loader/package/debian/tests/test_debian.py @@ -4,10 +4,12 @@ # See top-level LICENSE file for more information from copy import deepcopy +import hashlib import logging from os import path import pytest +import requests from swh.loader.package.debian.loader import ( DebianLoader, @@ -111,12 +113,7 @@ """With no prior visit, load a gnu project ends up with 1 snapshot """ - loader = DebianLoader( - swh_storage, - URL, - date="2019-10-12T05:58:09.165557+00:00", - packages=PACKAGE_PER_VERSION, - ) + loader = DebianLoader(swh_storage, URL, packages=PACKAGE_PER_VERSION,) actual_load_status = loader.load() expected_snapshot_id = "ad1367b5470a03857be7c7325a5a8bde698e1800" @@ -182,12 +179,7 @@ """With no prior visit, load a debian project ends up with 1 snapshot """ - loader = DebianLoader( - swh_storage, - URL, - date="2019-10-12T05:58:09.165557+00:00", - packages=PACKAGE_PER_VERSION, - ) + loader = DebianLoader(swh_storage, URL, packages=PACKAGE_PER_VERSION,) actual_load_status = loader.load() @@ -363,6 +355,19 @@ assert not debian_file_metadata.md5sum +def test_debian_dsc_information_extra_sha1(requests_mock_datadir): + package_files = deepcopy(PACKAGE_FILES) + + for package_metadata in package_files["files"].values(): + file_bytes = requests.get(package_metadata["uri"]).content + package_metadata["sha1"] = hashlib.sha1(file_bytes).hexdigest() + + p_info = DebianPackageInfo.from_metadata(package_files, url=URL, version="0.7.2-3") + + for debian_file_metadata in p_info.files.values(): + assert debian_file_metadata.sha1 + + def test_debian_dsc_information_too_many_dsc_entries(): # craft an extra dsc file fname = "cicero_0.7.2-3.dsc" @@ -439,12 +444,7 @@ def test_debian_multiple_packages(swh_storage, requests_mock_datadir): - loader = DebianLoader( - swh_storage, - URL, - date="2019-10-12T05:58:09.165557+00:00", - packages=PACKAGES_PER_VERSION, - ) + loader = DebianLoader(swh_storage, URL, packages=PACKAGES_PER_VERSION,) actual_load_status = loader.load() expected_snapshot_id = "a83fa5c089b048161f0677b9614a4aae96a6ca18" diff --git a/swh/loader/package/debian/tests/test_tasks.py b/swh/loader/package/debian/tests/test_tasks.py --- a/swh/loader/package/debian/tests/test_tasks.py +++ b/swh/loader/package/debian/tests/test_tasks.py @@ -12,7 +12,7 @@ res = swh_scheduler_celery_app.send_task( "swh.loader.package.debian.tasks.LoadDebian", - kwargs=dict(url="some-url", date="some-date", packages={}), + kwargs=dict(url="some-url", packages={}), ) assert res res.wait()