Changeset View
Standalone View
swh/loader/package/debian/loader.py
Show All 31 Lines | class DscCountError(ValueError): | ||||
pass | pass | ||||
@attr.s | @attr.s | ||||
class DebianFileMetadata: | class DebianFileMetadata: | ||||
name = attr.ib(type=str) | name = attr.ib(type=str) | ||||
"""Filename""" | """Filename""" | ||||
sha256 = attr.ib(type=str) | |||||
size = attr.ib(type=int) | size = attr.ib(type=int) | ||||
uri = attr.ib(type=str) | uri = attr.ib(type=str) | ||||
"""URL of this specific file""" | """URL of this specific file""" | ||||
# md5sum is not always available, make it optional | # all checksums are not always available, make them optional | ||||
sha256 = attr.ib(type=str, default="") | |||||
md5sum = attr.ib(type=str, default="") | md5sum = attr.ib(type=str, default="") | ||||
# sha1 is not always available, make it optional | |||||
sha1 = attr.ib(type=str, default="") | sha1 = attr.ib(type=str, default="") | ||||
# Some of the DSC files imported in swh apparently had a Checksums-SHA512 | # Some of the DSC files imported in swh apparently had a Checksums-SHA512 | ||||
# field which got recorded in the archive. Current versions of dpkg-source | # field which got recorded in the archive. Current versions of dpkg-source | ||||
# don't seem to generate them, but keep the field available for | # don't seem to generate them, but keep the field available for | ||||
# future-proofing. | # future-proofing. | ||||
sha512 = attr.ib(type=str, default="") | sha512 = attr.ib(type=str, default="") | ||||
▲ Show 20 Lines • Show All 110 Lines • ▼ Show 20 Lines | ): | ||||
} | } | ||||
}, | }, | ||||
}, | }, | ||||
# ... | # ... | ||||
} | } | ||||
""" | """ | ||||
super().__init__(storage=storage, url=url, max_content_size=max_content_size) | super().__init__(storage=storage, url=url, max_content_size=max_content_size) | ||||
self.packages = packages | self.packages = packages | ||||
def get_versions(self) -> Sequence[str]: | def get_versions(self) -> Sequence[str]: | ||||
olasd: Oof, that looks pretty leaky.
I guess we should:
- actually add md5 as a supported algorithm… | |||||
Done Inline Actions: Yeah, I went for a quick-and-dirty fix here, as this is the only case where the md5 sum is needed; I will update accordingly. anlambert: Yeah, I went for a quick-and-dirty fix here, as this is the only case where the md5 sum is needed… | |||||
Done Inline Actions
anlambert: > actually add md5 as a supported algorithm in swh.model.hashutil.MultiHash
D6755 | |||||
Not Done Inline Actions: Ok, so you actually need to land that pile of diffs in swh.model and then rebase this one so you can use your other diff's code, right? ardumont: Ok, so you actually need to land that pile of diffs in swh.model and then rebase this one so you… | |||||
Done Inline Actions: I landed the swh-model diffs and tagged a v3.1.0 release; the build is green, so it looks like we are good here. anlambert: I landed the `swh-model` diffs and tagged a `v3.1.0` release; the build is green, so it looks like we are… | |||||
Done Inline Actions
I opted for a simpler solution: merging the default DOWNLOAD_HASHES set with the one derived from the keys of the hashes parameter of the swh.loader.package.utils.download function. anlambert: > turn the use of DOWNLOAD_HASHES into a class attribute of the base package loader (with a… | |||||
Not Done Inline ActionsAh, even better! olasd: Ah, even better! | |||||
"""Returns the keys of the packages input (e.g. | """Returns the keys of the packages input (e.g. | ||||
stretch/contrib/0.7.2-3, etc...) | stretch/contrib/0.7.2-3, etc...) | ||||
""" | """ | ||||
return list(self.packages.keys()) | return list(self.packages.keys()) | ||||
def get_package_info(self, version: str) -> Iterator[Tuple[str, DebianPackageInfo]]: | def get_package_info(self, version: str) -> Iterator[Tuple[str, DebianPackageInfo]]: | ||||
meta = self.packages[version] | meta = self.packages[version] | ||||
▲ Show 20 Lines • Show All 116 Lines • ▼ Show 20 Lines | def download_package(p_info: DebianPackageInfo, tmpdir: Any) -> Mapping[str, Any]: | ||||
Returns: | Returns: | ||||
Dict of swh hashes per filename key | Dict of swh hashes per filename key | ||||
""" | """ | ||||
all_hashes = {} | all_hashes = {} | ||||
for filename, fileinfo in p_info.files.items(): | for filename, fileinfo in p_info.files.items(): | ||||
uri = fileinfo.uri | uri = fileinfo.uri | ||||
logger.debug("fileinfo: %s", fileinfo) | logger.debug("fileinfo: %s", fileinfo) | ||||
extrinsic_hashes = {"sha256": fileinfo.sha256} | extrinsic_hashes = {"md5": fileinfo.md5sum} | ||||
if fileinfo.sha256: | |||||
extrinsic_hashes["sha256"] = fileinfo.sha256 | |||||
if fileinfo.sha1: | |||||
extrinsic_hashes["sha1"] = fileinfo.sha1 | |||||
logger.debug("extrinsic_hashes(%s): %s", filename, extrinsic_hashes) | logger.debug("extrinsic_hashes(%s): %s", filename, extrinsic_hashes) | ||||
filepath, hashes = download( | _, hashes = download( | ||||
uri, dest=tmpdir, filename=filename, hashes=extrinsic_hashes | uri, dest=tmpdir, filename=filename, hashes=extrinsic_hashes | ||||
) | ) | ||||
all_hashes[filename] = hashes | all_hashes[filename] = hashes | ||||
logger.debug("all_hashes: %s", all_hashes) | logger.debug("all_hashes: %s", all_hashes) | ||||
return all_hashes | return all_hashes | ||||
▲ Show 20 Lines • Show All 146 Lines • Show Last 20 Lines |
Oof, that looks pretty leaky.
I guess we should: