diff --git a/swh/lister/nixguix/lister.py b/swh/lister/nixguix/lister.py --- a/swh/lister/nixguix/lister.py +++ b/swh/lister/nixguix/lister.py @@ -16,6 +16,7 @@ """ import base64 +import binascii from dataclasses import dataclass from enum import Enum import logging @@ -362,11 +363,20 @@ yield built_artifact continue + outputHash = artifact.get("outputHash") integrity = artifact.get("integrity") - if integrity is None: - logger.warning("Skipping url <%s>: missing integrity field", origin) + if integrity is None and outputHash is None: + logger.warning( + "Skipping url <%s>: missing integrity and outputHash field", + origin, + ) continue + # Falls back to outputHash field if integrity is missing + if integrity is None and outputHash: + # We'll deal with outputHash as integrity field + integrity = outputHash + try: is_tar, origin = is_tarball(urls, self.session) except ArtifactNatureMistyped: @@ -396,10 +406,18 @@ # convert into a dict of checksums. This only parses the # `hash-expression` (hash-) as defined in # https://w3c.github.io/webappsec-subresource-integrity/#the-integrity-attribute - chksum_algo, chksum_b64 = integrity.split("-") - checksums: Dict[str, str] = { - chksum_algo: base64.decodebytes(chksum_b64.encode()).hex() - } + try: + chksum_algo, chksum_b64 = integrity.split("-") + checksums: Dict[str, str] = { + chksum_algo: base64.decodebytes(chksum_b64.encode()).hex() + } + except binascii.Error: + logger.exception( + "Skipping url: <%s>: integrity computation failure for <%s>", + url, + artifact, + ) + continue # The 'outputHashMode' attribute determines how the hash is computed. It # must be one of the following two values: diff --git a/swh/lister/nixguix/tests/data/guix-swh_sources.json b/swh/lister/nixguix/tests/data/guix-swh_sources.json --- a/swh/lister/nixguix/tests/data/guix-swh_sources.json +++ b/swh/lister/nixguix/tests/data/guix-swh_sources.json @@ -27,6 +27,16 @@ ], "integrity": "sha256-lV3xiWUZmSnt4LW0ni/sUyC/bbtaxkTzvFLFtJKLuI4=" }, + { + "outputHash": "sha256-9uF0fYl4Zz/Ia2UKx7CBi8ZU8jfWoBfy2QSgTSwXo5A", + "outputHashAlgo": null, + "outputHashMode": "recursive", + "type": "url", + "urls": [ + "https://github.com/figiel/hosts/archive/v1.0.0.tar.gz" + ], + "inferredFetcher": "fetchzip" + }, { "type": "url", "urls": [ "unknown://example.org/wrong-scheme-so-skipped.txt" ], diff --git a/swh/lister/nixguix/tests/data/nixpkgs-swh_sources.json b/swh/lister/nixguix/tests/data/nixpkgs-swh_sources.json --- a/swh/lister/nixguix/tests/data/nixpkgs-swh_sources.json +++ b/swh/lister/nixguix/tests/data/nixpkgs-swh_sources.json @@ -79,6 +79,16 @@ "svn_url": "https://code.call-cc.org/svn/chicken-eggs/release/5/iset/tags/2.2", "svn_revision": 39057 }, + { + "outputHash": "sha256-LxVcYj2WKHbhNu5x/DFkxQPOYrVkNvwiE/qcODq52Lc=", + "outputHashAlgo": null, + "outputHashMode": "recursive", + "type": "url", + "urls": [ + "https://github.com/julian-klode/triehash/archive/debian/0.3-3.tar.gz" + ], + "inferredFetcher": "fetchzip" + }, { "type": "url", "urls": [