diff --git a/swh/lister/nixguix/lister.py b/swh/lister/nixguix/lister.py --- a/swh/lister/nixguix/lister.py +++ b/swh/lister/nixguix/lister.py @@ -43,12 +43,33 @@ class ArtifactNatureMistyped(ValueError): - """Raised when a remote artifact's neither a tarball nor a file. It's probably a - misconfiguration in the manifest that badly typed a vcs repository.""" + """Raised when a remote artifact's neither a tarball nor a file. + + Errors of this type are probably a misconfiguration in the manifest generation that + badly typed a vcs repository. + + """ pass +class ChecksumsComputation(Enum): + """How the checksums of an artifact listed in the manifest are computed.""" + + STANDARD = "standard" + """Standard checksums (e.g. sha1, sha256, ...) on the tarball or file.""" + NAR = "nar" + """The hash is computed over the NAR archive dump of the output (e.g. uncompressed + directory.)""" + + +MAPPING_CHECKSUMS_COMPUTATION = { + "flat": ChecksumsComputation.STANDARD, + "recursive": ChecksumsComputation.NAR, +} +"""Mapping between the outputHashMode from the manifest and how to compute checksums.""" + + @dataclass class Artifact: """Metadata information on Remote Artifact with url (tarball or file).""" @@ -61,6 +82,8 @@ """List of urls to retrieve tarball artifact if canonical url no longer works.""" checksums: Dict[str, str] """Integrity hash converted into a checksum dict.""" + checksums_computation: ChecksumsComputation + """Checksums computation mode to provide to loaders (e.g. nar, standard, ...)""" @dataclass @@ -297,7 +320,7 @@ for url in origin_urls: urlparsed = urlparse(url) if urlparsed.scheme == "": - logger.warning("Missing scheme for <%s>, fallback to http", url) + logger.warning("Missing scheme for <%s>: fallback to http", url) fixed_url = f"http://{url}" else: fixed_url = url @@ -349,11 +372,23 @@ chksum_algo: base64.decodebytes(chksum_b64.encode()).hex() } + # The 'outputHashMode' attribute determines how the hash is computed. 
It + # must be one of the following two values: + # - "flat": (default) The output must be a non-executable regular file. + # If it isn’t, the build fails. The hash is simply computed over the + # contents of that file (so it’s equal to what Unix commands like + # `sha256sum` or `sha1sum` produce). + # - "recursive": The hash is computed over the NAR archive dump of the + # output (i.e., the result of `nix-store --dump`). In this case, + # the output can be anything, including a directory tree. + outputHashMode = artifact.get("outputHashMode", "flat") + logger.debug("%s: %s", "dir" if is_tar else "cnt", origin) yield ArtifactType.ARTIFACT, Artifact( origin=origin, fallback_urls=fallback_urls, checksums=checksums, + checksums_computation=MAPPING_CHECKSUMS_COMPUTATION[outputHashMode], visit_type="directory" if is_tar else "content", ) else: @@ -382,6 +417,7 @@ visit_type=artifact.visit_type, extra_loader_arguments={ "checksums": artifact.checksums, + "checksums_computation": artifact.checksums_computation.value, "fallback_urls": artifact.fallback_urls, }, ) diff --git a/swh/lister/nixguix/tests/data/nixpkgs-swh_sources.json b/swh/lister/nixguix/tests/data/nixpkgs-swh_sources.json --- a/swh/lister/nixguix/tests/data/nixpkgs-swh_sources.json +++ b/swh/lister/nixguix/tests/data/nixpkgs-swh_sources.json @@ -38,6 +38,7 @@ }, { "type": "url", + "outputHashMode": "flat", "urls": [ "http://downloads.sourceforge.net/project/nmon/lmon16n.c", "http://ufpr.dl.sourceforge.net/project/nmon/lmon16n.c", @@ -45,6 +46,17 @@ ], "integrity": "sha256-wAEswtkl3ulAw3zq4perrGS6Wlww5XXnQYsEAoYT9fI=" }, + { + "outputHash": "0s7p9swjqjsqddylmgid6cv263ggq7pmb734z4k84yfcrgb6kg4g", + "outputHashAlgo": "sha256", + "outputHashMode": "recursive", + "type": "url", + "urls": [ + "https://github.com/kandu/trie/archive/1.0.0.tar.gz" + ], + "integrity": "sha256-j7xp1svMeYIm+WScVe/B7w0jNjMtvkp9a1hLLLlO92g=", + "inferredFetcher": "fetchzip" + }, { "type": "git", "git_url": "https://example.org/pali/0xffff",