diff --git a/swh/lister/nixguix/lister.py b/swh/lister/nixguix/lister.py --- a/swh/lister/nixguix/lister.py +++ b/swh/lister/nixguix/lister.py @@ -59,7 +59,9 @@ """Either 'tar' or 'file' """ fallback_urls: List[str] """List of urls to retrieve tarball artifact if canonical url no longer works.""" - checksums: Dict[str, str] + is_integrity_about_artifact: bool + """Whether integrity field is about the artifact or the uncompressed directory""" + checksums: Optional[Dict[str, str]] = None """Integrity hash converted into a checksum dict.""" @@ -297,7 +299,7 @@ for url in origin_urls: urlparsed = urlparse(url) if urlparsed.scheme == "": - logger.warning("Missing scheme for <%s>, fallback to http", url) + logger.warning("Missing scheme for <%s>: fallback to http", url) fixed_url = f"http://{url}" else: fixed_url = url @@ -349,11 +351,23 @@ chksum_algo: base64.decodebytes(chksum_b64.encode()).hex() } + # The 'outputHashMode' attribute determines how the hash is computed. It + # must be one of the following two values: + # - "flat": (default) The output must be a non-executable regular file. + # If it isn’t, the build fails. The hash is simply computed over the + # contents of that file (so it’s equal to what Unix commands like + # `sha256sum` or `sha1sum` produce). + # - "recursive": The hash is computed over the NAR archive dump of the + # output (i.e., the result of `nix-store --dump`). In this case, + # the output can be anything, including a directory tree. + outputHashMode = artifact.get("outputHashMode", "flat") + logger.debug("%s: %s", "dir" if is_tar else "cnt", origin) yield ArtifactType.ARTIFACT, Artifact( origin=origin, fallback_urls=fallback_urls, checksums=checksums, + is_integrity_about_artifact=outputHashMode == "flat", visit_type="directory" if is_tar else "content", ) else: @@ -376,12 +390,15 @@ def artifact_to_listed_origin(self, artifact: Artifact) -> Iterator[ListedOrigin]: """Given an artifact (tarball, file), yield one ListedOrigin.""" assert self.lister_obj.id is not None + key_checksum = ( + "checksums" if artifact.is_integrity_about_artifact else "nar_checksums" + ) yield ListedOrigin( lister_id=self.lister_obj.id, url=artifact.origin, visit_type=artifact.visit_type, extra_loader_arguments={ - "checksums": artifact.checksums, + key_checksum: artifact.checksums, "fallback_urls": artifact.fallback_urls, }, ) diff --git a/swh/lister/nixguix/tests/data/nixpkgs-swh_sources.json b/swh/lister/nixguix/tests/data/nixpkgs-swh_sources.json --- a/swh/lister/nixguix/tests/data/nixpkgs-swh_sources.json +++ b/swh/lister/nixguix/tests/data/nixpkgs-swh_sources.json @@ -38,6 +38,7 @@ }, { "type": "url", + "outputHashMode": "flat", "urls": [ "http://downloads.sourceforge.net/project/nmon/lmon16n.c", "http://ufpr.dl.sourceforge.net/project/nmon/lmon16n.c", @@ -45,6 +46,17 @@ ], "integrity": "sha256-wAEswtkl3ulAw3zq4perrGS6Wlww5XXnQYsEAoYT9fI=" }, + { + "outputHash": "0s7p9swjqjsqddylmgid6cv263ggq7pmb734z4k84yfcrgb6kg4g", + "outputHashAlgo": "sha256", + "outputHashMode": "recursive", + "type": "url", + "urls": [ + "https://github.com/kandu/trie/archive/1.0.0.tar.gz" + ], + "integrity": "sha256-j7xp1svMeYIm+WScVe/B7w0jNjMtvkp9a1hLLLlO92g=", + "inferredFetcher": "fetchzip" + }, { "type": "git", "git_url": "https://example.org/pali/0xffff",