diff --git a/swh/loader/package/archive/loader.py b/swh/loader/package/archive/loader.py
--- a/swh/loader/package/archive/loader.py
+++ b/swh/loader/package/archive/loader.py
@@ -24,25 +24,40 @@
 @attr.s
 class ArchivePackageInfo(BasePackageInfo):
     raw_info = attr.ib(type=Dict[str, Any])
-    length = attr.ib(type=int)
-    """Size of the archive file"""
-    time = attr.ib(type=Union[str, datetime.datetime])
-    """Timestamp of the archive file on the server"""
+
+    length = attr.ib(type=Optional[int], default=None)
+    """Size of the archive file; optional when integrity is provided."""
+    time = attr.ib(type=Optional[Union[str, datetime.datetime]], default=None)
+    """Server timestamp of the archive file; optional when integrity is provided."""
+    version = attr.ib(type=Optional[str], default=None)  # type: ignore
+    """(Override) Version; optional when integrity is provided."""
+    integrity = attr.ib(type=Optional[str], default=None)
+    """Archive integrity field"""
 
     # default format for gnu
     MANIFEST_FORMAT = string.Template("$time $length $version $url")
+    # default format for nixguix manifests (e.g. nixpkgs, guix)
+    INTEGRITY_MANIFEST_FORMAT = string.Template("$integrity $url")
+    INTEGRITY_EXTID_TYPE = "package-manifest-integrity"
 
     def extid(self, manifest_format: Optional[string.Template] = None) -> PartialExtID:
         """Returns a unique intrinsic identifier of this package info
 
         ``manifest_format`` allows overriding the class' default MANIFEST_FORMAT"""
-        manifest_format = manifest_format or self.MANIFEST_FORMAT
+
+        if self.raw_info.get("integrity") is not None:
+            manifest_format = manifest_format or self.INTEGRITY_MANIFEST_FORMAT
+            extid_type = self.INTEGRITY_EXTID_TYPE
+        else:
+            manifest_format = manifest_format or self.MANIFEST_FORMAT
+            extid_type = self.EXTID_TYPE
+
         # TODO: use parsed attributes instead of self.raw_info
         manifest = manifest_format.substitute(
             {k: str(v) for (k, v) in self.raw_info.items()}
         )
         return (
-            self.EXTID_TYPE,
+            extid_type,
             self.EXTID_VERSION,
             hashlib.sha256(manifest.encode()).digest(),
         )
@@ -50,15 +65,30 @@
     @classmethod
     def from_metadata(cls, a_metadata: Dict[str, Any]) -> "ArchivePackageInfo":
         url = a_metadata["url"]
-        filename = a_metadata.get("filename")
-        return cls(
-            url=url,
-            filename=filename if filename else path.split(url)[-1],
-            raw_info=a_metadata,
-            length=a_metadata["length"],
-            time=a_metadata["time"],
-            version=a_metadata["version"],
-        )
+        integrity = a_metadata.get("integrity")
+        filename_ = a_metadata.get("filename")
+        filename = filename_ if filename_ else path.split(url)[-1]
+        if integrity is not None:
+            return cls(
+                url=url,
+                filename=filename,
+                raw_info=a_metadata,
+                integrity=integrity,
+            )
+        else:
+            length = a_metadata["length"]
+            time = a_metadata["time"]
+            version = a_metadata["version"]
+            assert length is not None
+            assert version is not None
+            return cls(
+                url=url,
+                filename=filename,
+                raw_info=a_metadata,
+                length=length,
+                time=time,
+                version=version,
+            )
 
 
 class ArchiveLoader(PackageLoader[ArchivePackageInfo]):
@@ -141,6 +171,7 @@
         self, p_info: ArchivePackageInfo, uncompressed_path: str, directory: Sha1Git
     ) -> Optional[Release]:
         time = p_info.time  # assume it's a timestamp
+        parsed_time: Optional[datetime.datetime]
         if isinstance(time, str):  # otherwise, assume it's a parsable date
             parsed_time = iso8601.parse_date(time)
         else:
@@ -151,8 +182,15 @@
             else None
         )
         msg = f"Synthetic release for archive at {p_info.url}\n"
+        if p_info.version is not None:
+            name = p_info.version.encode()
+        elif p_info.integrity is not None:
+            name = p_info.integrity.encode()
+        else:
+            raise ValueError("Either version or integrity must be provided.")
+
         return Release(
-            name=p_info.version.encode(),
+            name=name,
             message=msg.encode(),
             date=normalized_time,
             author=EMPTY_AUTHOR,
diff --git a/swh/loader/package/archive/tests/test_archive.py b/swh/loader/package/archive/tests/test_archive.py
--- a/swh/loader/package/archive/tests/test_archive.py
+++ b/swh/loader/package/archive/tests/test_archive.py
@@ -9,6 +9,7 @@
 from io import BytesIO
 from pathlib import Path
 import string
+from typing import Optional
 
 import attr
 import pytest
@@ -339,8 +340,9 @@
 
     @attr.s
     class TestPackageInfo(ArchivePackageInfo):
-        a = attr.ib()
-        b = attr.ib()
+        # mandatory change since ArchivePackageInfo defines optional values
+        a = attr.ib(type=Optional[str], default=None)
+        b = attr.ib(type=Optional[str], default=None)
 
     metadata = GNU_ARTIFACTS[0]
 