diff --git a/swh/loader/package/archive/loader.py b/swh/loader/package/archive/loader.py --- a/swh/loader/package/archive/loader.py +++ b/swh/loader/package/archive/loader.py @@ -34,7 +34,6 @@ """Size of the archive file""" time = attr.ib(type=Union[str, datetime.datetime]) """Timestamp of the archive file on the server""" - version = attr.ib(type=str) # default format for gnu MANIFEST_FORMAT = string.Template("$time $length $version $url") @@ -143,11 +142,7 @@ return p_info.extid(manifest_format=self.extid_manifest_format) def build_release( - self, - version: str, - p_info: ArchivePackageInfo, - uncompressed_path: str, - directory: Sha1Git, + self, p_info: ArchivePackageInfo, uncompressed_path: str, directory: Sha1Git ) -> Optional[Release]: time = p_info.time # assume it's a timestamp if isinstance(time, str): # otherwise, assume it's a parsable date @@ -156,7 +151,7 @@ parsed_time = time normalized_time = TimestampWithTimezone.from_datetime(parsed_time) return Release( - name=version.encode(), + name=p_info.version.encode(), message=REVISION_MESSAGE, date=normalized_time, author=SWH_PERSON, diff --git a/swh/loader/package/cran/loader.py b/swh/loader/package/cran/loader.py --- a/swh/loader/package/cran/loader.py +++ b/swh/loader/package/cran/loader.py @@ -30,7 +30,6 @@ @attr.s class CRANPackageInfo(BasePackageInfo): raw_info = attr.ib(type=Dict[str, Any]) - version = attr.ib(type=str) EXTID_TYPE = "cran-sha256" MANIFEST_FORMAT = string.Template("$version $url") @@ -83,20 +82,15 @@ yield release_name(version), p_info def build_release( - self, - version: str, - p_info: CRANPackageInfo, - uncompressed_path: str, - directory: Sha1Git, + self, p_info: CRANPackageInfo, uncompressed_path: str, directory: Sha1Git ) -> Optional[Release]: # a_metadata is empty metadata = extract_intrinsic_metadata(uncompressed_path) date = parse_date(metadata.get("Date")) author = Person.from_fullname(metadata.get("Maintainer", "").encode()) - version = metadata.get("Version", p_info.version) return Release( - name=version.encode(), - message=version.encode(), + name=p_info.version.encode(), + message=p_info.version.encode(), date=date, author=author, target_type=ObjectType.DIRECTORY, diff --git a/swh/loader/package/debian/loader.py b/swh/loader/package/debian/loader.py --- a/swh/loader/package/debian/loader.py +++ b/swh/loader/package/debian/loader.py @@ -69,20 +69,24 @@ files = attr.ib(type=Dict[str, DebianFileMetadata]) """Metadata of the files (.deb, .dsc, ...) of the package.""" name = attr.ib(type=str) - version = attr.ib(type=str) + full_version = attr.ib(type=str) + """eg. stretch/contrib/0.7.2-3""" @classmethod - def from_metadata(cls, a_metadata: Dict[str, Any], url: str) -> "DebianPackageInfo": + def from_metadata( + cls, a_metadata: Dict[str, Any], url: str, version: str + ) -> "DebianPackageInfo": return cls( url=url, filename=None, + version=version, raw_info=a_metadata, files={ file_name: DebianFileMetadata(**file_metadata) for (file_name, file_metadata) in a_metadata.get("files", {}).items() }, name=a_metadata["name"], - version=a_metadata["version"], + full_version=a_metadata["version"], ) def extid(self) -> Optional[PartialExtID]: @@ -177,7 +181,7 @@ def get_package_info(self, version: str) -> Iterator[Tuple[str, DebianPackageInfo]]: meta = self.packages[version] - p_info = DebianPackageInfo.from_metadata(meta, url=self.url) + p_info = DebianPackageInfo.from_metadata(meta, url=self.url, version=version) yield release_name(version), p_info def download_package( @@ -207,11 +211,7 @@ return extract_package(dl_artifacts, dest=dest) def build_release( - self, - version: str, - p_info: DebianPackageInfo, - uncompressed_path: str, - directory: Sha1Git, + self, p_info: DebianPackageInfo, uncompressed_path: str, directory: Sha1Git, ) -> Optional[Release]: dsc_url, dsc_name = dsc_information(p_info) if not dsc_name: @@ -226,7 +226,7 @@ msg = "Synthetic revision for Debian source package %s version %s" % ( p_info.name, - p_info.version, + p_info.full_version, ) author = prepare_person(intrinsic_metadata.changelog.person) @@ -234,7 +234,7 @@ # inspired from swh.loader.debian.converters.package_metadata_to_revision return Release( - name=version.encode(), + name=p_info.version.encode(), message=msg.encode("utf-8"), author=author, date=date, diff --git a/swh/loader/package/debian/tests/test_debian.py b/swh/loader/package/debian/tests/test_debian.py --- a/swh/loader/package/debian/tests/test_debian.py +++ b/swh/loader/package/debian/tests/test_debian.py @@ -261,7 +261,7 @@ def test_debian_download_package(datadir, tmpdir, requests_mock_datadir): tmpdir = str(tmpdir) # py3.5 work around (LocalPath issue) - p_info = DebianPackageInfo.from_metadata(PACKAGE_FILES, url=URL) + p_info = DebianPackageInfo.from_metadata(PACKAGE_FILES, url=URL, version="0.7.2-3") all_hashes = download_package(p_info, tmpdir) assert all_hashes == { "cicero_0.7.2-3.diff.gz": { @@ -304,7 +304,7 @@ def test_debian_dsc_information_ok(): fname = "cicero_0.7.2-3.dsc" - p_info = DebianPackageInfo.from_metadata(PACKAGE_FILES, url=URL) + p_info = DebianPackageInfo.from_metadata(PACKAGE_FILES, url=URL, version="0.7.2-3") dsc_url, dsc_name = dsc_information(p_info) assert dsc_url == PACKAGE_FILES["files"][fname]["uri"] @@ -313,7 +313,7 @@ def test_debian_dsc_information_not_found(): fname = "cicero_0.7.2-3.dsc" - p_info = DebianPackageInfo.from_metadata(PACKAGE_FILES, url=URL) + p_info = DebianPackageInfo.from_metadata(PACKAGE_FILES, url=URL, version="0.7.2-3") p_info.files.pop(fname) dsc_url, dsc_name = dsc_information(p_info) @@ -328,7 +328,7 @@ for package_metadata in package_files["files"].values(): del package_metadata["md5sum"] - p_info = DebianPackageInfo.from_metadata(package_files, url=URL) + p_info = DebianPackageInfo.from_metadata(package_files, url=URL, version="0.7.2-3") for debian_file_metadata in p_info.files.values(): assert not debian_file_metadata.md5sum @@ -337,7 +337,7 @@ def test_debian_dsc_information_too_many_dsc_entries(): # craft an extra dsc file fname = "cicero_0.7.2-3.dsc" - p_info = DebianPackageInfo.from_metadata(PACKAGE_FILES, url=URL) + p_info = DebianPackageInfo.from_metadata(PACKAGE_FILES, url=URL, version="0.7.2-3") data = p_info.files[fname] fname2 = fname.replace("cicero", "ciceroo") p_info.files[fname2] = data @@ -354,7 +354,7 @@ requests_mock_datadir, datadir, tmp_path ): tmp_path = str(tmp_path) # py3.5 compat. - p_info = DebianPackageInfo.from_metadata(PACKAGE_FILES, url=URL) + p_info = DebianPackageInfo.from_metadata(PACKAGE_FILES, url=URL, version="0.7.2-3") logger.debug("p_info: %s", p_info) diff --git a/swh/loader/package/deposit/loader.py b/swh/loader/package/deposit/loader.py --- a/swh/loader/package/deposit/loader.py +++ b/swh/loader/package/deposit/loader.py @@ -60,7 +60,7 @@ @classmethod def from_metadata( - cls, metadata: Dict[str, Any], url: str, filename: str + cls, metadata: Dict[str, Any], url: str, filename: str, version: str ) -> "DepositPackageInfo": # Note: # `date` and `committer_date` are always transmitted by the deposit read api @@ -80,6 +80,7 @@ return cls( url=url, filename=filename, + version=version, author_date=depo["author_date"], commit_date=depo["committer_date"], client=depo["client"], @@ -176,7 +177,10 @@ self, version: str ) -> Iterator[Tuple[str, DepositPackageInfo]]: p_info = DepositPackageInfo.from_metadata( - self.metadata(), url=self.url, filename=self.default_filename, + self.metadata(), + url=self.url, + filename=self.default_filename, + version=version, ) yield "HEAD", p_info @@ -189,18 +193,14 @@ return [self.client.archive_get(self.deposit_id, tmpdir, p_info.filename)] def build_release( - self, - version: str, - p_info: DepositPackageInfo, - uncompressed_path: str, - directory: Sha1Git, + self, p_info: DepositPackageInfo, uncompressed_path: str, directory: Sha1Git, ) -> Optional[Release]: message = ( f"{p_info.client}: Deposit {p_info.id} in collection {p_info.collection}" ).encode("utf-8") return Release( - name=version.encode(), + name=p_info.version.encode(), message=message, author=p_info.author, date=TimestampWithTimezone.from_dict(p_info.author_date), diff --git a/swh/loader/package/loader.py b/swh/loader/package/loader.py --- a/swh/loader/package/loader.py +++ b/swh/loader/package/loader.py @@ -104,6 +104,8 @@ url = attr.ib(type=str) filename = attr.ib(type=Optional[str]) + version = attr.ib(type=str) + """Version name/number.""" MANIFEST_FORMAT: Optional[string.Template] = None """If not None, used by the default extid() implementation to format a manifest, @@ -191,11 +193,7 @@ yield from {} def build_release( - self, - version: str, - p_info: TPackageInfo, - uncompressed_path: str, - directory: Sha1Git, + self, p_info: TPackageInfo, uncompressed_path: str, directory: Sha1Git ) -> Optional[Release]: """Build the release from the archive metadata (extrinsic artifact metadata) and the intrinsic metadata. @@ -549,16 +547,14 @@ ) # Get the metadata of each version's package - packages_info: List[Tuple[str, str, TPackageInfo]] = [ - (version, branch_name, p_info) + packages_info: List[Tuple[str, TPackageInfo]] = [ + (branch_name, p_info) for version in versions for (branch_name, p_info) in self.get_package_info(version) ] # Compute the ExtID of each of these packages - known_extids = self._get_known_extids( - [p_info for (_, _, p_info) in packages_info] - ) + known_extids = self._get_known_extids([p_info for (_, p_info) in packages_info]) if last_snapshot is None: last_snapshot_targets: Set[Sha1Git] = set() @@ -572,7 +568,7 @@ version: [] for version in versions } errors = [] - for (version, branch_name, p_info) in packages_info: + for (branch_name, p_info) in packages_info: logger.debug("package_info: %s", p_info) # Check if the package was already loaded, using its ExtID @@ -602,7 +598,7 @@ release_id = None try: - res = self._load_release(version, p_info, origin) + res = self._load_release(p_info, origin) if res: (release_id, directory_id) = res assert release_id @@ -629,7 +625,7 @@ elif swhid.object_type == ObjectType.REVISION: # If 'rev' was None, the previous block would have run. assert rev is not None - rel = rev2rel(rev, version) + rel = rev2rel(rev, p_info.version) self.storage.release_add([rel]) logger.debug("Upgraded %s to %s", swhid, rel.swhid()) release_id = rel.id @@ -658,7 +654,7 @@ ExtID(extid_type=extid_type, extid=extid, target=release_swhid) ) - tmp_releases[version].append((branch_name, release_id)) + tmp_releases[p_info.version].append((branch_name, release_id)) if load_exceptions: status_visit = "partial" @@ -752,7 +748,7 @@ return (uncompressed_path, directory) def _load_release( - self, version: str, p_info: TPackageInfo, origin + self, p_info: TPackageInfo, origin ) -> Optional[Tuple[Sha1Git, Sha1Git]]: """Does all the loading of a release itself: @@ -772,9 +768,8 @@ # FIXME: This should be release. cf. D409 release = self.build_release( - version, p_info, uncompressed_path, directory=directory.hash + p_info, uncompressed_path, directory=directory.hash ) - print(release) if not release: # Some artifacts are missing intrinsic metadata # skipping those diff --git a/swh/loader/package/nixguix/loader.py b/swh/loader/package/nixguix/loader.py --- a/swh/loader/package/nixguix/loader.py +++ b/swh/loader/package/nixguix/loader.py @@ -44,10 +44,13 @@ specification.""" @classmethod - def from_metadata(cls, metadata: Dict[str, Any]) -> "NixGuixPackageInfo": + def from_metadata( + cls, metadata: Dict[str, Any], version: str + ) -> "NixGuixPackageInfo": return cls( url=metadata["url"], filename=None, + version=version, integrity=metadata["integrity"], raw_info=metadata, ) @@ -120,7 +123,9 @@ # currently only use the first one, but if the first one # fails, we should try the second one and so on. integrity = self.integrity_by_url()[url] - p_info = NixGuixPackageInfo.from_metadata({"url": url, "integrity": integrity}) + p_info = NixGuixPackageInfo.from_metadata( + {"url": url, "integrity": integrity}, version=url + ) yield url, p_info def extra_branches(self) -> Dict[bytes, Mapping[str, Any]]: @@ -151,14 +156,10 @@ } def build_release( - self, - version: str, - p_info: NixGuixPackageInfo, - uncompressed_path: str, - directory: Sha1Git, + self, p_info: NixGuixPackageInfo, uncompressed_path: str, directory: Sha1Git ) -> Optional[Release]: return Release( - name=version.encode(), + name=p_info.version.encode(), message=b"", author=EMPTY_AUTHOR, date=None, diff --git a/swh/loader/package/npm/loader.py b/swh/loader/package/npm/loader.py --- a/swh/loader/package/npm/loader.py +++ b/swh/loader/package/npm/loader.py @@ -48,7 +48,6 @@ date = attr.ib(type=Optional[str]) shasum = attr.ib(type=str) """sha1 checksum""" - version = attr.ib(type=str) @classmethod def from_metadata( @@ -142,11 +141,7 @@ yield release_name(version), p_info def build_release( - self, - version: str, - p_info: NpmPackageInfo, - uncompressed_path: str, - directory: Sha1Git, + self, p_info: NpmPackageInfo, uncompressed_path: str, directory: Sha1Git ) -> Optional[Release]: i_metadata = extract_intrinsic_metadata(uncompressed_path) if not i_metadata: @@ -168,7 +163,7 @@ date = attr.evolve(date, timestamp=attr.evolve(date.timestamp, microseconds=0)) r = Release( - name=version.encode(), + name=p_info.version.encode(), message=message, author=author, date=date, diff --git a/swh/loader/package/opam/loader.py b/swh/loader/package/opam/loader.py --- a/swh/loader/package/opam/loader.py +++ b/swh/loader/package/opam/loader.py @@ -31,7 +31,6 @@ class OpamPackageInfo(BasePackageInfo): author = attr.ib(type=Person) committer = attr.ib(type=Person) - version = attr.ib(type=str) def opam_read( @@ -242,15 +241,11 @@ ) def build_release( - self, - version: str, - p_info: OpamPackageInfo, - uncompressed_path: str, - directory: Sha1Git, + self, p_info: OpamPackageInfo, uncompressed_path: str, directory: Sha1Git, ) -> Optional[Release]: return Release( - name=version.encode(), + name=p_info.version.encode(), author=p_info.author, message=str.encode(p_info.version), date=None, diff --git a/swh/loader/package/pypi/loader.py b/swh/loader/package/pypi/loader.py --- a/swh/loader/package/pypi/loader.py +++ b/swh/loader/package/pypi/loader.py @@ -46,10 +46,11 @@ upload_time = attr.ib(type=str) @classmethod - def from_metadata(cls, metadata: Dict[str, Any]) -> "PyPIPackageInfo": + def from_metadata(cls, metadata: Dict[str, Any], version: str) -> "PyPIPackageInfo": return cls( url=metadata["url"], filename=metadata["filename"], + version=version, raw_info=metadata, comment_text=metadata.get("comment_text"), sha256=metadata["digests"]["sha256"], @@ -115,7 +116,7 @@ ): continue - p_info = PyPIPackageInfo.from_metadata(meta) + p_info = PyPIPackageInfo.from_metadata(meta, version=version) res.append((version, p_info)) if len(res) == 1: @@ -126,11 +127,7 @@ yield release_name(version, p_info.filename), p_info def build_release( - self, - version: str, - p_info: PyPIPackageInfo, - uncompressed_path: str, - directory: Sha1Git, + self, p_info: PyPIPackageInfo, uncompressed_path: str, directory: Sha1Git ) -> Optional[Release]: i_metadata = extract_intrinsic_metadata(uncompressed_path) if not i_metadata: @@ -146,7 +143,7 @@ date = TimestampWithTimezone.from_iso8601(p_info.upload_time) return Release( - name=version.encode(), + name=p_info.version.encode(), message=message.encode(), author=author_, date=date, diff --git a/swh/loader/package/pypi/tests/test_pypi.py b/swh/loader/package/pypi/tests/test_pypi.py --- a/swh/loader/package/pypi/tests/test_pypi.py +++ b/swh/loader/package/pypi/tests/test_pypi.py @@ -733,6 +733,7 @@ p_info = PyPIPackageInfo( url=url, filename="GermlineFilter-1.2.tar.gz", + version="1.2", directory_extrinsic_metadata=[], raw_info={}, comment_text="", @@ -758,7 +759,7 @@ # attempt to build release loader = PyPILoader(swh_storage, url) - release = loader.build_release("1.2", p_info, str(tmp_path), directory) + release = loader.build_release(p_info, str(tmp_path), directory) # without comment_text and version in PKG-INFO, message should be empty assert release.message == b"" diff --git a/swh/loader/package/tests/test_loader.py b/swh/loader/package/tests/test_loader.py --- a/swh/loader/package/tests/test_loader.py +++ b/swh/loader/package/tests/test_loader.py @@ -58,7 +58,9 @@ return ["v1.0", "v2.0", "v3.0", "v4.0"] def get_package_info(self, version): - p_info = StubPackageInfo("http://example.org", f"example-{version}.tar") + p_info = StubPackageInfo( + "http://example.org", f"example-{version}.tar", version=version + ) extid_type = "extid-type1" if version in ("v1.0", "v2.0") else "extid-type2" # Versions 1.0 and 2.0 have an extid of a given type, v3.0 has an extid # of a different type @@ -70,7 +72,7 @@ ).start() yield (f"branch-{version}", p_info) - def _load_release(self, version, p_info, origin): + def _load_release(self, p_info, origin): return None @@ -93,7 +95,7 @@ def test_resolve_object_from_extids() -> None: loader = PackageLoader(None, None) # type: ignore - p_info = Mock(wraps=BasePackageInfo(None, None)) # type: ignore + p_info = Mock(wraps=BasePackageInfo(None, None, None)) # type: ignore # The PackageInfo does not support extids p_info.extid.return_value = None @@ -217,12 +219,12 @@ # v1.0: not loaded because there is already its (extid_type, extid, rel) # in the storage. # v2.0: loaded, because there is already a similar extid, but different type - call("v2.0", StubPackageInfo(origin, "example-v2.0.tar"), Origin(url=origin)), + call(StubPackageInfo(origin, "example-v2.0.tar", "v2.0"), Origin(url=origin),), # v3.0: loaded despite having an (extid_type, extid) in storage, because # the target of the extid is not in the previous snapshot - call("v3.0", StubPackageInfo(origin, "example-v3.0.tar"), Origin(url=origin)), + call(StubPackageInfo(origin, "example-v3.0.tar", "v3.0"), Origin(url=origin),), # v4.0: loaded, because there isn't its extid - call("v4.0", StubPackageInfo(origin, "example-v4.0.tar"), Origin(url=origin)), + call(StubPackageInfo(origin, "example-v4.0.tar", "v4.0"), Origin(url=origin),), ] # then check the snapshot has all the branches. @@ -374,9 +376,9 @@ # v1.0: not loaded because there is already a revision matching it # v2.0: loaded, as the revision is missing from the storage even though there # is an extid - call("v2.0", StubPackageInfo(origin, "example-v2.0.tar"), Origin(url=origin)), + call(StubPackageInfo(origin, "example-v2.0.tar", "v2.0"), Origin(url=origin)), # v3.0: loaded (did not exist yet) - call("v3.0", StubPackageInfo(origin, "example-v3.0.tar"), Origin(url=origin)), + call(StubPackageInfo(origin, "example-v3.0.tar", "v3.0"), Origin(url=origin)), ] snapshot = Snapshot( @@ -416,7 +418,6 @@ b = attr.ib() length = attr.ib() filename = attr.ib() - version = attr.ib() MANIFEST_FORMAT = string.Template("$a $b") diff --git a/swh/loader/package/tests/test_loader_metadata.py b/swh/loader/package/tests/test_loader_metadata.py --- a/swh/loader/package/tests/test_loader_metadata.py +++ b/swh/loader/package/tests/test_loader_metadata.py @@ -100,14 +100,10 @@ return [("path", {"artifact_key": "value", "length": 0})] def build_release( - self, - version: str, - p_info: BasePackageInfo, - uncompressed_path: str, - directory: Sha1Git, + self, p_info: BasePackageInfo, uncompressed_path: str, directory: Sha1Git, ): return Release( - name=version.encode(), + name=p_info.version.encode(), message=b"", author=Person.from_fullname(b""), date=None, @@ -125,6 +121,7 @@ p_info = BasePackageInfo( url=ORIGIN_URL, filename="archive.tgz", + version=version, directory_extrinsic_metadata=[ RawExtrinsicMetadataCore(m0.format, m0.metadata, m0.discovery_date), RawExtrinsicMetadataCore(m1.format, m1.metadata, m1.discovery_date),