diff --git a/docs/package-loader-specifications.rst b/docs/package-loader-specifications.rst --- a/docs/package-loader-specifications.rst +++ b/docs/package-loader-specifications.rst @@ -24,17 +24,16 @@ - passed as arg - ``release_name(​version)`` - =version - - "swh-loader-package: - synthetic revision message" + - "Synthetic release for archive at {p_info.url}" - true - - SWH robot + - "" - passed as arg - * - cran - ``metadata.get(​"Version", passed as arg)`` - ``release_name(​version)`` - =version - - =version + - standard message - true - ``metadata.get(​"Maintainer", "")`` - ``metadata.get(​"Date")`` @@ -43,7 +42,7 @@ - passed as arg (eg. ``stretch/contrib/0.7.2-3``) - ``release_name(​version)`` - =version - - "Synthetic revision for Debian source package %s version %s" + - standard message (using full version) - true - ``metadata​.changelog​.person`` - ``metadata​.changelog​.date`` @@ -54,14 +53,14 @@ - HEAD - "{client}: Deposit {id} in collection {collection}" - true - - SWH robot + - original author - ```` from SWORD XML - revisions had parents * - nixguix - URL - URL - URL - - "" + - None - true - "" - None @@ -70,7 +69,7 @@ - ``metadata​["version"]`` - ``release_name(​version)`` - =version - - =version + - standard message - true - from int metadata or "" - from ext metadata or None @@ -79,7 +78,7 @@ - as given by opam - "{opam_package}​.{version}" - =version - - =version + - standard message - true - from metadata - None @@ -88,7 +87,7 @@ - ``metadata​["version"]`` - ``release_name(​version)`` or ``release_name(​version, filename)`` - =version - - "{version}: {metadata[​'comment_text']}" or just version + - ``metadata[​'comment_text']`` or standard message - true - from int metadata or "" - from ext metadata or None @@ -101,6 +100,13 @@ return "releases/%s/%s" % (version, filename) return "releases/%s" % version +and "standard message" being:: + + msg = ( + f"Synthetic release for {PACKAGE_MANAGER} source package {name} " + f"version {version}" + ) + 
The ``target_type`` field is always ``dir``, and the target the id of a directory loaded by unpacking a tarball/zip file/... diff --git a/swh/loader/package/archive/loader.py b/swh/loader/package/archive/loader.py --- a/swh/loader/package/archive/loader.py +++ b/swh/loader/package/archive/loader.py @@ -14,17 +14,11 @@ import iso8601 from swh.loader.package.loader import BasePackageInfo, PackageLoader, PartialExtID -from swh.loader.package.utils import release_name -from swh.model.model import ObjectType, Person, Release, Sha1Git, TimestampWithTimezone +from swh.loader.package.utils import EMPTY_AUTHOR, release_name +from swh.model.model import ObjectType, Release, Sha1Git, TimestampWithTimezone from swh.storage.interface import StorageInterface logger = logging.getLogger(__name__) -SWH_PERSON = Person( - name=b"Software Heritage", - fullname=b"Software Heritage", - email=b"robot@softwareheritage.org", -) -REVISION_MESSAGE = b"swh-loader-package: synthetic revision message" @attr.s @@ -150,11 +144,12 @@ else: parsed_time = time normalized_time = TimestampWithTimezone.from_datetime(parsed_time) + msg = f"Synthetic release for archive at {p_info.url}" return Release( name=p_info.version.encode(), - message=REVISION_MESSAGE, + message=msg.encode(), date=normalized_time, - author=SWH_PERSON, + author=EMPTY_AUTHOR, target=directory, target_type=ObjectType.DIRECTORY, synthetic=True, diff --git a/swh/loader/package/archive/tests/test_archive.py b/swh/loader/package/archive/tests/test_archive.py --- a/swh/loader/package/archive/tests/test_archive.py +++ b/swh/loader/package/archive/tests/test_archive.py @@ -14,8 +14,17 @@ from swh.loader.package.archive.loader import ArchiveLoader, ArchivePackageInfo from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats -from swh.model.hashutil import hash_to_bytes -from swh.model.model import Snapshot, SnapshotBranch, TargetType +from swh.model.hashutil import hash_to_bytes, hash_to_hex +from swh.model.model 
import ( + ObjectType, + Person, + Release, + Snapshot, + SnapshotBranch, + TargetType, + Timestamp, + TimestampWithTimezone, +) URL = "https://ftp.gnu.org/gnu/8sync/" GNU_ARTIFACTS = [ @@ -77,7 +86,7 @@ ] _expected_new_releases_first_visit = { - "c9786c1e3b46f52779c727d3509d66ebf8948d88": ( + "97c2ada10ca9b7876a8b5b17858b0518309170fd": ( "3aebc29ed1fccc4a6f2f2010fb8e57882406b528" ) } @@ -131,12 +140,11 @@ assert actual_load_status["status"] == "eventful" expected_snapshot_first_visit_id = hash_to_bytes( - "cdf8f335fa0c81c8ad089870ec14f52b1980eb6c" + "af62f6f6d464f9b29f270d1bbefa355af38946c4" ) - assert ( - hash_to_bytes(actual_load_status["snapshot_id"]) - == expected_snapshot_first_visit_id + assert actual_load_status["snapshot_id"] == hash_to_hex( + expected_snapshot_first_visit_id ) assert_last_visit_matches(swh_storage, URL, status="full", type="tar") @@ -153,6 +161,7 @@ "snapshot": 1, } == stats + release_id = hash_to_bytes(list(_expected_new_releases_first_visit)[0]) expected_snapshot = Snapshot( id=expected_snapshot_first_visit_id, branches={ @@ -160,14 +169,30 @@ target_type=TargetType.ALIAS, target=b"releases/0.1.0", ), b"releases/0.1.0": SnapshotBranch( - target_type=TargetType.RELEASE, - target=hash_to_bytes(list(_expected_new_releases_first_visit)[0]), + target_type=TargetType.RELEASE, target=release_id, ), }, ) - check_snapshot(expected_snapshot, swh_storage) + assert swh_storage.release_get([release_id])[0] == Release( + id=release_id, + name=b"0.1.0", + message=( + b"Synthetic release for archive at " + b"https://ftp.gnu.org/gnu/8sync/8sync-0.1.0.tar.gz" + ), + target=hash_to_bytes("3aebc29ed1fccc4a6f2f2010fb8e57882406b528"), + target_type=ObjectType.DIRECTORY, + synthetic=True, + author=Person.from_fullname(b""), + date=TimestampWithTimezone( + timestamp=Timestamp(seconds=944729610, microseconds=0), + offset=0, + negative_utc=False, + ), + ) + expected_contents = map(hash_to_bytes, _expected_new_contents_first_visit) assert 
list(swh_storage.content_missing_per_sha1(expected_contents)) == [] diff --git a/swh/loader/package/cran/loader.py b/swh/loader/package/cran/loader.py --- a/swh/loader/package/cran/loader.py +++ b/swh/loader/package/cran/loader.py @@ -30,6 +30,7 @@ @attr.s class CRANPackageInfo(BasePackageInfo): raw_info = attr.ib(type=Dict[str, Any]) + name = attr.ib(type=str) EXTID_TYPE = "cran-sha256" MANIFEST_FORMAT = string.Template("$version $url") @@ -41,6 +42,7 @@ url=url, filename=path.basename(url), raw_info=a_metadata, + name=a_metadata["package"], version=a_metadata["version"], ) @@ -88,9 +90,13 @@ metadata = extract_intrinsic_metadata(uncompressed_path) date = parse_date(metadata.get("Date")) author = Person.from_fullname(metadata.get("Maintainer", "").encode()) + msg = ( + f"Synthetic release for CRAN source package {p_info.name} " + f"version {p_info.version}" + ) return Release( name=p_info.version.encode(), - message=p_info.version.encode(), + message=msg.encode(), date=date, author=author, target_type=ObjectType.DIRECTORY, diff --git a/swh/loader/package/cran/tests/test_cran.py b/swh/loader/package/cran/tests/test_cran.py --- a/swh/loader/package/cran/tests/test_cran.py +++ b/swh/loader/package/cran/tests/test_cran.py @@ -20,17 +20,27 @@ ) from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats from swh.model.hashutil import hash_to_bytes -from swh.model.model import Snapshot, SnapshotBranch, TargetType, TimestampWithTimezone +from swh.model.model import ( + ObjectType, + Person, + Release, + Snapshot, + SnapshotBranch, + TargetType, + Timestamp, + TimestampWithTimezone, +) + +RELEASE_ID = hash_to_bytes("9a977f6415e6997fd9dd53c6dcb540ff0a7bff26") SNAPSHOT = Snapshot( - id=hash_to_bytes("56ed00938d83892bd5b42f2f368ae38a1dbfa718"), + id=hash_to_bytes("3787efc620c55b1e18889cfa561d9bcdc62c4cb2"), branches={ b"HEAD": SnapshotBranch( target=b"releases/2.22-6", target_type=TargetType.ALIAS ), b"releases/2.22-6": SnapshotBranch( - 
target=hash_to_bytes("42993a72eac50a4a83523c9327a52be3593755a8"), - target_type=TargetType.RELEASE, + target=RELEASE_ID, target_type=TargetType.RELEASE, ), }, ) @@ -172,7 +182,15 @@ f"{base_url}/src_contrib_1.4.0_Recommended_KernSmooth_{version}.tar.gz" # noqa ) loader = CRANLoader( - swh_storage, origin_url, artifacts=[{"url": artifact_url, "version": version,}] + swh_storage, + origin_url, + artifacts=[ + { + "url": artifact_url, + "version": version, + "package": "Recommended_KernSmooth", + } + ], ) actual_load_status = loader.load() @@ -188,6 +206,28 @@ check_snapshot(SNAPSHOT, swh_storage) + assert swh_storage.release_get([RELEASE_ID])[0] == Release( + id=RELEASE_ID, + name=b"2.22-6", + message=( + b"Synthetic release for CRAN source package " + b"Recommended_KernSmooth version 2.22-6" + ), + target=hash_to_bytes("ff64177fea3f4a5136b9caf7581a4f7d4cf65296"), + target_type=ObjectType.DIRECTORY, + synthetic=True, + author=Person( + fullname=b"Brian Ripley ", + name=b"Brian Ripley", + email=b"ripley@stats.ox.ac.uk", + ), + date=TimestampWithTimezone( + timestamp=Timestamp(seconds=991958400, microseconds=0), + offset=0, + negative_utc=False, + ), + ) + visit_stats = get_stats(swh_storage) assert { "content": 33, @@ -218,7 +258,15 @@ f"{base_url}/src_contrib_1.4.0_Recommended_KernSmooth_{version}.tar.gz" # noqa ) loader = CRANLoader( - swh_storage, origin_url, artifacts=[{"url": artifact_url, "version": version}] + swh_storage, + origin_url, + artifacts=[ + { + "url": artifact_url, + "version": version, + "package": "Recommended_KernSmooth", + } + ], ) # first visit @@ -342,7 +390,13 @@ loader = CRANLoader( swh_storage, origin_url, - artifacts=[{"url": artifact_url, "version": version}], + artifacts=[ + { + "url": artifact_url, + "version": version, + "package": "Recommended_KernSmooth", + } + ], ) actual_load_status = loader.load() diff --git a/swh/loader/package/debian/loader.py b/swh/loader/package/debian/loader.py --- a/swh/loader/package/debian/loader.py +++ 
b/swh/loader/package/debian/loader.py @@ -224,9 +224,9 @@ logger.debug("intrinsic_metadata: %s", intrinsic_metadata) logger.debug("p_info: %s", p_info) - msg = "Synthetic revision for Debian source package %s version %s" % ( - p_info.name, - p_info.full_version, + msg = ( + f"Synthetic release for Debian source package {p_info.name} " + f"version {p_info.full_version}" ) author = prepare_person(intrinsic_metadata.changelog.person) @@ -235,7 +235,7 @@ # inspired from swh.loader.debian.converters.package_metadata_to_revision return Release( name=p_info.version.encode(), - message=msg.encode("utf-8"), + message=msg.encode(), author=author, date=date, target=directory, diff --git a/swh/loader/package/debian/tests/test_debian.py b/swh/loader/package/debian/tests/test_debian.py --- a/swh/loader/package/debian/tests/test_debian.py +++ b/swh/loader/package/debian/tests/test_debian.py @@ -23,7 +23,16 @@ ) from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats from swh.model.hashutil import hash_to_bytes -from swh.model.model import Person, Snapshot, SnapshotBranch, TargetType +from swh.model.model import ( + ObjectType, + Person, + Release, + Snapshot, + SnapshotBranch, + TargetType, + Timestamp, + TimestampWithTimezone, +) logger = logging.getLogger(__name__) @@ -110,7 +119,7 @@ ) actual_load_status = loader.load() - expected_snapshot_id = "8bc5d12e2443ab216fdd2f969b25b39e96c20fef" + expected_snapshot_id = "20073c91e85b8bcbd2639990e76765d25bd2c0a6" assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id, @@ -124,18 +133,38 @@ snapshot=hash_to_bytes(expected_snapshot_id), ) + release_id = hash_to_bytes("ed191d99e070a33458a4a402becd0b4bba09cd1e") + expected_snapshot = Snapshot( id=hash_to_bytes(expected_snapshot_id), branches={ b"releases/stretch/contrib/0.7.2-3": SnapshotBranch( - target_type=TargetType.RELEASE, - target=hash_to_bytes("5a99736512d381700c5f54d7fdd6b46e136535a2"), + target_type=TargetType.RELEASE, 
target=release_id, ) }, ) # different than the previous loader as no release is done check_snapshot(expected_snapshot, swh_storage) + assert swh_storage.release_get([release_id])[0] == Release( + id=release_id, + name=b"stretch/contrib/0.7.2-3", + message=b"Synthetic release for Debian source package cicero version 0.7.2-3", + target=hash_to_bytes("798df511408c53bf842a8e54d4d335537836bdc3"), + target_type=ObjectType.DIRECTORY, + synthetic=True, + author=Person( + fullname=b"Samuel Thibault ", + name=b"Samuel Thibault", + email=b"sthibault@debian.org", + ), + date=TimestampWithTimezone( + timestamp=Timestamp(seconds=1413730355, microseconds=0), + offset=120, + negative_utc=False, + ), + ) + stats = get_stats(swh_storage) assert { "content": 42, @@ -162,7 +191,7 @@ actual_load_status = loader.load() - expected_snapshot_id = "8bc5d12e2443ab216fdd2f969b25b39e96c20fef" + expected_snapshot_id = "20073c91e85b8bcbd2639990e76765d25bd2c0a6" assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id, @@ -181,7 +210,7 @@ branches={ b"releases/stretch/contrib/0.7.2-3": SnapshotBranch( target_type=TargetType.RELEASE, - target=hash_to_bytes("5a99736512d381700c5f54d7fdd6b46e136535a2"), + target=hash_to_bytes("ed191d99e070a33458a4a402becd0b4bba09cd1e"), ) }, ) # different than the previous loader as no release is done @@ -418,7 +447,7 @@ ) actual_load_status = loader.load() - expected_snapshot_id = "3d26243c91eb084c350627a5a102cfe039c5b92a" + expected_snapshot_id = "3e423d7889ebd8df0ed0373016f035dfed8541cb" assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id, @@ -437,11 +466,11 @@ branches={ b"releases/stretch/contrib/0.7.2-3": SnapshotBranch( target_type=TargetType.RELEASE, - target=hash_to_bytes("5a99736512d381700c5f54d7fdd6b46e136535a2"), + target=hash_to_bytes("ed191d99e070a33458a4a402becd0b4bba09cd1e"), ), b"releases/buster/contrib/0.7.2-4": SnapshotBranch( target_type=TargetType.RELEASE, - 
target=hash_to_bytes("192fc7ccce80f64a0d3cf33d379133af067ec721"), + target=hash_to_bytes("d3dff4a416816c36dc284e49c1c9eed52c2d2ef4"), ), }, ) diff --git a/swh/loader/package/nixguix/loader.py b/swh/loader/package/nixguix/loader.py --- a/swh/loader/package/nixguix/loader.py +++ b/swh/loader/package/nixguix/loader.py @@ -160,7 +160,7 @@ ) -> Optional[Release]: return Release( name=p_info.version.encode(), - message=b"", + message=None, author=EMPTY_AUTHOR, date=None, target=directory, diff --git a/swh/loader/package/nixguix/tests/test_nixguix.py b/swh/loader/package/nixguix/tests/test_nixguix.py --- a/swh/loader/package/nixguix/tests/test_nixguix.py +++ b/swh/loader/package/nixguix/tests/test_nixguix.py @@ -28,7 +28,10 @@ MetadataAuthority, MetadataAuthorityType, MetadataFetcher, + ObjectType, + Person, RawExtrinsicMetadata, + Release, Snapshot, SnapshotBranch, TargetType, @@ -54,14 +57,14 @@ SNAPSHOT1 = Snapshot( - id=hash_to_bytes("771d13ae4e799755c22d1e05da8fc39cf215de58"), + id=hash_to_bytes("efe5145f85af3fc87f34102d8b8481cd5198f4f8"), branches={ b"evaluation": SnapshotBranch( target=hash_to_bytes("cc4e04c26672dd74e5fd0fecb78b435fb55368f7"), target_type=TargetType.REVISION, ), b"https://github.com/owner-1/repository-1/revision-1.tgz": SnapshotBranch( - target=hash_to_bytes("24853190589d26d0ea2b6c0330b553ff39176e0c"), + target=hash_to_bytes("df7811b9644ed8ef088e2e7add62ed32b0bab15f"), target_type=TargetType.RELEASE, ), }, @@ -273,8 +276,39 @@ def test_loader_one_visit(swh_storage, requests_mock_datadir, raw_sources): loader = NixGuixLoader(swh_storage, sources_url) - res = loader.load() - assert res["status"] == "eventful" + load_status = loader.load() + expected_snapshot_id_hex = "efe5145f85af3fc87f34102d8b8481cd5198f4f8" + expected_snapshot_id = hash_to_bytes(expected_snapshot_id_hex) + assert load_status == { + "status": "eventful", + "snapshot_id": expected_snapshot_id_hex, + } + + release_id = hash_to_bytes("df7811b9644ed8ef088e2e7add62ed32b0bab15f") + 
expected_snapshot = Snapshot( + id=expected_snapshot_id, + branches={ + b"evaluation": SnapshotBranch( + target=hash_to_bytes("cc4e04c26672dd74e5fd0fecb78b435fb55368f7"), + target_type=TargetType.REVISION, + ), + b"https://github.com/owner-1/repository-1/revision-1.tgz": SnapshotBranch( + target=release_id, target_type=TargetType.RELEASE, + ), + }, + ) + check_snapshot(expected_snapshot, storage=swh_storage) + + assert swh_storage.release_get([release_id])[0] == Release( + id=release_id, + name=b"https://github.com/owner-1/repository-1/revision-1.tgz", + message=None, + target=hash_to_bytes("4de2e07d3742718d928e974b8a4c721b9f7b33bf"), + target_type=ObjectType.DIRECTORY, + synthetic=True, + author=Person.from_fullname(b""), + date=None, + ) stats = get_stats(swh_storage) assert { @@ -413,7 +447,7 @@ loader = NixGuixLoader(swh_storage, sources_url) load_status = loader.load() - expected_snapshot_id_hex = "c5bba84fd5ac3342566effb86190619092d34e79" + expected_snapshot_id_hex = "c1983a0a3f647548e1fb92f30339da6848fe9f7a" expected_snapshot_id = hash_to_bytes(expected_snapshot_id_hex) assert load_status == { "status": "eventful", @@ -439,11 +473,11 @@ target_type=TargetType.REVISION, ), b"https://github.com/owner-1/repository-1/revision-1.tgz": SnapshotBranch( - target=hash_to_bytes("24853190589d26d0ea2b6c0330b553ff39176e0c"), + target=hash_to_bytes("df7811b9644ed8ef088e2e7add62ed32b0bab15f"), target_type=TargetType.RELEASE, ), b"https://github.com/owner-2/repository-1/revision-1.tgz": SnapshotBranch( - target=hash_to_bytes("3d44fbe814ba802cfd77f83975e45766d3a2ba85"), + target=hash_to_bytes("5cc0115cd643902b837cb6cfbc9f5865bc5a7cb2"), target_type=TargetType.RELEASE, ), }, @@ -573,7 +607,7 @@ ] archive_loader = ArchiveLoader(swh_storage, url=gnu_url, artifacts=gnu_artifacts) actual_load_status = archive_loader.load() - expected_snapshot_id = "cdf8f335fa0c81c8ad089870ec14f52b1980eb6c" + expected_snapshot_id = "af62f6f6d464f9b29f270d1bbefa355af38946c4" assert 
actual_load_status["status"] == "eventful" assert actual_load_status["snapshot_id"] == expected_snapshot_id # noqa diff --git a/swh/loader/package/npm/loader.py b/swh/loader/package/npm/loader.py --- a/swh/loader/package/npm/loader.py +++ b/swh/loader/package/npm/loader.py @@ -106,8 +106,8 @@ str: origin url (e.g. https://www.npmjs.com/package/) """ super().__init__(storage=storage, url=url, max_content_size=max_content_size) - package_name = url.split("https://www.npmjs.com/package/")[1] - safe_name = quote(package_name, safe="") + self.package_name = url.split("https://www.npmjs.com/package/")[1] + safe_name = quote(self.package_name, safe="") self.provider_url = f"https://replicate.npmjs.com/{safe_name}/" self._info: Dict[str, Any] = {} self._versions = None @@ -147,7 +147,10 @@ if not i_metadata: return None author = extract_npm_package_author(i_metadata) - message = i_metadata["version"].encode("ascii") + msg = ( + f"Synthetic release for NPM source package {self.package_name} " + f"version {p_info.version}" + ) if p_info.date is None: url = p_info.url @@ -164,7 +167,7 @@ r = Release( name=p_info.version.encode(), - message=message, + message=msg.encode(), author=author, date=date, target=directory, diff --git a/swh/loader/package/npm/tests/test_npm.py b/swh/loader/package/npm/tests/test_npm.py --- a/swh/loader/package/npm/tests/test_npm.py +++ b/swh/loader/package/npm/tests/test_npm.py @@ -19,9 +19,12 @@ from swh.model.model import ( Person, RawExtrinsicMetadata, + Release, Snapshot, SnapshotBranch, TargetType, + Timestamp, + TimestampWithTimezone, ) from swh.model.model import MetadataAuthority, MetadataAuthorityType, MetadataFetcher from swh.model.model import ObjectType as ModelObjectType @@ -280,13 +283,13 @@ _expected_new_releases_first_visit = normalize_hashes( { - "d25e722a32c145b3eb88b416049dd35d27759a87": ( + "adcc40ee87a3ebb1b5a82edd692cf52aa5099cee": ( "42753c0c2ab00c4501b552ac4671c68f3cf5aece" ), - "3522e846b97c0b8434c565fe891c0f082a357e5d": ( + 
"c781147df0e4963a0f9859134abd28296b702233": ( "3370d20d6f96dc1c9e50f083e2134881db110f4f" ), - "54f6c1711c6aedb6de3cf2d6347b9f772e343784": ( + "f544812dac98e7589155be7dfaef64477a408ec0": ( "d7895533ef5edbcffdea3f057d9fef3a1ef845ce" ), } @@ -307,7 +310,7 @@ loader = NpmLoader(swh_storage, url) actual_load_status = loader.load() - expected_snapshot_id = hash_to_bytes("ddaad89b0b4edb7eefe7c92e9b1166caa776ebbc") + expected_snapshot_id = hash_to_bytes("d24e3f10492ade1e9462ec701370fef4a79a40f1") assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id.hex(), @@ -318,9 +321,9 @@ ) versions = [ - ("0.0.2", "d25e722a32c145b3eb88b416049dd35d27759a87"), - ("0.0.3", "3522e846b97c0b8434c565fe891c0f082a357e5d"), - ("0.0.4", "54f6c1711c6aedb6de3cf2d6347b9f772e343784"), + ("0.0.2", "adcc40ee87a3ebb1b5a82edd692cf52aa5099cee"), + ("0.0.3", "c781147df0e4963a0f9859134abd28296b702233"), + ("0.0.4", "f544812dac98e7589155be7dfaef64477a408ec0"), ] expected_snapshot = Snapshot( @@ -340,6 +343,27 @@ ) check_snapshot(expected_snapshot, swh_storage) + assert swh_storage.release_get( + [hash_to_bytes("adcc40ee87a3ebb1b5a82edd692cf52aa5099cee")] + )[0] == Release( + name=b"0.0.2", + message=b"Synthetic release for NPM source package org version 0.0.2", + target=hash_to_bytes("42753c0c2ab00c4501b552ac4671c68f3cf5aece"), + target_type=ModelObjectType.DIRECTORY, + synthetic=True, + author=Person( + fullname=b"mooz ", + name=b"mooz", + email=b"stillpedant@gmail.com", + ), + date=TimestampWithTimezone( + timestamp=Timestamp(seconds=1388590833, microseconds=0), + offset=0, + negative_utc=False, + ), + id=hash_to_bytes("adcc40ee87a3ebb1b5a82edd692cf52aa5099cee"), + ) + contents = swh_storage.content_get(_expected_new_contents_first_visit) count = sum(0 if content is None else 1 for content in contents) assert count == len(_expected_new_contents_first_visit) @@ -403,7 +427,7 @@ url = package_url(package) loader = NpmLoader(swh_storage, url) - expected_snapshot_id = 
hash_to_bytes("ddaad89b0b4edb7eefe7c92e9b1166caa776ebbc") + expected_snapshot_id = hash_to_bytes("d24e3f10492ade1e9462ec701370fef4a79a40f1") actual_load_status = loader.load() assert actual_load_status == { "status": "eventful", @@ -466,7 +490,7 @@ loader = NpmLoader(swh_storage, url) actual_load_status = loader.load() - expected_snapshot_id = hash_to_bytes("7a89bc3cb51ff1d3213b2151c745d82c3b9d69b1") + expected_snapshot_id = hash_to_bytes("92ff37da8045f0088ed35bce0bc34e2025202825") assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id.hex(), @@ -483,11 +507,11 @@ ), b"releases/0.1.0": SnapshotBranch( target_type=TargetType.RELEASE, - target=hash_to_bytes("103fa6d0a1abb405468e3590dcf634bcb77f67be"), + target=hash_to_bytes("c5e0f0e185660b6bdd694ca5c68babe5bab20e24"), ), b"releases/0.1.1-alpha.14": SnapshotBranch( target_type=TargetType.RELEASE, - target=hash_to_bytes("c00b54143582a4e963e0b86e8dfa58eedd260020"), + target=hash_to_bytes("2f89c709eacc974b587e13f90d10a826b23a550e"), ), }, ) @@ -566,7 +590,7 @@ loader = NpmLoader(swh_storage, url) actual_load_status = loader.load() - expected_snapshot_id = hash_to_bytes("7f5e591dd3c4754abca4db1cc18355671e2c014c") + expected_snapshot_id = hash_to_bytes("2a7a67725f9c7134f56612281e8d1638f1386118") assert actual_load_status == { "status": "eventful", @@ -582,7 +606,7 @@ ), b"releases/0.0.1": SnapshotBranch( target_type=TargetType.RELEASE, - target=hash_to_bytes("199bf0ad020617357d608655e6549e526a65dc36"), + target=hash_to_bytes("68b2a100103cecec06b8dd780228bb751f2dc6f3"), ), }, ) diff --git a/swh/loader/package/opam/loader.py b/swh/loader/package/opam/loader.py --- a/swh/loader/package/opam/loader.py +++ b/swh/loader/package/opam/loader.py @@ -244,10 +244,14 @@ self, p_info: OpamPackageInfo, uncompressed_path: str, directory: Sha1Git, ) -> Optional[Release]: + msg = ( + f"Synthetic release for OPAM source package {self.opam_package} " + f"version {p_info.version}" + ) return Release( 
name=p_info.version.encode(), author=p_info.author, - message=str.encode(p_info.version), + message=msg.encode(), date=None, target=directory, target_type=ObjectType.DIRECTORY, diff --git a/swh/loader/package/opam/tests/test_opam.py b/swh/loader/package/opam/tests/test_opam.py --- a/swh/loader/package/opam/tests/test_opam.py +++ b/swh/loader/package/opam/tests/test_opam.py @@ -9,15 +9,15 @@ from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats from swh.model.hashutil import hash_to_bytes from swh.model.model import ( - MetadataAuthority, - MetadataAuthorityType, - MetadataFetcher, Person, RawExtrinsicMetadata, + Release, Snapshot, SnapshotBranch, TargetType, ) +from swh.model.model import MetadataAuthority, MetadataAuthorityType, MetadataFetcher +from swh.model.model import ObjectType as ModelObjectType from swh.model.swhids import CoreSWHID, ExtendedObjectType, ExtendedSWHID, ObjectType from swh.storage.interface import PagedResult @@ -110,23 +110,37 @@ actual_load_status = loader.load() - expected_snapshot_id = hash_to_bytes("50b5961c27dd4f8b138acce8bac4f90d1e33081f") + expected_snapshot_id = hash_to_bytes("e480958fa7851268be2bcc8d01145c0c9624b34b") assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id.hex(), } + release_id = hash_to_bytes("03db7f0d572509f1c7ce18c847db83070e26fd5e") + expected_snapshot = Snapshot( id=expected_snapshot_id, branches={ b"HEAD": SnapshotBranch(target=b"agrid.0.1", target_type=TargetType.ALIAS,), b"agrid.0.1": SnapshotBranch( - target=hash_to_bytes("efcb9ef9d0f2a85312463251732b42f9e45a5c12"), - target_type=TargetType.RELEASE, + target=release_id, target_type=TargetType.RELEASE, ), }, ) + assert swh_storage.release_get([release_id])[0] == Release( + name=b"0.1", + message=b"Synthetic release for OPAM source package agrid version 0.1", + target=hash_to_bytes("00412ee5bc601deb462e55addd1004715116785e"), + target_type=ModelObjectType.DIRECTORY, + synthetic=True, + 
author=Person( + fullname=b"OCamlPro ", name=None, email=None + ), + date=None, + id=release_id, + ) + assert_last_visit_matches( swh_storage, url, status="full", type="opam", snapshot=expected_snapshot_id ) @@ -167,7 +181,7 @@ actual_load_status = loader.load() - expected_snapshot_id = hash_to_bytes("f0a974e47999e74d323f1fb9604fde72527bda28") + expected_snapshot_id = hash_to_bytes("1a70631bee44c86dded71e0a091b1c91c110f812") assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id.hex(), @@ -180,15 +194,15 @@ target=b"directories.0.3", target_type=TargetType.ALIAS, ), b"directories.0.1": SnapshotBranch( - target=hash_to_bytes("1f839cb1f4720d6b33fdd856e3ff1119497979d9"), + target=hash_to_bytes("013d53d7e1aedbe03aaa3d5c0e6d1d780ef2634d"), target_type=TargetType.RELEASE, ), b"directories.0.2": SnapshotBranch( - target=hash_to_bytes("4133834d966381804347efbc41e35dd2bdd48962"), + target=hash_to_bytes("4fdcc3606c0af33cb4d733b70074e79f03e928a1"), target_type=TargetType.RELEASE, ), b"directories.0.3": SnapshotBranch( - target=hash_to_bytes("2f20cabfbacfe447b80dc2a4eb14d461775100c8"), + target=hash_to_bytes("5de72a60f81649157d267773c30e897b7005dcdb"), target_type=TargetType.RELEASE, ), }, @@ -222,7 +236,7 @@ actual_load_status = loader.load() - expected_snapshot_id = hash_to_bytes("987425c6fe94d3972c4c4e97ee27a6a7c8b68e82") + expected_snapshot_id = hash_to_bytes("96246035587354a71f429d5b9b8dcc98afad3708") assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id.hex(), @@ -251,7 +265,7 @@ assert branch_name == expected_branch_name assert package_info == expected_package_info - release_id = hash_to_bytes("8d0612cdf172e5dff3d876ca2bbc0f6003cc36cc") + release_id = hash_to_bytes("4904ad9d0f3b3f84cec2b899d0d05c682b0efdcb") expected_snapshot = Snapshot( id=hash_to_bytes(actual_load_status["snapshot_id"]), @@ -298,7 +312,7 @@ assert actual_load_status["status"] == "eventful" - expected_release_id = 
hash_to_bytes("8d0612cdf172e5dff3d876ca2bbc0f6003cc36cc") + expected_release_id = hash_to_bytes("4904ad9d0f3b3f84cec2b899d0d05c682b0efdcb") expected_snapshot = Snapshot( id=hash_to_bytes(actual_load_status["snapshot_id"]), diff --git a/swh/loader/package/pypi/loader.py b/swh/loader/package/pypi/loader.py --- a/swh/loader/package/pypi/loader.py +++ b/swh/loader/package/pypi/loader.py @@ -41,17 +41,21 @@ class PyPIPackageInfo(BasePackageInfo): raw_info = attr.ib(type=Dict[str, Any]) + name = attr.ib(type=str) comment_text = attr.ib(type=Optional[str]) sha256 = attr.ib(type=str) upload_time = attr.ib(type=str) @classmethod - def from_metadata(cls, metadata: Dict[str, Any], version: str) -> "PyPIPackageInfo": + def from_metadata( + cls, metadata: Dict[str, Any], name: str, version: str + ) -> "PyPIPackageInfo": return cls( url=metadata["url"], filename=metadata["filename"], version=version, raw_info=metadata, + name=name, comment_text=metadata.get("comment_text"), sha256=metadata["digests"]["sha256"], upload_time=metadata["upload_time"], @@ -116,7 +120,9 @@ ): continue - p_info = PyPIPackageInfo.from_metadata(meta, version=version) + p_info = PyPIPackageInfo.from_metadata( + meta, name=self.info()["info"]["name"], version=version + ) res.append((version, p_info)) if len(res) == 1: @@ -134,17 +140,22 @@ return None # from intrinsic metadata - version_ = i_metadata.get("version", "") + version_ = i_metadata.get("version", p_info.version) author_ = author(i_metadata) - # from extrinsic metadata - message = p_info.comment_text or "" - message = "%s: %s" % (version_, message) if message else version_ + if p_info.comment_text: + msg = p_info.comment_text + else: + msg = ( + f"Synthetic release for PyPI source package {p_info.name} " + f"version {version_}" + ) + date = TimestampWithTimezone.from_iso8601(p_info.upload_time) return Release( name=p_info.version.encode(), - message=message.encode(), + message=msg.encode(), author=author_, date=date, target=directory, diff --git 
a/swh/loader/package/pypi/tests/test_pypi.py b/swh/loader/package/pypi/tests/test_pypi.py --- a/swh/loader/package/pypi/tests/test_pypi.py +++ b/swh/loader/package/pypi/tests/test_pypi.py @@ -329,7 +329,7 @@ assert actual_load_status["status"] == "eventful" assert actual_load_status["snapshot_id"] is not None - expected_release_id = hash_to_bytes("a1e10745d375be66c1b65e55c0c15fe98776b53c") + expected_release_id = hash_to_bytes("e05d81600f3db1f905d23ab2a06ea64460c7e3f4") expected_snapshot = Snapshot( id=hash_to_bytes(actual_load_status["snapshot_id"]), @@ -338,12 +338,11 @@ target=b"releases/1.2.0", target_type=TargetType.ALIAS, ), b"releases/1.1.0": SnapshotBranch( - target=hash_to_bytes("9478c9981887fdf5ada3f1fcb20c81069cdf4c44"), + target=hash_to_bytes("ed4132a0160d97752a6ce5716722fb937a2e00b1"), target_type=TargetType.RELEASE, ), b"releases/1.2.0": SnapshotBranch( - target=hash_to_bytes("a1e10745d375be66c1b65e55c0c15fe98776b53c"), - target_type=TargetType.RELEASE, + target=expected_release_id, target_type=TargetType.RELEASE, ), }, ) @@ -397,7 +396,7 @@ loader = PyPILoader(swh_storage, url) actual_load_status = loader.load() - expected_snapshot_id = hash_to_bytes("eee24d5b0c156ebb4ece0c810c9dce636ebe881f") + expected_snapshot_id = hash_to_bytes("1838a3d6fff760338ab14b95c43c2dabcbb03c5a") assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id.hex(), @@ -411,7 +410,7 @@ id=hash_to_bytes(expected_snapshot_id), branches={ b"releases/1.2.0": SnapshotBranch( - target=hash_to_bytes("a1e10745d375be66c1b65e55c0c15fe98776b53c"), + target=hash_to_bytes("e05d81600f3db1f905d23ab2a06ea64460c7e3f4"), target_type=TargetType.RELEASE, ), b"HEAD": SnapshotBranch( @@ -443,7 +442,7 @@ loader = PyPILoader(swh_storage, url) actual_load_status = loader.load() - expected_snapshot_id = hash_to_bytes("62d957f2b5cdc515bea0a46252a3ab29ee271636") + expected_snapshot_id = hash_to_bytes("7e34abda294fb80e6d4e64637ae43fed112079ca") assert actual_load_status == { 
"status": "eventful", "snapshot_id": expected_snapshot_id.hex(), @@ -457,11 +456,11 @@ id=expected_snapshot_id, branches={ b"releases/1.1.0": SnapshotBranch( - target=hash_to_bytes("9478c9981887fdf5ada3f1fcb20c81069cdf4c44"), + target=hash_to_bytes("ed4132a0160d97752a6ce5716722fb937a2e00b1"), target_type=TargetType.RELEASE, ), b"releases/1.2.0": SnapshotBranch( - target=hash_to_bytes("a1e10745d375be66c1b65e55c0c15fe98776b53c"), + target=hash_to_bytes("e05d81600f3db1f905d23ab2a06ea64460c7e3f4"), target_type=TargetType.RELEASE, ), b"HEAD": SnapshotBranch( @@ -492,7 +491,7 @@ loader = PyPILoader(swh_storage, url) actual_load_status = loader.load() - snapshot_id = hash_to_bytes("62d957f2b5cdc515bea0a46252a3ab29ee271636") + snapshot_id = hash_to_bytes("7e34abda294fb80e6d4e64637ae43fed112079ca") assert actual_load_status == { "status": "eventful", "snapshot_id": snapshot_id.hex(), @@ -505,11 +504,11 @@ id=snapshot_id, branches={ b"releases/1.1.0": SnapshotBranch( - target=hash_to_bytes("9478c9981887fdf5ada3f1fcb20c81069cdf4c44"), + target=hash_to_bytes("ed4132a0160d97752a6ce5716722fb937a2e00b1"), target_type=TargetType.RELEASE, ), b"releases/1.2.0": SnapshotBranch( - target=hash_to_bytes("a1e10745d375be66c1b65e55c0c15fe98776b53c"), + target=hash_to_bytes("e05d81600f3db1f905d23ab2a06ea64460c7e3f4"), target_type=TargetType.RELEASE, ), b"HEAD": SnapshotBranch( @@ -560,7 +559,7 @@ visit1_actual_load_status = loader.load() visit1_stats = get_stats(swh_storage) - expected_snapshot_id = hash_to_bytes("62d957f2b5cdc515bea0a46252a3ab29ee271636") + expected_snapshot_id = hash_to_bytes("7e34abda294fb80e6d4e64637ae43fed112079ca") assert visit1_actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id.hex(), @@ -589,7 +588,7 @@ visit2_stats = get_stats(swh_storage) assert visit2_actual_load_status["status"] == "eventful", visit2_actual_load_status - expected_snapshot_id2 = hash_to_bytes("6a8a84e7f765bed4362315fb054adb2466598636") + expected_snapshot_id2 = 
hash_to_bytes("6636693213eab9000b8eee1c5bbb3f1b675a4c70") assert visit2_actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id2.hex(), @@ -603,15 +602,15 @@ id=expected_snapshot_id2, branches={ b"releases/1.1.0": SnapshotBranch( - target=hash_to_bytes("9478c9981887fdf5ada3f1fcb20c81069cdf4c44"), + target=hash_to_bytes("ed4132a0160d97752a6ce5716722fb937a2e00b1"), target_type=TargetType.RELEASE, ), b"releases/1.2.0": SnapshotBranch( - target=hash_to_bytes("a1e10745d375be66c1b65e55c0c15fe98776b53c"), + target=hash_to_bytes("e05d81600f3db1f905d23ab2a06ea64460c7e3f4"), target_type=TargetType.RELEASE, ), b"releases/1.3.0": SnapshotBranch( - target=hash_to_bytes("d46442e99bb6e05df5f75a7f0f7f61a4f2098147"), + target=hash_to_bytes("a21b09cbec8e31f47307f196bb1f939effc26e11"), target_type=TargetType.RELEASE, ), b"HEAD": SnapshotBranch( @@ -665,7 +664,7 @@ loader = PyPILoader(swh_storage, url) actual_load_status = loader.load() - expected_snapshot_id = hash_to_bytes("a136ee226316276c347d7be3da07df5828605927") + expected_snapshot_id = hash_to_bytes("a65c03a837b24720fa95622de07074e279eddd0d") assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id.hex(), @@ -679,11 +678,11 @@ id=expected_snapshot_id, branches={ b"releases/1.1.0/nexter-1.1.0.zip": SnapshotBranch( - target=hash_to_bytes("9478c9981887fdf5ada3f1fcb20c81069cdf4c44"), + target=hash_to_bytes("18d0087b1e1a3a31070d54bf3e9edbd44ab01cb5"), target_type=TargetType.RELEASE, ), b"releases/1.1.0/nexter-1.1.0.tar.gz": SnapshotBranch( - target=hash_to_bytes("b3391cb4007fb6872c4dfab476a7cfe7443a1bb4"), + target=hash_to_bytes("b2b379b3eb61adcde22e10788b1fc5f985e938d2"), target_type=TargetType.RELEASE, ), }, @@ -734,6 +733,7 @@ url=url, filename="GermlineFilter-1.2.tar.gz", version="1.2", + name="GermlineFilter", directory_extrinsic_metadata=[], raw_info={}, comment_text="", @@ -762,7 +762,10 @@ release = loader.build_release(p_info, str(tmp_path), directory) # without 
comment_text and version in PKG-INFO, message should be the standard message - assert release.message == b"" + assert ( + release.message + == b"Synthetic release for PyPI source package GermlineFilter version 1.2" + ) def test_filter_out_invalid_sdists(swh_storage, requests_mock): @@ -782,6 +785,7 @@ requests_mock.get( json_url, json={ + "info": {"name": project_name,}, "releases": { version: [ {