diff --git a/docs/package-loader-specifications.rst b/docs/package-loader-specifications.rst --- a/docs/package-loader-specifications.rst +++ b/docs/package-loader-specifications.rst @@ -24,17 +24,16 @@ - passed as arg - ``release_name(​version)`` - =version - - "swh-loader-package: - synthetic revision message" + - "Synthetic release for archive at {p_info.url}\n" - true - - SWH robot + - "" - passed as arg - * - cran - ``metadata.get(​"Version", passed as arg)`` - ``release_name(​version)`` - =version - - =version + - standard message - true - ``metadata.get(​"Maintainer", "")`` - ``metadata.get(​"Date")`` @@ -43,7 +42,7 @@ - passed as arg (eg. ``stretch/contrib/0.7.2-3``) - ``release_name(​version)`` - =version - - "Synthetic revision for Debian source package %s version %s" + - standard message (using full version) - true - ``metadata​.changelog​.person`` - ``metadata​.changelog​.date`` @@ -52,16 +51,16 @@ - HEAD - only HEAD - HEAD - - "{client}: Deposit {id} in collection {collection}" + - "{client}: Deposit {id} in collection {collection}\n" - true - - SWH robot + - original author - ```` from SWORD XML - revisions had parents * - nixguix - URL - URL - URL - - "" + - None - true - "" - None @@ -70,7 +69,7 @@ - ``metadata​["version"]`` - ``release_name(​version)`` - =version - - =version + - standard message - true - from int metadata or "" - from ext metadata or None @@ -79,7 +78,7 @@ - as given by opam - "{opam_package}​.{version}" - =version - - =version + - standard message - true - from metadata - None @@ -88,7 +87,7 @@ - ``metadata​["version"]`` - ``release_name(​version)`` or ``release_name(​version, filename)`` - =version - - "{version}: {metadata[​'comment_text']}" or just version + - ``metadata[​'comment_text']`` or standard message - true - from int metadata or "" - from ext metadata or None @@ -101,6 +100,13 @@ return "releases/%s/%s" % (version, filename) return "releases/%s" % version +and "standard message" being:: + + msg = ( + f"Synthetic 
release for {PACKAGE_MANAGER} source package {name} " + f"version {version}\n" + ) + The ``target_type`` field is always ``dir``, and the target the id of a directory loaded by unpacking a tarball/zip file/... diff --git a/swh/loader/package/archive/loader.py b/swh/loader/package/archive/loader.py --- a/swh/loader/package/archive/loader.py +++ b/swh/loader/package/archive/loader.py @@ -14,17 +14,11 @@ import iso8601 from swh.loader.package.loader import BasePackageInfo, PackageLoader, PartialExtID -from swh.loader.package.utils import release_name -from swh.model.model import ObjectType, Person, Release, Sha1Git, TimestampWithTimezone +from swh.loader.package.utils import EMPTY_AUTHOR, release_name +from swh.model.model import ObjectType, Release, Sha1Git, TimestampWithTimezone from swh.storage.interface import StorageInterface logger = logging.getLogger(__name__) -SWH_PERSON = Person( - name=b"Software Heritage", - fullname=b"Software Heritage", - email=b"robot@softwareheritage.org", -) -REVISION_MESSAGE = b"swh-loader-package: synthetic revision message" @attr.s @@ -150,11 +144,12 @@ else: parsed_time = time normalized_time = TimestampWithTimezone.from_datetime(parsed_time) + msg = f"Synthetic release for archive at {p_info.url}\n" return Release( name=p_info.version.encode(), - message=REVISION_MESSAGE, + message=msg.encode(), date=normalized_time, - author=SWH_PERSON, + author=EMPTY_AUTHOR, target=directory, target_type=ObjectType.DIRECTORY, synthetic=True, diff --git a/swh/loader/package/archive/tests/test_archive.py b/swh/loader/package/archive/tests/test_archive.py --- a/swh/loader/package/archive/tests/test_archive.py +++ b/swh/loader/package/archive/tests/test_archive.py @@ -14,8 +14,17 @@ from swh.loader.package.archive.loader import ArchiveLoader, ArchivePackageInfo from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats -from swh.model.hashutil import hash_to_bytes -from swh.model.model import Snapshot, SnapshotBranch, 
TargetType +from swh.model.hashutil import hash_to_bytes, hash_to_hex +from swh.model.model import ( + ObjectType, + Person, + Release, + Snapshot, + SnapshotBranch, + TargetType, + Timestamp, + TimestampWithTimezone, +) URL = "https://ftp.gnu.org/gnu/8sync/" GNU_ARTIFACTS = [ @@ -77,7 +86,7 @@ ] _expected_new_releases_first_visit = { - "c9786c1e3b46f52779c727d3509d66ebf8948d88": ( + "c92b2ad9e70ef1dce455e8fe1d8e41b92512cc08": ( "3aebc29ed1fccc4a6f2f2010fb8e57882406b528" ) } @@ -131,12 +140,11 @@ assert actual_load_status["status"] == "eventful" expected_snapshot_first_visit_id = hash_to_bytes( - "cdf8f335fa0c81c8ad089870ec14f52b1980eb6c" + "9efecc835e8f99254934f256b5301b94f348fd17" ) - assert ( - hash_to_bytes(actual_load_status["snapshot_id"]) - == expected_snapshot_first_visit_id + assert actual_load_status["snapshot_id"] == hash_to_hex( + expected_snapshot_first_visit_id ) assert_last_visit_matches(swh_storage, URL, status="full", type="tar") @@ -153,6 +161,7 @@ "snapshot": 1, } == stats + release_id = hash_to_bytes(list(_expected_new_releases_first_visit)[0]) expected_snapshot = Snapshot( id=expected_snapshot_first_visit_id, branches={ @@ -160,14 +169,30 @@ target_type=TargetType.ALIAS, target=b"releases/0.1.0", ), b"releases/0.1.0": SnapshotBranch( - target_type=TargetType.RELEASE, - target=hash_to_bytes(list(_expected_new_releases_first_visit)[0]), + target_type=TargetType.RELEASE, target=release_id, ), }, ) - check_snapshot(expected_snapshot, swh_storage) + assert swh_storage.release_get([release_id])[0] == Release( + id=release_id, + name=b"0.1.0", + message=( + b"Synthetic release for archive at " + b"https://ftp.gnu.org/gnu/8sync/8sync-0.1.0.tar.gz\n" + ), + target=hash_to_bytes("3aebc29ed1fccc4a6f2f2010fb8e57882406b528"), + target_type=ObjectType.DIRECTORY, + synthetic=True, + author=Person.from_fullname(b""), + date=TimestampWithTimezone( + timestamp=Timestamp(seconds=944729610, microseconds=0), + offset=0, + negative_utc=False, + ), + ) + 
expected_contents = map(hash_to_bytes, _expected_new_contents_first_visit) assert list(swh_storage.content_missing_per_sha1(expected_contents)) == [] diff --git a/swh/loader/package/cran/loader.py b/swh/loader/package/cran/loader.py --- a/swh/loader/package/cran/loader.py +++ b/swh/loader/package/cran/loader.py @@ -30,6 +30,7 @@ @attr.s class CRANPackageInfo(BasePackageInfo): raw_info = attr.ib(type=Dict[str, Any]) + name = attr.ib(type=str) EXTID_TYPE = "cran-sha256" MANIFEST_FORMAT = string.Template("$version $url") @@ -41,6 +42,7 @@ url=url, filename=path.basename(url), raw_info=a_metadata, + name=a_metadata["package"], version=a_metadata["version"], ) @@ -88,9 +90,13 @@ metadata = extract_intrinsic_metadata(uncompressed_path) date = parse_date(metadata.get("Date")) author = Person.from_fullname(metadata.get("Maintainer", "").encode()) + msg = ( + f"Synthetic release for CRAN source package {p_info.name} " + f"version {p_info.version}\n" + ) return Release( name=p_info.version.encode(), - message=p_info.version.encode(), + message=msg.encode(), date=date, author=author, target_type=ObjectType.DIRECTORY, diff --git a/swh/loader/package/cran/tests/test_cran.py b/swh/loader/package/cran/tests/test_cran.py --- a/swh/loader/package/cran/tests/test_cran.py +++ b/swh/loader/package/cran/tests/test_cran.py @@ -20,17 +20,27 @@ ) from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats from swh.model.hashutil import hash_to_bytes -from swh.model.model import Snapshot, SnapshotBranch, TargetType, TimestampWithTimezone +from swh.model.model import ( + ObjectType, + Person, + Release, + Snapshot, + SnapshotBranch, + TargetType, + Timestamp, + TimestampWithTimezone, +) + +RELEASE_ID = hash_to_bytes("daaf3cffedac946060de53648994631d0b3c63bc") SNAPSHOT = Snapshot( - id=hash_to_bytes("56ed00938d83892bd5b42f2f368ae38a1dbfa718"), + id=hash_to_bytes("c0ccd6452cbe9cd4f0a523b23f09c411bd92ef4e"), branches={ b"HEAD": SnapshotBranch( target=b"releases/2.22-6", 
target_type=TargetType.ALIAS ), b"releases/2.22-6": SnapshotBranch( - target=hash_to_bytes("42993a72eac50a4a83523c9327a52be3593755a8"), - target_type=TargetType.RELEASE, + target=RELEASE_ID, target_type=TargetType.RELEASE, ), }, ) @@ -172,7 +182,15 @@ f"{base_url}/src_contrib_1.4.0_Recommended_KernSmooth_{version}.tar.gz" # noqa ) loader = CRANLoader( - swh_storage, origin_url, artifacts=[{"url": artifact_url, "version": version,}] + swh_storage, + origin_url, + artifacts=[ + { + "url": artifact_url, + "version": version, + "package": "Recommended_KernSmooth", + } + ], ) actual_load_status = loader.load() @@ -188,6 +206,28 @@ check_snapshot(SNAPSHOT, swh_storage) + assert swh_storage.release_get([RELEASE_ID])[0] == Release( + id=RELEASE_ID, + name=b"2.22-6", + message=( + b"Synthetic release for CRAN source package " + b"Recommended_KernSmooth version 2.22-6\n" + ), + target=hash_to_bytes("ff64177fea3f4a5136b9caf7581a4f7d4cf65296"), + target_type=ObjectType.DIRECTORY, + synthetic=True, + author=Person( + fullname=b"Brian Ripley <ripley@stats.ox.ac.uk>", + name=b"Brian Ripley", + email=b"ripley@stats.ox.ac.uk", + ), + date=TimestampWithTimezone( + timestamp=Timestamp(seconds=991958400, microseconds=0), + offset=0, + negative_utc=False, + ), + ) + visit_stats = get_stats(swh_storage) assert { "content": 33, @@ -218,7 +258,15 @@ f"{base_url}/src_contrib_1.4.0_Recommended_KernSmooth_{version}.tar.gz" # noqa ) loader = CRANLoader( - swh_storage, origin_url, artifacts=[{"url": artifact_url, "version": version}] + swh_storage, + origin_url, + artifacts=[ + { + "url": artifact_url, + "version": version, + "package": "Recommended_KernSmooth", + } + ], ) # first visit @@ -342,7 +390,13 @@ loader = CRANLoader( swh_storage, origin_url, - artifacts=[{"url": artifact_url, "version": version}], + artifacts=[ + { + "url": artifact_url, + "version": version, + "package": "Recommended_KernSmooth", + } + ], ) actual_load_status = loader.load() diff --git a/swh/loader/package/debian/loader.py 
b/swh/loader/package/debian/loader.py --- a/swh/loader/package/debian/loader.py +++ b/swh/loader/package/debian/loader.py @@ -224,9 +224,9 @@ logger.debug("intrinsic_metadata: %s", intrinsic_metadata) logger.debug("p_info: %s", p_info) - msg = "Synthetic revision for Debian source package %s version %s" % ( - p_info.name, - p_info.full_version, + msg = ( + f"Synthetic release for Debian source package {p_info.name} " + f"version {p_info.full_version}\n" ) author = prepare_person(intrinsic_metadata.changelog.person) @@ -235,7 +235,7 @@ # inspired from swh.loader.debian.converters.package_metadata_to_revision return Release( name=p_info.version.encode(), - message=msg.encode("utf-8"), + message=msg.encode(), author=author, date=date, target=directory, diff --git a/swh/loader/package/debian/tests/test_debian.py b/swh/loader/package/debian/tests/test_debian.py --- a/swh/loader/package/debian/tests/test_debian.py +++ b/swh/loader/package/debian/tests/test_debian.py @@ -23,7 +23,16 @@ ) from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats from swh.model.hashutil import hash_to_bytes -from swh.model.model import Person, Snapshot, SnapshotBranch, TargetType +from swh.model.model import ( + ObjectType, + Person, + Release, + Snapshot, + SnapshotBranch, + TargetType, + Timestamp, + TimestampWithTimezone, +) logger = logging.getLogger(__name__) @@ -110,7 +119,7 @@ ) actual_load_status = loader.load() - expected_snapshot_id = "8bc5d12e2443ab216fdd2f969b25b39e96c20fef" + expected_snapshot_id = "ad1367b5470a03857be7c7325a5a8bde698e1800" assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id, @@ -124,18 +133,38 @@ snapshot=hash_to_bytes(expected_snapshot_id), ) + release_id = hash_to_bytes("73e0ede9c21f7074ad1f9c81a774cfcb9e02addf") + expected_snapshot = Snapshot( id=hash_to_bytes(expected_snapshot_id), branches={ b"releases/stretch/contrib/0.7.2-3": SnapshotBranch( - target_type=TargetType.RELEASE, - 
target=hash_to_bytes("5a99736512d381700c5f54d7fdd6b46e136535a2"), + target_type=TargetType.RELEASE, target=release_id, ) }, ) # different than the previous loader as no release is done check_snapshot(expected_snapshot, swh_storage) + assert swh_storage.release_get([release_id])[0] == Release( + id=release_id, + name=b"stretch/contrib/0.7.2-3", + message=b"Synthetic release for Debian source package cicero version 0.7.2-3\n", + target=hash_to_bytes("798df511408c53bf842a8e54d4d335537836bdc3"), + target_type=ObjectType.DIRECTORY, + synthetic=True, + author=Person( + fullname=b"Samuel Thibault <sthibault@debian.org>", + name=b"Samuel Thibault", + email=b"sthibault@debian.org", + ), + date=TimestampWithTimezone( + timestamp=Timestamp(seconds=1413730355, microseconds=0), + offset=120, + negative_utc=False, + ), + ) + stats = get_stats(swh_storage) assert { "content": 42, @@ -162,7 +191,7 @@ actual_load_status = loader.load() - expected_snapshot_id = "8bc5d12e2443ab216fdd2f969b25b39e96c20fef" + expected_snapshot_id = "ad1367b5470a03857be7c7325a5a8bde698e1800" assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id, @@ -181,7 +210,7 @@ branches={ b"releases/stretch/contrib/0.7.2-3": SnapshotBranch( target_type=TargetType.RELEASE, - target=hash_to_bytes("5a99736512d381700c5f54d7fdd6b46e136535a2"), + target=hash_to_bytes("73e0ede9c21f7074ad1f9c81a774cfcb9e02addf"), ) }, ) # different than the previous loader as no release is done @@ -418,7 +447,7 @@ ) actual_load_status = loader.load() - expected_snapshot_id = "3d26243c91eb084c350627a5a102cfe039c5b92a" + expected_snapshot_id = "a83fa5c089b048161f0677b9614a4aae96a6ca18" assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id, @@ -437,11 +466,11 @@ branches={ b"releases/stretch/contrib/0.7.2-3": SnapshotBranch( target_type=TargetType.RELEASE, - target=hash_to_bytes("5a99736512d381700c5f54d7fdd6b46e136535a2"), + target=hash_to_bytes("73e0ede9c21f7074ad1f9c81a774cfcb9e02addf"), ), 
b"releases/buster/contrib/0.7.2-4": SnapshotBranch( target_type=TargetType.RELEASE, - target=hash_to_bytes("192fc7ccce80f64a0d3cf33d379133af067ec721"), + target=hash_to_bytes("9f6d8d868514f991af0d9f5d7173aba1236a5a75"), ), }, ) diff --git a/swh/loader/package/deposit/loader.py b/swh/loader/package/deposit/loader.py --- a/swh/loader/package/deposit/loader.py +++ b/swh/loader/package/deposit/loader.py @@ -196,7 +196,7 @@ self, p_info: DepositPackageInfo, uncompressed_path: str, directory: Sha1Git, ) -> Optional[Release]: message = ( - f"{p_info.client}: Deposit {p_info.id} in collection {p_info.collection}" + f"{p_info.client}: Deposit {p_info.id} in collection {p_info.collection}\n" ).encode("utf-8") return Release( diff --git a/swh/loader/package/deposit/tests/test_deposit.py b/swh/loader/package/deposit/tests/test_deposit.py --- a/swh/loader/package/deposit/tests/test_deposit.py +++ b/swh/loader/package/deposit/tests/test_deposit.py @@ -169,7 +169,7 @@ ) actual_load_status = loader.load() - expected_snapshot_id = "1090aaadc9fd1a77798bf6187d309145cbd23c53" + expected_snapshot_id = "338b45d87e02fb5cbf324694bc4a898623d6a30f" assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id, @@ -183,7 +183,7 @@ snapshot=hash_to_bytes(expected_snapshot_id), ) - release_id_hex = "77c127bff4f9137baf26774fe19e29d82a41f69d" + release_id_hex = "2566a64a27bc00362e265be9666d7606750530a1" release_id = hash_to_bytes(release_id_hex) expected_snapshot = Snapshot( @@ -208,7 +208,7 @@ assert release == Release( id=release_id, name=b"HEAD", - message=b"hal: Deposit 666 in collection hal", + message=b"hal: Deposit 666 in collection hal\n", author=person, date=date, target_type=ModelObjectType.DIRECTORY, @@ -300,7 +300,7 @@ ) actual_load_status = loader.load() - expected_snapshot_id = "f87b25c121d9ab3ff0219b04b92d83f8c6f368f4" + expected_snapshot_id = "3449b8ff31abeacefd33cca60e3074c1649dc3a1" assert actual_load_status == { "status": "eventful", @@ -314,7 +314,7 
@@ snapshot=hash_to_bytes(expected_snapshot_id), ) - release_id = "c6891941d4033f4fb1dbf39b501c819ac618f957" + release_id = "ba6c9a59ae3256e765d32b211cc183dc2380aed7" expected_snapshot = Snapshot( id=hash_to_bytes(expected_snapshot_id), branches={ @@ -479,7 +479,7 @@ loader = DepositLoader(swh_storage, url, deposit_id, deposit_client) actual_load_status = loader.load() - expected_snapshot_id = "212228fe041c763471c14545cf11dbec8003d6b4" + expected_snapshot_id = "4677843de89e398f1d6bfedc9ca9b89c451c55c8" assert actual_load_status == { "status": "eventful", diff --git a/swh/loader/package/loader.py b/swh/loader/package/loader.py --- a/swh/loader/package/loader.py +++ b/swh/loader/package/loader.py @@ -1034,9 +1034,12 @@ def rev2rel(rev: Revision, version: str) -> Release: """Converts a revision to a release.""" + message = rev.message + if message and not message.endswith(b"\n"): + message += b"\n" return Release( name=version.encode(), - message=rev.message, + message=message, target=rev.directory, target_type=ModelObjectType.DIRECTORY, synthetic=rev.synthetic, diff --git a/swh/loader/package/nixguix/loader.py b/swh/loader/package/nixguix/loader.py --- a/swh/loader/package/nixguix/loader.py +++ b/swh/loader/package/nixguix/loader.py @@ -160,7 +160,7 @@ ) -> Optional[Release]: return Release( name=p_info.version.encode(), - message=b"", + message=None, author=EMPTY_AUTHOR, date=None, target=directory, diff --git a/swh/loader/package/nixguix/tests/test_nixguix.py b/swh/loader/package/nixguix/tests/test_nixguix.py --- a/swh/loader/package/nixguix/tests/test_nixguix.py +++ b/swh/loader/package/nixguix/tests/test_nixguix.py @@ -28,7 +28,10 @@ MetadataAuthority, MetadataAuthorityType, MetadataFetcher, + ObjectType, + Person, RawExtrinsicMetadata, + Release, Snapshot, SnapshotBranch, TargetType, @@ -54,14 +57,14 @@ SNAPSHOT1 = Snapshot( - id=hash_to_bytes("771d13ae4e799755c22d1e05da8fc39cf215de58"), + id=hash_to_bytes("efe5145f85af3fc87f34102d8b8481cd5198f4f8"), branches={ 
b"evaluation": SnapshotBranch( target=hash_to_bytes("cc4e04c26672dd74e5fd0fecb78b435fb55368f7"), target_type=TargetType.REVISION, ), b"https://github.com/owner-1/repository-1/revision-1.tgz": SnapshotBranch( - target=hash_to_bytes("24853190589d26d0ea2b6c0330b553ff39176e0c"), + target=hash_to_bytes("df7811b9644ed8ef088e2e7add62ed32b0bab15f"), target_type=TargetType.RELEASE, ), }, @@ -273,8 +276,39 @@ def test_loader_one_visit(swh_storage, requests_mock_datadir, raw_sources): loader = NixGuixLoader(swh_storage, sources_url) - res = loader.load() - assert res["status"] == "eventful" + load_status = loader.load() + expected_snapshot_id_hex = "efe5145f85af3fc87f34102d8b8481cd5198f4f8" + expected_snapshot_id = hash_to_bytes(expected_snapshot_id_hex) + assert load_status == { + "status": "eventful", + "snapshot_id": expected_snapshot_id_hex, + } + + release_id = hash_to_bytes("df7811b9644ed8ef088e2e7add62ed32b0bab15f") + expected_snapshot = Snapshot( + id=expected_snapshot_id, + branches={ + b"evaluation": SnapshotBranch( + target=hash_to_bytes("cc4e04c26672dd74e5fd0fecb78b435fb55368f7"), + target_type=TargetType.REVISION, + ), + b"https://github.com/owner-1/repository-1/revision-1.tgz": SnapshotBranch( + target=release_id, target_type=TargetType.RELEASE, + ), + }, + ) + check_snapshot(expected_snapshot, storage=swh_storage) + + assert swh_storage.release_get([release_id])[0] == Release( + id=release_id, + name=b"https://github.com/owner-1/repository-1/revision-1.tgz", + message=None, + target=hash_to_bytes("4de2e07d3742718d928e974b8a4c721b9f7b33bf"), + target_type=ObjectType.DIRECTORY, + synthetic=True, + author=Person.from_fullname(b""), + date=None, + ) stats = get_stats(swh_storage) assert { @@ -413,7 +447,7 @@ loader = NixGuixLoader(swh_storage, sources_url) load_status = loader.load() - expected_snapshot_id_hex = "c5bba84fd5ac3342566effb86190619092d34e79" + expected_snapshot_id_hex = "c1983a0a3f647548e1fb92f30339da6848fe9f7a" expected_snapshot_id = 
hash_to_bytes(expected_snapshot_id_hex) assert load_status == { "status": "eventful", @@ -439,11 +473,11 @@ target_type=TargetType.REVISION, ), b"https://github.com/owner-1/repository-1/revision-1.tgz": SnapshotBranch( - target=hash_to_bytes("24853190589d26d0ea2b6c0330b553ff39176e0c"), + target=hash_to_bytes("df7811b9644ed8ef088e2e7add62ed32b0bab15f"), target_type=TargetType.RELEASE, ), b"https://github.com/owner-2/repository-1/revision-1.tgz": SnapshotBranch( - target=hash_to_bytes("3d44fbe814ba802cfd77f83975e45766d3a2ba85"), + target=hash_to_bytes("5cc0115cd643902b837cb6cfbc9f5865bc5a7cb2"), target_type=TargetType.RELEASE, ), }, @@ -573,7 +607,7 @@ ] archive_loader = ArchiveLoader(swh_storage, url=gnu_url, artifacts=gnu_artifacts) actual_load_status = archive_loader.load() - expected_snapshot_id = "cdf8f335fa0c81c8ad089870ec14f52b1980eb6c" + expected_snapshot_id = "9efecc835e8f99254934f256b5301b94f348fd17" assert actual_load_status["status"] == "eventful" assert actual_load_status["snapshot_id"] == expected_snapshot_id # noqa diff --git a/swh/loader/package/npm/loader.py b/swh/loader/package/npm/loader.py --- a/swh/loader/package/npm/loader.py +++ b/swh/loader/package/npm/loader.py @@ -106,8 +106,8 @@ str: origin url (e.g. 
https://www.npmjs.com/package/) """ super().__init__(storage=storage, url=url, max_content_size=max_content_size) - package_name = url.split("https://www.npmjs.com/package/")[1] - safe_name = quote(package_name, safe="") + self.package_name = url.split("https://www.npmjs.com/package/")[1] + safe_name = quote(self.package_name, safe="") self.provider_url = f"https://replicate.npmjs.com/{safe_name}/" self._info: Dict[str, Any] = {} self._versions = None @@ -147,7 +147,10 @@ if not i_metadata: return None author = extract_npm_package_author(i_metadata) - message = i_metadata["version"].encode("ascii") + msg = ( + f"Synthetic release for NPM source package {self.package_name} " + f"version {p_info.version}\n" + ) if p_info.date is None: url = p_info.url @@ -164,7 +167,7 @@ r = Release( name=p_info.version.encode(), - message=message, + message=msg.encode(), author=author, date=date, target=directory, diff --git a/swh/loader/package/npm/tests/test_npm.py b/swh/loader/package/npm/tests/test_npm.py --- a/swh/loader/package/npm/tests/test_npm.py +++ b/swh/loader/package/npm/tests/test_npm.py @@ -19,9 +19,12 @@ from swh.model.model import ( Person, RawExtrinsicMetadata, + Release, Snapshot, SnapshotBranch, TargetType, + Timestamp, + TimestampWithTimezone, ) from swh.model.model import MetadataAuthority, MetadataAuthorityType, MetadataFetcher from swh.model.model import ObjectType as ModelObjectType @@ -280,13 +283,13 @@ _expected_new_releases_first_visit = normalize_hashes( { - "d25e722a32c145b3eb88b416049dd35d27759a87": ( + "d38cc0b571cd41f3c85513864e049766b42032a7": ( "42753c0c2ab00c4501b552ac4671c68f3cf5aece" ), - "3522e846b97c0b8434c565fe891c0f082a357e5d": ( + "62bf7076bae9aa2cb4d6cb3bf7ce0ea4fdd5b295": ( "3370d20d6f96dc1c9e50f083e2134881db110f4f" ), - "54f6c1711c6aedb6de3cf2d6347b9f772e343784": ( + "6e976db82f6c310596b21fb0ed8b11f507631434": ( "d7895533ef5edbcffdea3f057d9fef3a1ef845ce" ), } @@ -307,7 +310,7 @@ loader = NpmLoader(swh_storage, url) actual_load_status = 
loader.load() - expected_snapshot_id = hash_to_bytes("ddaad89b0b4edb7eefe7c92e9b1166caa776ebbc") + expected_snapshot_id = hash_to_bytes("0996ca28d6280499abcf485b51c4e3941b057249") assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id.hex(), @@ -317,10 +320,11 @@ swh_storage, url, status="full", type="npm", snapshot=expected_snapshot_id ) + release_id = "d38cc0b571cd41f3c85513864e049766b42032a7" versions = [ - ("0.0.2", "d25e722a32c145b3eb88b416049dd35d27759a87"), - ("0.0.3", "3522e846b97c0b8434c565fe891c0f082a357e5d"), - ("0.0.4", "54f6c1711c6aedb6de3cf2d6347b9f772e343784"), + ("0.0.2", release_id), + ("0.0.3", "62bf7076bae9aa2cb4d6cb3bf7ce0ea4fdd5b295"), + ("0.0.4", "6e976db82f6c310596b21fb0ed8b11f507631434"), ] expected_snapshot = Snapshot( @@ -340,6 +344,25 @@ ) check_snapshot(expected_snapshot, swh_storage) + assert swh_storage.release_get([hash_to_bytes(release_id)])[0] == Release( + name=b"0.0.2", + message=b"Synthetic release for NPM source package org version 0.0.2\n", + target=hash_to_bytes("42753c0c2ab00c4501b552ac4671c68f3cf5aece"), + target_type=ModelObjectType.DIRECTORY, + synthetic=True, + author=Person( + fullname=b"mooz <stillpedant@gmail.com>", + name=b"mooz", + email=b"stillpedant@gmail.com", + ), + date=TimestampWithTimezone( + timestamp=Timestamp(seconds=1388590833, microseconds=0), + offset=0, + negative_utc=False, + ), + id=hash_to_bytes(release_id), + ) + contents = swh_storage.content_get(_expected_new_contents_first_visit) count = sum(0 if content is None else 1 for content in contents) assert count == len(_expected_new_contents_first_visit) @@ -403,7 +426,7 @@ url = package_url(package) loader = NpmLoader(swh_storage, url) - expected_snapshot_id = hash_to_bytes("ddaad89b0b4edb7eefe7c92e9b1166caa776ebbc") + expected_snapshot_id = hash_to_bytes("0996ca28d6280499abcf485b51c4e3941b057249") actual_load_status = loader.load() assert actual_load_status == { "status": "eventful", @@ -466,7 +489,7 @@ loader = NpmLoader(swh_storage, url) 
actual_load_status = loader.load() - expected_snapshot_id = hash_to_bytes("7a89bc3cb51ff1d3213b2151c745d82c3b9d69b1") + expected_snapshot_id = hash_to_bytes("ebbe6397d0c2a6cf7cba40fa5b043c59dd4f2497") assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id.hex(), @@ -483,11 +506,11 @@ ), b"releases/0.1.0": SnapshotBranch( target_type=TargetType.RELEASE, - target=hash_to_bytes("103fa6d0a1abb405468e3590dcf634bcb77f67be"), + target=hash_to_bytes("04c66f3a82aa001e8f1b45246b58b82d2b0ca0df"), ), b"releases/0.1.1-alpha.14": SnapshotBranch( target_type=TargetType.RELEASE, - target=hash_to_bytes("c00b54143582a4e963e0b86e8dfa58eedd260020"), + target=hash_to_bytes("90cc04dc72193f3b1444f10e1c525bee2ea9dac6"), ), }, ) @@ -566,7 +589,7 @@ loader = NpmLoader(swh_storage, url) actual_load_status = loader.load() - expected_snapshot_id = hash_to_bytes("7f5e591dd3c4754abca4db1cc18355671e2c014c") + expected_snapshot_id = hash_to_bytes("33b8f105d48ce16b6c59158af660e0cc78bcbef4") assert actual_load_status == { "status": "eventful", @@ -582,7 +605,7 @@ ), b"releases/0.0.1": SnapshotBranch( target_type=TargetType.RELEASE, - target=hash_to_bytes("199bf0ad020617357d608655e6549e526a65dc36"), + target=hash_to_bytes("3e3b800570869fa9b3dbc302500553e62400cc06"), ), }, ) diff --git a/swh/loader/package/opam/loader.py b/swh/loader/package/opam/loader.py --- a/swh/loader/package/opam/loader.py +++ b/swh/loader/package/opam/loader.py @@ -244,10 +244,14 @@ self, p_info: OpamPackageInfo, uncompressed_path: str, directory: Sha1Git, ) -> Optional[Release]: + msg = ( + f"Synthetic release for OPAM source package {self.opam_package} " + f"version {p_info.version}\n" + ) return Release( name=p_info.version.encode(), author=p_info.author, - message=str.encode(p_info.version), + message=msg.encode(), date=None, target=directory, target_type=ObjectType.DIRECTORY, diff --git a/swh/loader/package/opam/tests/test_opam.py b/swh/loader/package/opam/tests/test_opam.py --- 
a/swh/loader/package/opam/tests/test_opam.py +++ b/swh/loader/package/opam/tests/test_opam.py @@ -9,15 +9,15 @@ from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats from swh.model.hashutil import hash_to_bytes from swh.model.model import ( - MetadataAuthority, - MetadataAuthorityType, - MetadataFetcher, Person, RawExtrinsicMetadata, + Release, Snapshot, SnapshotBranch, TargetType, ) +from swh.model.model import MetadataAuthority, MetadataAuthorityType, MetadataFetcher +from swh.model.model import ObjectType as ModelObjectType from swh.model.swhids import CoreSWHID, ExtendedObjectType, ExtendedSWHID, ObjectType from swh.storage.interface import PagedResult @@ -110,29 +110,43 @@ actual_load_status = loader.load() - expected_snapshot_id = hash_to_bytes("50b5961c27dd4f8b138acce8bac4f90d1e33081f") + expected_snapshot_id = hash_to_bytes("e1159446b00745ba4daa7ee26d74fbd81ecc081c") assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id.hex(), } + assert_last_visit_matches( + swh_storage, url, status="full", type="opam", snapshot=expected_snapshot_id + ) + + release_id = hash_to_bytes("d4d8d3df4f34609a3eeabd48aea49002c5f54f41") + expected_snapshot = Snapshot( id=expected_snapshot_id, branches={ b"HEAD": SnapshotBranch(target=b"agrid.0.1", target_type=TargetType.ALIAS,), b"agrid.0.1": SnapshotBranch( - target=hash_to_bytes("efcb9ef9d0f2a85312463251732b42f9e45a5c12"), - target_type=TargetType.RELEASE, + target=release_id, target_type=TargetType.RELEASE, ), }, ) - assert_last_visit_matches( - swh_storage, url, status="full", type="opam", snapshot=expected_snapshot_id - ) - check_snapshot(expected_snapshot, swh_storage) + assert swh_storage.release_get([release_id])[0] == Release( + name=b"0.1", + message=b"Synthetic release for OPAM source package agrid version 0.1\n", + target=hash_to_bytes("00412ee5bc601deb462e55addd1004715116785e"), + target_type=ModelObjectType.DIRECTORY, + synthetic=True, + author=Person( + 
fullname=b"OCamlPro ", name=None, email=None + ), + date=None, + id=release_id, + ) + stats = get_stats(swh_storage) assert { @@ -167,7 +181,7 @@ actual_load_status = loader.load() - expected_snapshot_id = hash_to_bytes("f0a974e47999e74d323f1fb9604fde72527bda28") + expected_snapshot_id = hash_to_bytes("f498f7f3b0edbce5cf5834b487a4f8360f6a6a43") assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id.hex(), @@ -180,15 +194,15 @@ target=b"directories.0.3", target_type=TargetType.ALIAS, ), b"directories.0.1": SnapshotBranch( - target=hash_to_bytes("1f839cb1f4720d6b33fdd856e3ff1119497979d9"), + target=hash_to_bytes("1c88d466b3d57a619e296999322d096fa37bb1c2"), target_type=TargetType.RELEASE, ), b"directories.0.2": SnapshotBranch( - target=hash_to_bytes("4133834d966381804347efbc41e35dd2bdd48962"), + target=hash_to_bytes("d6f30684039ad485511a138e2ae504ff67a13075"), target_type=TargetType.RELEASE, ), b"directories.0.3": SnapshotBranch( - target=hash_to_bytes("2f20cabfbacfe447b80dc2a4eb14d461775100c8"), + target=hash_to_bytes("6cf92c0ff052074e69ac18809a9c8198bcc2e746"), target_type=TargetType.RELEASE, ), }, @@ -222,7 +236,7 @@ actual_load_status = loader.load() - expected_snapshot_id = hash_to_bytes("987425c6fe94d3972c4c4e97ee27a6a7c8b68e82") + expected_snapshot_id = hash_to_bytes("8ba39f050243a72ca667c5587a87413240cbaa47") assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id.hex(), @@ -251,7 +265,7 @@ assert branch_name == expected_branch_name assert package_info == expected_package_info - release_id = hash_to_bytes("8d0612cdf172e5dff3d876ca2bbc0f6003cc36cc") + release_id = hash_to_bytes("c231e541eb29c712635ada394b04127ac69e9fb0") expected_snapshot = Snapshot( id=hash_to_bytes(actual_load_status["snapshot_id"]), @@ -298,7 +312,7 @@ assert actual_load_status["status"] == "eventful" - expected_release_id = hash_to_bytes("8d0612cdf172e5dff3d876ca2bbc0f6003cc36cc") + expected_release_id = 
hash_to_bytes("c231e541eb29c712635ada394b04127ac69e9fb0") expected_snapshot = Snapshot( id=hash_to_bytes(actual_load_status["snapshot_id"]), diff --git a/swh/loader/package/pypi/loader.py b/swh/loader/package/pypi/loader.py --- a/swh/loader/package/pypi/loader.py +++ b/swh/loader/package/pypi/loader.py @@ -41,17 +41,21 @@ class PyPIPackageInfo(BasePackageInfo): raw_info = attr.ib(type=Dict[str, Any]) + name = attr.ib(type=str) comment_text = attr.ib(type=Optional[str]) sha256 = attr.ib(type=str) upload_time = attr.ib(type=str) @classmethod - def from_metadata(cls, metadata: Dict[str, Any], version: str) -> "PyPIPackageInfo": + def from_metadata( + cls, metadata: Dict[str, Any], name: str, version: str + ) -> "PyPIPackageInfo": return cls( url=metadata["url"], filename=metadata["filename"], version=version, raw_info=metadata, + name=name, comment_text=metadata.get("comment_text"), sha256=metadata["digests"]["sha256"], upload_time=metadata["upload_time"], @@ -116,7 +120,9 @@ ): continue - p_info = PyPIPackageInfo.from_metadata(meta, version=version) + p_info = PyPIPackageInfo.from_metadata( + meta, name=self.info()["info"]["name"], version=version + ) res.append((version, p_info)) if len(res) == 1: @@ -134,17 +140,22 @@ return None # from intrinsic metadata - version_ = i_metadata.get("version", "") + version_ = i_metadata.get("version", p_info.version) author_ = author(i_metadata) - # from extrinsic metadata - message = p_info.comment_text or "" - message = "%s: %s" % (version_, message) if message else version_ + if p_info.comment_text: + msg = p_info.comment_text + else: + msg = ( + f"Synthetic release for PyPI source package {p_info.name} " + f"version {version_}\n" + ) + date = TimestampWithTimezone.from_iso8601(p_info.upload_time) return Release( name=p_info.version.encode(), - message=message.encode(), + message=msg.encode(), author=author_, date=date, target=directory, diff --git a/swh/loader/package/pypi/tests/test_pypi.py 
b/swh/loader/package/pypi/tests/test_pypi.py --- a/swh/loader/package/pypi/tests/test_pypi.py +++ b/swh/loader/package/pypi/tests/test_pypi.py @@ -329,7 +329,7 @@ assert actual_load_status["status"] == "eventful" assert actual_load_status["snapshot_id"] is not None - expected_release_id = hash_to_bytes("a1e10745d375be66c1b65e55c0c15fe98776b53c") + expected_release_id = hash_to_bytes("fbbcb817f01111b06442cdcc93140ab3cc777d68") expected_snapshot = Snapshot( id=hash_to_bytes(actual_load_status["snapshot_id"]), @@ -338,12 +338,11 @@ target=b"releases/1.2.0", target_type=TargetType.ALIAS, ), b"releases/1.1.0": SnapshotBranch( - target=hash_to_bytes("9478c9981887fdf5ada3f1fcb20c81069cdf4c44"), + target=hash_to_bytes("f8789ff3ed70a5f570c35d885c7bcfda7b23b091"), target_type=TargetType.RELEASE, ), b"releases/1.2.0": SnapshotBranch( - target=hash_to_bytes("a1e10745d375be66c1b65e55c0c15fe98776b53c"), - target_type=TargetType.RELEASE, + target=expected_release_id, target_type=TargetType.RELEASE, ), }, ) @@ -397,7 +396,7 @@ loader = PyPILoader(swh_storage, url) actual_load_status = loader.load() - expected_snapshot_id = hash_to_bytes("eee24d5b0c156ebb4ece0c810c9dce636ebe881f") + expected_snapshot_id = hash_to_bytes("00785a38479abe5fbfa402df96be26d2ddf89c97") assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id.hex(), @@ -411,7 +410,7 @@ id=hash_to_bytes(expected_snapshot_id), branches={ b"releases/1.2.0": SnapshotBranch( - target=hash_to_bytes("a1e10745d375be66c1b65e55c0c15fe98776b53c"), + target=hash_to_bytes("fbbcb817f01111b06442cdcc93140ab3cc777d68"), target_type=TargetType.RELEASE, ), b"HEAD": SnapshotBranch( @@ -443,7 +442,7 @@ loader = PyPILoader(swh_storage, url) actual_load_status = loader.load() - expected_snapshot_id = hash_to_bytes("62d957f2b5cdc515bea0a46252a3ab29ee271636") + expected_snapshot_id = hash_to_bytes("3dd50c1a0e48a7625cf1427e3190a65b787c774e") assert actual_load_status == { "status": "eventful", "snapshot_id": 
expected_snapshot_id.hex(), @@ -457,11 +456,11 @@ id=expected_snapshot_id, branches={ b"releases/1.1.0": SnapshotBranch( - target=hash_to_bytes("9478c9981887fdf5ada3f1fcb20c81069cdf4c44"), + target=hash_to_bytes("f8789ff3ed70a5f570c35d885c7bcfda7b23b091"), target_type=TargetType.RELEASE, ), b"releases/1.2.0": SnapshotBranch( - target=hash_to_bytes("a1e10745d375be66c1b65e55c0c15fe98776b53c"), + target=hash_to_bytes("fbbcb817f01111b06442cdcc93140ab3cc777d68"), target_type=TargetType.RELEASE, ), b"HEAD": SnapshotBranch( @@ -492,7 +491,7 @@ loader = PyPILoader(swh_storage, url) actual_load_status = loader.load() - snapshot_id = hash_to_bytes("62d957f2b5cdc515bea0a46252a3ab29ee271636") + snapshot_id = hash_to_bytes("3dd50c1a0e48a7625cf1427e3190a65b787c774e") assert actual_load_status == { "status": "eventful", "snapshot_id": snapshot_id.hex(), @@ -505,11 +504,11 @@ id=snapshot_id, branches={ b"releases/1.1.0": SnapshotBranch( - target=hash_to_bytes("9478c9981887fdf5ada3f1fcb20c81069cdf4c44"), + target=hash_to_bytes("f8789ff3ed70a5f570c35d885c7bcfda7b23b091"), target_type=TargetType.RELEASE, ), b"releases/1.2.0": SnapshotBranch( - target=hash_to_bytes("a1e10745d375be66c1b65e55c0c15fe98776b53c"), + target=hash_to_bytes("fbbcb817f01111b06442cdcc93140ab3cc777d68"), target_type=TargetType.RELEASE, ), b"HEAD": SnapshotBranch( @@ -560,7 +559,7 @@ visit1_actual_load_status = loader.load() visit1_stats = get_stats(swh_storage) - expected_snapshot_id = hash_to_bytes("62d957f2b5cdc515bea0a46252a3ab29ee271636") + expected_snapshot_id = hash_to_bytes("3dd50c1a0e48a7625cf1427e3190a65b787c774e") assert visit1_actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id.hex(), @@ -589,7 +588,7 @@ visit2_stats = get_stats(swh_storage) assert visit2_actual_load_status["status"] == "eventful", visit2_actual_load_status - expected_snapshot_id2 = hash_to_bytes("6a8a84e7f765bed4362315fb054adb2466598636") + expected_snapshot_id2 = 
hash_to_bytes("77febe6ff0faf6cc00dd015a6c9763579a9fb6c7") assert visit2_actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id2.hex(), @@ -603,15 +602,15 @@ id=expected_snapshot_id2, branches={ b"releases/1.1.0": SnapshotBranch( - target=hash_to_bytes("9478c9981887fdf5ada3f1fcb20c81069cdf4c44"), + target=hash_to_bytes("f8789ff3ed70a5f570c35d885c7bcfda7b23b091"), target_type=TargetType.RELEASE, ), b"releases/1.2.0": SnapshotBranch( - target=hash_to_bytes("a1e10745d375be66c1b65e55c0c15fe98776b53c"), + target=hash_to_bytes("fbbcb817f01111b06442cdcc93140ab3cc777d68"), target_type=TargetType.RELEASE, ), b"releases/1.3.0": SnapshotBranch( - target=hash_to_bytes("d46442e99bb6e05df5f75a7f0f7f61a4f2098147"), + target=hash_to_bytes("a21b09cbec8e31f47307f196bb1f939effc26e11"), target_type=TargetType.RELEASE, ), b"HEAD": SnapshotBranch( @@ -665,7 +664,7 @@ loader = PyPILoader(swh_storage, url) actual_load_status = loader.load() - expected_snapshot_id = hash_to_bytes("a136ee226316276c347d7be3da07df5828605927") + expected_snapshot_id = hash_to_bytes("1394b2e59351a944cc763bd9d26d90ce8e8121a8") assert actual_load_status == { "status": "eventful", "snapshot_id": expected_snapshot_id.hex(), @@ -679,11 +678,11 @@ id=expected_snapshot_id, branches={ b"releases/1.1.0/nexter-1.1.0.zip": SnapshotBranch( - target=hash_to_bytes("9478c9981887fdf5ada3f1fcb20c81069cdf4c44"), + target=hash_to_bytes("f7d43faeb65b64d3faa67e4f46559db57d26b9a4"), target_type=TargetType.RELEASE, ), b"releases/1.1.0/nexter-1.1.0.tar.gz": SnapshotBranch( - target=hash_to_bytes("b3391cb4007fb6872c4dfab476a7cfe7443a1bb4"), + target=hash_to_bytes("732bb9dc087e6015884daaebb8b82559be729b5a"), target_type=TargetType.RELEASE, ), }, @@ -734,6 +733,7 @@ url=url, filename="GermlineFilter-1.2.tar.gz", version="1.2", + name="GermlineFilter", directory_extrinsic_metadata=[], raw_info={}, comment_text="", @@ -762,7 +762,10 @@ release = loader.build_release(p_info, str(tmp_path), directory) # without 
comment_text and version in PKG-INFO, message should be empty - assert release.message == b"" + assert ( + release.message + == b"Synthetic release for PyPI source package GermlineFilter version 1.2\n" + ) def test_filter_out_invalid_sdists(swh_storage, requests_mock): @@ -782,6 +785,7 @@ requests_mock.get( json_url, json={ + "info": {"name": project_name,}, "releases": { version: [ { diff --git a/swh/loader/package/tests/test_loader.py b/swh/loader/package/tests/test_loader.py --- a/swh/loader/package/tests/test_loader.py +++ b/swh/loader/package/tests/test_loader.py @@ -303,7 +303,7 @@ rel1 = Release( name=b"v1.0", - message=b"blah", + message=b"blah\n", author=person, date=date, target=dir1_swhid.object_id, diff --git a/swh/loader/tests/test_cli.py b/swh/loader/tests/test_cli.py --- a/swh/loader/tests/test_cli.py +++ b/swh/loader/tests/test_cli.py @@ -57,10 +57,12 @@ result = runner.invoke(loader_cli, ["run", "-h"]) assert result.exit_code == 0 - usage_prefix = _write_usage( - "loader run", f"[OPTIONS] [{'|'.join(SUPPORTED_LOADERS)}]\n" - ) - assert result.output.startswith(usage_prefix) + + # Syntax depends on dependencies' versions + supported_loaders = "|".join(SUPPORTED_LOADERS) + usage_prefix = _write_usage("loader run", "[OPTIONS] [%s]\n" % supported_loaders) + usage_prefix2 = _write_usage("loader run", "[OPTIONS] {%s}\n" % supported_loaders) + assert result.output.startswith((usage_prefix, usage_prefix2)) def test_run_with_configuration_failure(tmp_path):