Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9749640
D6629.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
43 KB
Subscribers
None
D6629.diff
View Options
diff --git a/docs/package-loader-specifications.rst b/docs/package-loader-specifications.rst
--- a/docs/package-loader-specifications.rst
+++ b/docs/package-loader-specifications.rst
@@ -24,17 +24,16 @@
- passed as arg
- ``release_name(version)``
- =version
- - "swh-loader-package:
- synthetic revision message"
+ - "Synthetic release for archive at {p_info.url}"
- true
- - SWH robot
+ - ""
- passed as arg
-
* - cran
- ``metadata.get("Version", passed as arg)``
- ``release_name(version)``
- =version
- - =version
+ - standard message
- true
- ``metadata.get("Maintainer", "")``
- ``metadata.get("Date")``
@@ -43,7 +42,7 @@
- passed as arg (eg. ``stretch/contrib/0.7.2-3``)
- ``release_name(version)``
- =version
- - "Synthetic revision for Debian source package %s version %s"
+ - standard message (using full version)
- true
- ``metadata.changelog.person``
- ``metadata.changelog.date``
@@ -54,14 +53,14 @@
- HEAD
- "{client}: Deposit {id} in collection {collection}"
- true
- - SWH robot
+ - original author
- ``<codemeta: dateCreated>`` from SWORD XML
- revisions had parents
* - nixguix
- URL
- URL
- URL
- - ""
+ - None
- true
- ""
- None
@@ -70,7 +69,7 @@
- ``metadata["version"]``
- ``release_name(version)``
- =version
- - =version
+ - standard message
- true
- from int metadata or ""
- from ext metadata or None
@@ -79,7 +78,7 @@
- as given by opam
- "{opam_package}.{version}"
- =version
- - =version
+ - standard message
- true
- from metadata
- None
@@ -88,7 +87,7 @@
- ``metadata["version"]``
- ``release_name(version)`` or ``release_name(version, filename)``
- =version
- - "{version}: {metadata['comment_text']}" or just version
+ - ``metadata['comment_text']`` or standard message
- true
- from int metadata or ""
- from ext metadata or None
@@ -101,6 +100,13 @@
return "releases/%s/%s" % (version, filename)
return "releases/%s" % version
+and "standard message" being::
+
+ msg = (
+ f"Synthetic release for {PACKAGE_MANAGER} source package {name} "
+ f"version {version}"
+ )
+
The ``target_type`` field is always ``dir``, and the target the id of a directory
loaded by unpacking a tarball/zip file/...
diff --git a/swh/loader/package/archive/loader.py b/swh/loader/package/archive/loader.py
--- a/swh/loader/package/archive/loader.py
+++ b/swh/loader/package/archive/loader.py
@@ -14,17 +14,11 @@
import iso8601
from swh.loader.package.loader import BasePackageInfo, PackageLoader, PartialExtID
-from swh.loader.package.utils import release_name
-from swh.model.model import ObjectType, Person, Release, Sha1Git, TimestampWithTimezone
+from swh.loader.package.utils import EMPTY_AUTHOR, release_name
+from swh.model.model import ObjectType, Release, Sha1Git, TimestampWithTimezone
from swh.storage.interface import StorageInterface
logger = logging.getLogger(__name__)
-SWH_PERSON = Person(
- name=b"Software Heritage",
- fullname=b"Software Heritage",
- email=b"robot@softwareheritage.org",
-)
-REVISION_MESSAGE = b"swh-loader-package: synthetic revision message"
@attr.s
@@ -150,11 +144,12 @@
else:
parsed_time = time
normalized_time = TimestampWithTimezone.from_datetime(parsed_time)
+ msg = f"Synthetic release for archive at {p_info.url}"
return Release(
name=p_info.version.encode(),
- message=REVISION_MESSAGE,
+ message=msg.encode(),
date=normalized_time,
- author=SWH_PERSON,
+ author=EMPTY_AUTHOR,
target=directory,
target_type=ObjectType.DIRECTORY,
synthetic=True,
diff --git a/swh/loader/package/archive/tests/test_archive.py b/swh/loader/package/archive/tests/test_archive.py
--- a/swh/loader/package/archive/tests/test_archive.py
+++ b/swh/loader/package/archive/tests/test_archive.py
@@ -14,8 +14,17 @@
from swh.loader.package.archive.loader import ArchiveLoader, ArchivePackageInfo
from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats
-from swh.model.hashutil import hash_to_bytes
-from swh.model.model import Snapshot, SnapshotBranch, TargetType
+from swh.model.hashutil import hash_to_bytes, hash_to_hex
+from swh.model.model import (
+ ObjectType,
+ Person,
+ Release,
+ Snapshot,
+ SnapshotBranch,
+ TargetType,
+ Timestamp,
+ TimestampWithTimezone,
+)
URL = "https://ftp.gnu.org/gnu/8sync/"
GNU_ARTIFACTS = [
@@ -77,7 +86,7 @@
]
_expected_new_releases_first_visit = {
- "c9786c1e3b46f52779c727d3509d66ebf8948d88": (
+ "97c2ada10ca9b7876a8b5b17858b0518309170fd": (
"3aebc29ed1fccc4a6f2f2010fb8e57882406b528"
)
}
@@ -131,12 +140,11 @@
assert actual_load_status["status"] == "eventful"
expected_snapshot_first_visit_id = hash_to_bytes(
- "cdf8f335fa0c81c8ad089870ec14f52b1980eb6c"
+ "af62f6f6d464f9b29f270d1bbefa355af38946c4"
)
- assert (
- hash_to_bytes(actual_load_status["snapshot_id"])
- == expected_snapshot_first_visit_id
+ assert actual_load_status["snapshot_id"] == hash_to_hex(
+ expected_snapshot_first_visit_id
)
assert_last_visit_matches(swh_storage, URL, status="full", type="tar")
@@ -153,6 +161,7 @@
"snapshot": 1,
} == stats
+ release_id = hash_to_bytes(list(_expected_new_releases_first_visit)[0])
expected_snapshot = Snapshot(
id=expected_snapshot_first_visit_id,
branches={
@@ -160,14 +169,30 @@
target_type=TargetType.ALIAS, target=b"releases/0.1.0",
),
b"releases/0.1.0": SnapshotBranch(
- target_type=TargetType.RELEASE,
- target=hash_to_bytes(list(_expected_new_releases_first_visit)[0]),
+ target_type=TargetType.RELEASE, target=release_id,
),
},
)
-
check_snapshot(expected_snapshot, swh_storage)
+ assert swh_storage.release_get([release_id])[0] == Release(
+ id=release_id,
+ name=b"0.1.0",
+ message=(
+ b"Synthetic release for archive at "
+ b"https://ftp.gnu.org/gnu/8sync/8sync-0.1.0.tar.gz"
+ ),
+ target=hash_to_bytes("3aebc29ed1fccc4a6f2f2010fb8e57882406b528"),
+ target_type=ObjectType.DIRECTORY,
+ synthetic=True,
+ author=Person.from_fullname(b""),
+ date=TimestampWithTimezone(
+ timestamp=Timestamp(seconds=944729610, microseconds=0),
+ offset=0,
+ negative_utc=False,
+ ),
+ )
+
expected_contents = map(hash_to_bytes, _expected_new_contents_first_visit)
assert list(swh_storage.content_missing_per_sha1(expected_contents)) == []
diff --git a/swh/loader/package/cran/loader.py b/swh/loader/package/cran/loader.py
--- a/swh/loader/package/cran/loader.py
+++ b/swh/loader/package/cran/loader.py
@@ -30,6 +30,7 @@
@attr.s
class CRANPackageInfo(BasePackageInfo):
raw_info = attr.ib(type=Dict[str, Any])
+ name = attr.ib(type=str)
EXTID_TYPE = "cran-sha256"
MANIFEST_FORMAT = string.Template("$version $url")
@@ -41,6 +42,7 @@
url=url,
filename=path.basename(url),
raw_info=a_metadata,
+ name=a_metadata["package"],
version=a_metadata["version"],
)
@@ -88,9 +90,13 @@
metadata = extract_intrinsic_metadata(uncompressed_path)
date = parse_date(metadata.get("Date"))
author = Person.from_fullname(metadata.get("Maintainer", "").encode())
+ msg = (
+ f"Synthetic release for CRAN source package {p_info.name} "
+ f"version {p_info.version}"
+ )
return Release(
name=p_info.version.encode(),
- message=p_info.version.encode(),
+ message=msg.encode(),
date=date,
author=author,
target_type=ObjectType.DIRECTORY,
diff --git a/swh/loader/package/cran/tests/test_cran.py b/swh/loader/package/cran/tests/test_cran.py
--- a/swh/loader/package/cran/tests/test_cran.py
+++ b/swh/loader/package/cran/tests/test_cran.py
@@ -20,17 +20,27 @@
)
from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats
from swh.model.hashutil import hash_to_bytes
-from swh.model.model import Snapshot, SnapshotBranch, TargetType, TimestampWithTimezone
+from swh.model.model import (
+ ObjectType,
+ Person,
+ Release,
+ Snapshot,
+ SnapshotBranch,
+ TargetType,
+ Timestamp,
+ TimestampWithTimezone,
+)
+
+RELEASE_ID = hash_to_bytes("9a977f6415e6997fd9dd53c6dcb540ff0a7bff26")
SNAPSHOT = Snapshot(
- id=hash_to_bytes("56ed00938d83892bd5b42f2f368ae38a1dbfa718"),
+ id=hash_to_bytes("3787efc620c55b1e18889cfa561d9bcdc62c4cb2"),
branches={
b"HEAD": SnapshotBranch(
target=b"releases/2.22-6", target_type=TargetType.ALIAS
),
b"releases/2.22-6": SnapshotBranch(
- target=hash_to_bytes("42993a72eac50a4a83523c9327a52be3593755a8"),
- target_type=TargetType.RELEASE,
+ target=RELEASE_ID, target_type=TargetType.RELEASE,
),
},
)
@@ -172,7 +182,15 @@
f"{base_url}/src_contrib_1.4.0_Recommended_KernSmooth_{version}.tar.gz" # noqa
)
loader = CRANLoader(
- swh_storage, origin_url, artifacts=[{"url": artifact_url, "version": version,}]
+ swh_storage,
+ origin_url,
+ artifacts=[
+ {
+ "url": artifact_url,
+ "version": version,
+ "package": "Recommended_KernSmooth",
+ }
+ ],
)
actual_load_status = loader.load()
@@ -188,6 +206,28 @@
check_snapshot(SNAPSHOT, swh_storage)
+ assert swh_storage.release_get([RELEASE_ID])[0] == Release(
+ id=RELEASE_ID,
+ name=b"2.22-6",
+ message=(
+ b"Synthetic release for CRAN source package "
+ b"Recommended_KernSmooth version 2.22-6"
+ ),
+ target=hash_to_bytes("ff64177fea3f4a5136b9caf7581a4f7d4cf65296"),
+ target_type=ObjectType.DIRECTORY,
+ synthetic=True,
+ author=Person(
+ fullname=b"Brian Ripley <ripley@stats.ox.ac.uk>",
+ name=b"Brian Ripley",
+ email=b"ripley@stats.ox.ac.uk",
+ ),
+ date=TimestampWithTimezone(
+ timestamp=Timestamp(seconds=991958400, microseconds=0),
+ offset=0,
+ negative_utc=False,
+ ),
+ )
+
visit_stats = get_stats(swh_storage)
assert {
"content": 33,
@@ -218,7 +258,15 @@
f"{base_url}/src_contrib_1.4.0_Recommended_KernSmooth_{version}.tar.gz" # noqa
)
loader = CRANLoader(
- swh_storage, origin_url, artifacts=[{"url": artifact_url, "version": version}]
+ swh_storage,
+ origin_url,
+ artifacts=[
+ {
+ "url": artifact_url,
+ "version": version,
+ "package": "Recommended_KernSmooth",
+ }
+ ],
)
# first visit
@@ -342,7 +390,13 @@
loader = CRANLoader(
swh_storage,
origin_url,
- artifacts=[{"url": artifact_url, "version": version}],
+ artifacts=[
+ {
+ "url": artifact_url,
+ "version": version,
+ "package": "Recommended_KernSmooth",
+ }
+ ],
)
actual_load_status = loader.load()
diff --git a/swh/loader/package/debian/loader.py b/swh/loader/package/debian/loader.py
--- a/swh/loader/package/debian/loader.py
+++ b/swh/loader/package/debian/loader.py
@@ -224,9 +224,9 @@
logger.debug("intrinsic_metadata: %s", intrinsic_metadata)
logger.debug("p_info: %s", p_info)
- msg = "Synthetic revision for Debian source package %s version %s" % (
- p_info.name,
- p_info.full_version,
+ msg = (
+ f"Synthetic release for Debian source package {p_info.name} "
+ f"version {p_info.full_version}"
)
author = prepare_person(intrinsic_metadata.changelog.person)
@@ -235,7 +235,7 @@
# inspired from swh.loader.debian.converters.package_metadata_to_revision
return Release(
name=p_info.version.encode(),
- message=msg.encode("utf-8"),
+ message=msg.encode(),
author=author,
date=date,
target=directory,
diff --git a/swh/loader/package/debian/tests/test_debian.py b/swh/loader/package/debian/tests/test_debian.py
--- a/swh/loader/package/debian/tests/test_debian.py
+++ b/swh/loader/package/debian/tests/test_debian.py
@@ -23,7 +23,16 @@
)
from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats
from swh.model.hashutil import hash_to_bytes
-from swh.model.model import Person, Snapshot, SnapshotBranch, TargetType
+from swh.model.model import (
+ ObjectType,
+ Person,
+ Release,
+ Snapshot,
+ SnapshotBranch,
+ TargetType,
+ Timestamp,
+ TimestampWithTimezone,
+)
logger = logging.getLogger(__name__)
@@ -110,7 +119,7 @@
)
actual_load_status = loader.load()
- expected_snapshot_id = "8bc5d12e2443ab216fdd2f969b25b39e96c20fef"
+ expected_snapshot_id = "20073c91e85b8bcbd2639990e76765d25bd2c0a6"
assert actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id,
@@ -124,18 +133,38 @@
snapshot=hash_to_bytes(expected_snapshot_id),
)
+ release_id = hash_to_bytes("ed191d99e070a33458a4a402becd0b4bba09cd1e")
+
expected_snapshot = Snapshot(
id=hash_to_bytes(expected_snapshot_id),
branches={
b"releases/stretch/contrib/0.7.2-3": SnapshotBranch(
- target_type=TargetType.RELEASE,
- target=hash_to_bytes("5a99736512d381700c5f54d7fdd6b46e136535a2"),
+ target_type=TargetType.RELEASE, target=release_id,
)
},
) # different than the previous loader as no release is done
check_snapshot(expected_snapshot, swh_storage)
+ assert swh_storage.release_get([release_id])[0] == Release(
+ id=release_id,
+ name=b"stretch/contrib/0.7.2-3",
+ message=b"Synthetic release for Debian source package cicero version 0.7.2-3",
+ target=hash_to_bytes("798df511408c53bf842a8e54d4d335537836bdc3"),
+ target_type=ObjectType.DIRECTORY,
+ synthetic=True,
+ author=Person(
+ fullname=b"Samuel Thibault <sthibault@debian.org>",
+ name=b"Samuel Thibault",
+ email=b"sthibault@debian.org",
+ ),
+ date=TimestampWithTimezone(
+ timestamp=Timestamp(seconds=1413730355, microseconds=0),
+ offset=120,
+ negative_utc=False,
+ ),
+ )
+
stats = get_stats(swh_storage)
assert {
"content": 42,
@@ -162,7 +191,7 @@
actual_load_status = loader.load()
- expected_snapshot_id = "8bc5d12e2443ab216fdd2f969b25b39e96c20fef"
+ expected_snapshot_id = "20073c91e85b8bcbd2639990e76765d25bd2c0a6"
assert actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id,
@@ -181,7 +210,7 @@
branches={
b"releases/stretch/contrib/0.7.2-3": SnapshotBranch(
target_type=TargetType.RELEASE,
- target=hash_to_bytes("5a99736512d381700c5f54d7fdd6b46e136535a2"),
+ target=hash_to_bytes("ed191d99e070a33458a4a402becd0b4bba09cd1e"),
)
},
) # different than the previous loader as no release is done
@@ -418,7 +447,7 @@
)
actual_load_status = loader.load()
- expected_snapshot_id = "3d26243c91eb084c350627a5a102cfe039c5b92a"
+ expected_snapshot_id = "3e423d7889ebd8df0ed0373016f035dfed8541cb"
assert actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id,
@@ -437,11 +466,11 @@
branches={
b"releases/stretch/contrib/0.7.2-3": SnapshotBranch(
target_type=TargetType.RELEASE,
- target=hash_to_bytes("5a99736512d381700c5f54d7fdd6b46e136535a2"),
+ target=hash_to_bytes("ed191d99e070a33458a4a402becd0b4bba09cd1e"),
),
b"releases/buster/contrib/0.7.2-4": SnapshotBranch(
target_type=TargetType.RELEASE,
- target=hash_to_bytes("192fc7ccce80f64a0d3cf33d379133af067ec721"),
+ target=hash_to_bytes("d3dff4a416816c36dc284e49c1c9eed52c2d2ef4"),
),
},
)
diff --git a/swh/loader/package/nixguix/loader.py b/swh/loader/package/nixguix/loader.py
--- a/swh/loader/package/nixguix/loader.py
+++ b/swh/loader/package/nixguix/loader.py
@@ -160,7 +160,7 @@
) -> Optional[Release]:
return Release(
name=p_info.version.encode(),
- message=b"",
+ message=None,
author=EMPTY_AUTHOR,
date=None,
target=directory,
diff --git a/swh/loader/package/nixguix/tests/test_nixguix.py b/swh/loader/package/nixguix/tests/test_nixguix.py
--- a/swh/loader/package/nixguix/tests/test_nixguix.py
+++ b/swh/loader/package/nixguix/tests/test_nixguix.py
@@ -28,7 +28,10 @@
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
+ ObjectType,
+ Person,
RawExtrinsicMetadata,
+ Release,
Snapshot,
SnapshotBranch,
TargetType,
@@ -54,14 +57,14 @@
SNAPSHOT1 = Snapshot(
- id=hash_to_bytes("771d13ae4e799755c22d1e05da8fc39cf215de58"),
+ id=hash_to_bytes("efe5145f85af3fc87f34102d8b8481cd5198f4f8"),
branches={
b"evaluation": SnapshotBranch(
target=hash_to_bytes("cc4e04c26672dd74e5fd0fecb78b435fb55368f7"),
target_type=TargetType.REVISION,
),
b"https://github.com/owner-1/repository-1/revision-1.tgz": SnapshotBranch(
- target=hash_to_bytes("24853190589d26d0ea2b6c0330b553ff39176e0c"),
+ target=hash_to_bytes("df7811b9644ed8ef088e2e7add62ed32b0bab15f"),
target_type=TargetType.RELEASE,
),
},
@@ -273,8 +276,39 @@
def test_loader_one_visit(swh_storage, requests_mock_datadir, raw_sources):
loader = NixGuixLoader(swh_storage, sources_url)
- res = loader.load()
- assert res["status"] == "eventful"
+ load_status = loader.load()
+ expected_snapshot_id_hex = "efe5145f85af3fc87f34102d8b8481cd5198f4f8"
+ expected_snapshot_id = hash_to_bytes(expected_snapshot_id_hex)
+ assert load_status == {
+ "status": "eventful",
+ "snapshot_id": expected_snapshot_id_hex,
+ }
+
+ release_id = hash_to_bytes("df7811b9644ed8ef088e2e7add62ed32b0bab15f")
+ expected_snapshot = Snapshot(
+ id=expected_snapshot_id,
+ branches={
+ b"evaluation": SnapshotBranch(
+ target=hash_to_bytes("cc4e04c26672dd74e5fd0fecb78b435fb55368f7"),
+ target_type=TargetType.REVISION,
+ ),
+ b"https://github.com/owner-1/repository-1/revision-1.tgz": SnapshotBranch(
+ target=release_id, target_type=TargetType.RELEASE,
+ ),
+ },
+ )
+ check_snapshot(expected_snapshot, storage=swh_storage)
+
+ assert swh_storage.release_get([release_id])[0] == Release(
+ id=release_id,
+ name=b"https://github.com/owner-1/repository-1/revision-1.tgz",
+ message=None,
+ target=hash_to_bytes("4de2e07d3742718d928e974b8a4c721b9f7b33bf"),
+ target_type=ObjectType.DIRECTORY,
+ synthetic=True,
+ author=Person.from_fullname(b""),
+ date=None,
+ )
stats = get_stats(swh_storage)
assert {
@@ -413,7 +447,7 @@
loader = NixGuixLoader(swh_storage, sources_url)
load_status = loader.load()
- expected_snapshot_id_hex = "c5bba84fd5ac3342566effb86190619092d34e79"
+ expected_snapshot_id_hex = "c1983a0a3f647548e1fb92f30339da6848fe9f7a"
expected_snapshot_id = hash_to_bytes(expected_snapshot_id_hex)
assert load_status == {
"status": "eventful",
@@ -439,11 +473,11 @@
target_type=TargetType.REVISION,
),
b"https://github.com/owner-1/repository-1/revision-1.tgz": SnapshotBranch(
- target=hash_to_bytes("24853190589d26d0ea2b6c0330b553ff39176e0c"),
+ target=hash_to_bytes("df7811b9644ed8ef088e2e7add62ed32b0bab15f"),
target_type=TargetType.RELEASE,
),
b"https://github.com/owner-2/repository-1/revision-1.tgz": SnapshotBranch(
- target=hash_to_bytes("3d44fbe814ba802cfd77f83975e45766d3a2ba85"),
+ target=hash_to_bytes("5cc0115cd643902b837cb6cfbc9f5865bc5a7cb2"),
target_type=TargetType.RELEASE,
),
},
@@ -573,7 +607,7 @@
]
archive_loader = ArchiveLoader(swh_storage, url=gnu_url, artifacts=gnu_artifacts)
actual_load_status = archive_loader.load()
- expected_snapshot_id = "cdf8f335fa0c81c8ad089870ec14f52b1980eb6c"
+ expected_snapshot_id = "af62f6f6d464f9b29f270d1bbefa355af38946c4"
assert actual_load_status["status"] == "eventful"
assert actual_load_status["snapshot_id"] == expected_snapshot_id # noqa
diff --git a/swh/loader/package/npm/loader.py b/swh/loader/package/npm/loader.py
--- a/swh/loader/package/npm/loader.py
+++ b/swh/loader/package/npm/loader.py
@@ -106,8 +106,8 @@
str: origin url (e.g. https://www.npmjs.com/package/<package-name>)
"""
super().__init__(storage=storage, url=url, max_content_size=max_content_size)
- package_name = url.split("https://www.npmjs.com/package/")[1]
- safe_name = quote(package_name, safe="")
+ self.package_name = url.split("https://www.npmjs.com/package/")[1]
+ safe_name = quote(self.package_name, safe="")
self.provider_url = f"https://replicate.npmjs.com/{safe_name}/"
self._info: Dict[str, Any] = {}
self._versions = None
@@ -147,7 +147,10 @@
if not i_metadata:
return None
author = extract_npm_package_author(i_metadata)
- message = i_metadata["version"].encode("ascii")
+ msg = (
+ f"Synthetic release for NPM source package {self.package_name} "
+ f"version {p_info.version}"
+ )
if p_info.date is None:
url = p_info.url
@@ -164,7 +167,7 @@
r = Release(
name=p_info.version.encode(),
- message=message,
+ message=msg.encode(),
author=author,
date=date,
target=directory,
diff --git a/swh/loader/package/npm/tests/test_npm.py b/swh/loader/package/npm/tests/test_npm.py
--- a/swh/loader/package/npm/tests/test_npm.py
+++ b/swh/loader/package/npm/tests/test_npm.py
@@ -19,9 +19,12 @@
from swh.model.model import (
Person,
RawExtrinsicMetadata,
+ Release,
Snapshot,
SnapshotBranch,
TargetType,
+ Timestamp,
+ TimestampWithTimezone,
)
from swh.model.model import MetadataAuthority, MetadataAuthorityType, MetadataFetcher
from swh.model.model import ObjectType as ModelObjectType
@@ -280,13 +283,13 @@
_expected_new_releases_first_visit = normalize_hashes(
{
- "d25e722a32c145b3eb88b416049dd35d27759a87": (
+ "adcc40ee87a3ebb1b5a82edd692cf52aa5099cee": (
"42753c0c2ab00c4501b552ac4671c68f3cf5aece"
),
- "3522e846b97c0b8434c565fe891c0f082a357e5d": (
+ "c781147df0e4963a0f9859134abd28296b702233": (
"3370d20d6f96dc1c9e50f083e2134881db110f4f"
),
- "54f6c1711c6aedb6de3cf2d6347b9f772e343784": (
+ "f544812dac98e7589155be7dfaef64477a408ec0": (
"d7895533ef5edbcffdea3f057d9fef3a1ef845ce"
),
}
@@ -307,7 +310,7 @@
loader = NpmLoader(swh_storage, url)
actual_load_status = loader.load()
- expected_snapshot_id = hash_to_bytes("ddaad89b0b4edb7eefe7c92e9b1166caa776ebbc")
+ expected_snapshot_id = hash_to_bytes("d24e3f10492ade1e9462ec701370fef4a79a40f1")
assert actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id.hex(),
@@ -318,9 +321,9 @@
)
versions = [
- ("0.0.2", "d25e722a32c145b3eb88b416049dd35d27759a87"),
- ("0.0.3", "3522e846b97c0b8434c565fe891c0f082a357e5d"),
- ("0.0.4", "54f6c1711c6aedb6de3cf2d6347b9f772e343784"),
+ ("0.0.2", "adcc40ee87a3ebb1b5a82edd692cf52aa5099cee"),
+ ("0.0.3", "c781147df0e4963a0f9859134abd28296b702233"),
+ ("0.0.4", "f544812dac98e7589155be7dfaef64477a408ec0"),
]
expected_snapshot = Snapshot(
@@ -340,6 +343,27 @@
)
check_snapshot(expected_snapshot, swh_storage)
+ assert swh_storage.release_get(
+ [hash_to_bytes("adcc40ee87a3ebb1b5a82edd692cf52aa5099cee")]
+ )[0] == Release(
+ name=b"0.0.2",
+ message=b"Synthetic release for NPM source package org version 0.0.2",
+ target=hash_to_bytes("42753c0c2ab00c4501b552ac4671c68f3cf5aece"),
+ target_type=ModelObjectType.DIRECTORY,
+ synthetic=True,
+ author=Person(
+ fullname=b"mooz <stillpedant@gmail.com>",
+ name=b"mooz",
+ email=b"stillpedant@gmail.com",
+ ),
+ date=TimestampWithTimezone(
+ timestamp=Timestamp(seconds=1388590833, microseconds=0),
+ offset=0,
+ negative_utc=False,
+ ),
+ id=hash_to_bytes("adcc40ee87a3ebb1b5a82edd692cf52aa5099cee"),
+ )
+
contents = swh_storage.content_get(_expected_new_contents_first_visit)
count = sum(0 if content is None else 1 for content in contents)
assert count == len(_expected_new_contents_first_visit)
@@ -403,7 +427,7 @@
url = package_url(package)
loader = NpmLoader(swh_storage, url)
- expected_snapshot_id = hash_to_bytes("ddaad89b0b4edb7eefe7c92e9b1166caa776ebbc")
+ expected_snapshot_id = hash_to_bytes("d24e3f10492ade1e9462ec701370fef4a79a40f1")
actual_load_status = loader.load()
assert actual_load_status == {
"status": "eventful",
@@ -466,7 +490,7 @@
loader = NpmLoader(swh_storage, url)
actual_load_status = loader.load()
- expected_snapshot_id = hash_to_bytes("7a89bc3cb51ff1d3213b2151c745d82c3b9d69b1")
+ expected_snapshot_id = hash_to_bytes("92ff37da8045f0088ed35bce0bc34e2025202825")
assert actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id.hex(),
@@ -483,11 +507,11 @@
),
b"releases/0.1.0": SnapshotBranch(
target_type=TargetType.RELEASE,
- target=hash_to_bytes("103fa6d0a1abb405468e3590dcf634bcb77f67be"),
+ target=hash_to_bytes("c5e0f0e185660b6bdd694ca5c68babe5bab20e24"),
),
b"releases/0.1.1-alpha.14": SnapshotBranch(
target_type=TargetType.RELEASE,
- target=hash_to_bytes("c00b54143582a4e963e0b86e8dfa58eedd260020"),
+ target=hash_to_bytes("2f89c709eacc974b587e13f90d10a826b23a550e"),
),
},
)
@@ -566,7 +590,7 @@
loader = NpmLoader(swh_storage, url)
actual_load_status = loader.load()
- expected_snapshot_id = hash_to_bytes("7f5e591dd3c4754abca4db1cc18355671e2c014c")
+ expected_snapshot_id = hash_to_bytes("2a7a67725f9c7134f56612281e8d1638f1386118")
assert actual_load_status == {
"status": "eventful",
@@ -582,7 +606,7 @@
),
b"releases/0.0.1": SnapshotBranch(
target_type=TargetType.RELEASE,
- target=hash_to_bytes("199bf0ad020617357d608655e6549e526a65dc36"),
+ target=hash_to_bytes("68b2a100103cecec06b8dd780228bb751f2dc6f3"),
),
},
)
diff --git a/swh/loader/package/opam/loader.py b/swh/loader/package/opam/loader.py
--- a/swh/loader/package/opam/loader.py
+++ b/swh/loader/package/opam/loader.py
@@ -244,10 +244,14 @@
self, p_info: OpamPackageInfo, uncompressed_path: str, directory: Sha1Git,
) -> Optional[Release]:
+ msg = (
+ f"Synthetic release for OPAM source package {self.opam_package} "
+ f"version {p_info.version}"
+ )
return Release(
name=p_info.version.encode(),
author=p_info.author,
- message=str.encode(p_info.version),
+ message=msg.encode(),
date=None,
target=directory,
target_type=ObjectType.DIRECTORY,
diff --git a/swh/loader/package/opam/tests/test_opam.py b/swh/loader/package/opam/tests/test_opam.py
--- a/swh/loader/package/opam/tests/test_opam.py
+++ b/swh/loader/package/opam/tests/test_opam.py
@@ -9,15 +9,15 @@
from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats
from swh.model.hashutil import hash_to_bytes
from swh.model.model import (
- MetadataAuthority,
- MetadataAuthorityType,
- MetadataFetcher,
Person,
RawExtrinsicMetadata,
+ Release,
Snapshot,
SnapshotBranch,
TargetType,
)
+from swh.model.model import MetadataAuthority, MetadataAuthorityType, MetadataFetcher
+from swh.model.model import ObjectType as ModelObjectType
from swh.model.swhids import CoreSWHID, ExtendedObjectType, ExtendedSWHID, ObjectType
from swh.storage.interface import PagedResult
@@ -110,23 +110,37 @@
actual_load_status = loader.load()
- expected_snapshot_id = hash_to_bytes("50b5961c27dd4f8b138acce8bac4f90d1e33081f")
+ expected_snapshot_id = hash_to_bytes("e480958fa7851268be2bcc8d01145c0c9624b34b")
assert actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id.hex(),
}
+ release_id = hash_to_bytes("03db7f0d572509f1c7ce18c847db83070e26fd5e")
+
expected_snapshot = Snapshot(
id=expected_snapshot_id,
branches={
b"HEAD": SnapshotBranch(target=b"agrid.0.1", target_type=TargetType.ALIAS,),
b"agrid.0.1": SnapshotBranch(
- target=hash_to_bytes("efcb9ef9d0f2a85312463251732b42f9e45a5c12"),
- target_type=TargetType.RELEASE,
+ target=release_id, target_type=TargetType.RELEASE,
),
},
)
+ assert swh_storage.release_get([release_id])[0] == Release(
+ name=b"0.1",
+ message=b"Synthetic release for OPAM source package agrid version 0.1",
+ target=hash_to_bytes("00412ee5bc601deb462e55addd1004715116785e"),
+ target_type=ModelObjectType.DIRECTORY,
+ synthetic=True,
+ author=Person(
+ fullname=b"OCamlPro <contact@ocamlpro.com>", name=None, email=None
+ ),
+ date=None,
+ id=release_id,
+ )
+
assert_last_visit_matches(
swh_storage, url, status="full", type="opam", snapshot=expected_snapshot_id
)
@@ -167,7 +181,7 @@
actual_load_status = loader.load()
- expected_snapshot_id = hash_to_bytes("f0a974e47999e74d323f1fb9604fde72527bda28")
+ expected_snapshot_id = hash_to_bytes("1a70631bee44c86dded71e0a091b1c91c110f812")
assert actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id.hex(),
@@ -180,15 +194,15 @@
target=b"directories.0.3", target_type=TargetType.ALIAS,
),
b"directories.0.1": SnapshotBranch(
- target=hash_to_bytes("1f839cb1f4720d6b33fdd856e3ff1119497979d9"),
+ target=hash_to_bytes("013d53d7e1aedbe03aaa3d5c0e6d1d780ef2634d"),
target_type=TargetType.RELEASE,
),
b"directories.0.2": SnapshotBranch(
- target=hash_to_bytes("4133834d966381804347efbc41e35dd2bdd48962"),
+ target=hash_to_bytes("4fdcc3606c0af33cb4d733b70074e79f03e928a1"),
target_type=TargetType.RELEASE,
),
b"directories.0.3": SnapshotBranch(
- target=hash_to_bytes("2f20cabfbacfe447b80dc2a4eb14d461775100c8"),
+ target=hash_to_bytes("5de72a60f81649157d267773c30e897b7005dcdb"),
target_type=TargetType.RELEASE,
),
},
@@ -222,7 +236,7 @@
actual_load_status = loader.load()
- expected_snapshot_id = hash_to_bytes("987425c6fe94d3972c4c4e97ee27a6a7c8b68e82")
+ expected_snapshot_id = hash_to_bytes("96246035587354a71f429d5b9b8dcc98afad3708")
assert actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id.hex(),
@@ -251,7 +265,7 @@
assert branch_name == expected_branch_name
assert package_info == expected_package_info
- release_id = hash_to_bytes("8d0612cdf172e5dff3d876ca2bbc0f6003cc36cc")
+ release_id = hash_to_bytes("4904ad9d0f3b3f84cec2b899d0d05c682b0efdcb")
expected_snapshot = Snapshot(
id=hash_to_bytes(actual_load_status["snapshot_id"]),
@@ -298,7 +312,7 @@
assert actual_load_status["status"] == "eventful"
- expected_release_id = hash_to_bytes("8d0612cdf172e5dff3d876ca2bbc0f6003cc36cc")
+ expected_release_id = hash_to_bytes("4904ad9d0f3b3f84cec2b899d0d05c682b0efdcb")
expected_snapshot = Snapshot(
id=hash_to_bytes(actual_load_status["snapshot_id"]),
diff --git a/swh/loader/package/pypi/loader.py b/swh/loader/package/pypi/loader.py
--- a/swh/loader/package/pypi/loader.py
+++ b/swh/loader/package/pypi/loader.py
@@ -41,17 +41,21 @@
class PyPIPackageInfo(BasePackageInfo):
raw_info = attr.ib(type=Dict[str, Any])
+ name = attr.ib(type=str)
comment_text = attr.ib(type=Optional[str])
sha256 = attr.ib(type=str)
upload_time = attr.ib(type=str)
@classmethod
- def from_metadata(cls, metadata: Dict[str, Any], version: str) -> "PyPIPackageInfo":
+ def from_metadata(
+ cls, metadata: Dict[str, Any], name: str, version: str
+ ) -> "PyPIPackageInfo":
return cls(
url=metadata["url"],
filename=metadata["filename"],
version=version,
raw_info=metadata,
+ name=name,
comment_text=metadata.get("comment_text"),
sha256=metadata["digests"]["sha256"],
upload_time=metadata["upload_time"],
@@ -116,7 +120,9 @@
):
continue
- p_info = PyPIPackageInfo.from_metadata(meta, version=version)
+ p_info = PyPIPackageInfo.from_metadata(
+ meta, name=self.info()["info"]["name"], version=version
+ )
res.append((version, p_info))
if len(res) == 1:
@@ -134,17 +140,22 @@
return None
# from intrinsic metadata
- version_ = i_metadata.get("version", "")
+ version_ = i_metadata.get("version", p_info.version)
author_ = author(i_metadata)
- # from extrinsic metadata
- message = p_info.comment_text or ""
- message = "%s: %s" % (version_, message) if message else version_
+ if p_info.comment_text:
+ msg = p_info.comment_text
+ else:
+ msg = (
+ f"Synthetic release for PyPI source package {p_info.name} "
+ f"version {version_}"
+ )
+
date = TimestampWithTimezone.from_iso8601(p_info.upload_time)
return Release(
name=p_info.version.encode(),
- message=message.encode(),
+ message=msg.encode(),
author=author_,
date=date,
target=directory,
diff --git a/swh/loader/package/pypi/tests/test_pypi.py b/swh/loader/package/pypi/tests/test_pypi.py
--- a/swh/loader/package/pypi/tests/test_pypi.py
+++ b/swh/loader/package/pypi/tests/test_pypi.py
@@ -329,7 +329,7 @@
assert actual_load_status["status"] == "eventful"
assert actual_load_status["snapshot_id"] is not None
- expected_release_id = hash_to_bytes("a1e10745d375be66c1b65e55c0c15fe98776b53c")
+ expected_release_id = hash_to_bytes("e05d81600f3db1f905d23ab2a06ea64460c7e3f4")
expected_snapshot = Snapshot(
id=hash_to_bytes(actual_load_status["snapshot_id"]),
@@ -338,12 +338,11 @@
target=b"releases/1.2.0", target_type=TargetType.ALIAS,
),
b"releases/1.1.0": SnapshotBranch(
- target=hash_to_bytes("9478c9981887fdf5ada3f1fcb20c81069cdf4c44"),
+ target=hash_to_bytes("ed4132a0160d97752a6ce5716722fb937a2e00b1"),
target_type=TargetType.RELEASE,
),
b"releases/1.2.0": SnapshotBranch(
- target=hash_to_bytes("a1e10745d375be66c1b65e55c0c15fe98776b53c"),
- target_type=TargetType.RELEASE,
+ target=expected_release_id, target_type=TargetType.RELEASE,
),
},
)
@@ -397,7 +396,7 @@
loader = PyPILoader(swh_storage, url)
actual_load_status = loader.load()
- expected_snapshot_id = hash_to_bytes("eee24d5b0c156ebb4ece0c810c9dce636ebe881f")
+ expected_snapshot_id = hash_to_bytes("1838a3d6fff760338ab14b95c43c2dabcbb03c5a")
assert actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id.hex(),
@@ -411,7 +410,7 @@
id=hash_to_bytes(expected_snapshot_id),
branches={
b"releases/1.2.0": SnapshotBranch(
- target=hash_to_bytes("a1e10745d375be66c1b65e55c0c15fe98776b53c"),
+ target=hash_to_bytes("e05d81600f3db1f905d23ab2a06ea64460c7e3f4"),
target_type=TargetType.RELEASE,
),
b"HEAD": SnapshotBranch(
@@ -443,7 +442,7 @@
loader = PyPILoader(swh_storage, url)
actual_load_status = loader.load()
- expected_snapshot_id = hash_to_bytes("62d957f2b5cdc515bea0a46252a3ab29ee271636")
+ expected_snapshot_id = hash_to_bytes("7e34abda294fb80e6d4e64637ae43fed112079ca")
assert actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id.hex(),
@@ -457,11 +456,11 @@
id=expected_snapshot_id,
branches={
b"releases/1.1.0": SnapshotBranch(
- target=hash_to_bytes("9478c9981887fdf5ada3f1fcb20c81069cdf4c44"),
+ target=hash_to_bytes("ed4132a0160d97752a6ce5716722fb937a2e00b1"),
target_type=TargetType.RELEASE,
),
b"releases/1.2.0": SnapshotBranch(
- target=hash_to_bytes("a1e10745d375be66c1b65e55c0c15fe98776b53c"),
+ target=hash_to_bytes("e05d81600f3db1f905d23ab2a06ea64460c7e3f4"),
target_type=TargetType.RELEASE,
),
b"HEAD": SnapshotBranch(
@@ -492,7 +491,7 @@
loader = PyPILoader(swh_storage, url)
actual_load_status = loader.load()
- snapshot_id = hash_to_bytes("62d957f2b5cdc515bea0a46252a3ab29ee271636")
+ snapshot_id = hash_to_bytes("7e34abda294fb80e6d4e64637ae43fed112079ca")
assert actual_load_status == {
"status": "eventful",
"snapshot_id": snapshot_id.hex(),
@@ -505,11 +504,11 @@
id=snapshot_id,
branches={
b"releases/1.1.0": SnapshotBranch(
- target=hash_to_bytes("9478c9981887fdf5ada3f1fcb20c81069cdf4c44"),
+ target=hash_to_bytes("ed4132a0160d97752a6ce5716722fb937a2e00b1"),
target_type=TargetType.RELEASE,
),
b"releases/1.2.0": SnapshotBranch(
- target=hash_to_bytes("a1e10745d375be66c1b65e55c0c15fe98776b53c"),
+ target=hash_to_bytes("e05d81600f3db1f905d23ab2a06ea64460c7e3f4"),
target_type=TargetType.RELEASE,
),
b"HEAD": SnapshotBranch(
@@ -560,7 +559,7 @@
visit1_actual_load_status = loader.load()
visit1_stats = get_stats(swh_storage)
- expected_snapshot_id = hash_to_bytes("62d957f2b5cdc515bea0a46252a3ab29ee271636")
+ expected_snapshot_id = hash_to_bytes("7e34abda294fb80e6d4e64637ae43fed112079ca")
assert visit1_actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id.hex(),
@@ -589,7 +588,7 @@
visit2_stats = get_stats(swh_storage)
assert visit2_actual_load_status["status"] == "eventful", visit2_actual_load_status
- expected_snapshot_id2 = hash_to_bytes("6a8a84e7f765bed4362315fb054adb2466598636")
+ expected_snapshot_id2 = hash_to_bytes("6636693213eab9000b8eee1c5bbb3f1b675a4c70")
assert visit2_actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id2.hex(),
@@ -603,15 +602,15 @@
id=expected_snapshot_id2,
branches={
b"releases/1.1.0": SnapshotBranch(
- target=hash_to_bytes("9478c9981887fdf5ada3f1fcb20c81069cdf4c44"),
+ target=hash_to_bytes("ed4132a0160d97752a6ce5716722fb937a2e00b1"),
target_type=TargetType.RELEASE,
),
b"releases/1.2.0": SnapshotBranch(
- target=hash_to_bytes("a1e10745d375be66c1b65e55c0c15fe98776b53c"),
+ target=hash_to_bytes("e05d81600f3db1f905d23ab2a06ea64460c7e3f4"),
target_type=TargetType.RELEASE,
),
b"releases/1.3.0": SnapshotBranch(
- target=hash_to_bytes("d46442e99bb6e05df5f75a7f0f7f61a4f2098147"),
+ target=hash_to_bytes("a21b09cbec8e31f47307f196bb1f939effc26e11"),
target_type=TargetType.RELEASE,
),
b"HEAD": SnapshotBranch(
@@ -665,7 +664,7 @@
loader = PyPILoader(swh_storage, url)
actual_load_status = loader.load()
- expected_snapshot_id = hash_to_bytes("a136ee226316276c347d7be3da07df5828605927")
+ expected_snapshot_id = hash_to_bytes("a65c03a837b24720fa95622de07074e279eddd0d")
assert actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id.hex(),
@@ -679,11 +678,11 @@
id=expected_snapshot_id,
branches={
b"releases/1.1.0/nexter-1.1.0.zip": SnapshotBranch(
- target=hash_to_bytes("9478c9981887fdf5ada3f1fcb20c81069cdf4c44"),
+ target=hash_to_bytes("18d0087b1e1a3a31070d54bf3e9edbd44ab01cb5"),
target_type=TargetType.RELEASE,
),
b"releases/1.1.0/nexter-1.1.0.tar.gz": SnapshotBranch(
- target=hash_to_bytes("b3391cb4007fb6872c4dfab476a7cfe7443a1bb4"),
+ target=hash_to_bytes("b2b379b3eb61adcde22e10788b1fc5f985e938d2"),
target_type=TargetType.RELEASE,
),
},
@@ -734,6 +733,7 @@
url=url,
filename="GermlineFilter-1.2.tar.gz",
version="1.2",
+ name="GermlineFilter",
directory_extrinsic_metadata=[],
raw_info={},
comment_text="",
@@ -762,7 +762,10 @@
release = loader.build_release(p_info, str(tmp_path), directory)
- # without comment_text and version in PKG-INFO, message should be empty
+ # without comment_text and version in PKG-INFO, the standard message is used
- assert release.message == b""
+ assert (
+ release.message
+ == b"Synthetic release for PyPI source package GermlineFilter version 1.2"
+ )
def test_filter_out_invalid_sdists(swh_storage, requests_mock):
@@ -782,6 +785,7 @@
requests_mock.get(
json_url,
json={
+ "info": {"name": project_name,},
"releases": {
version: [
{
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, Aug 24, 6:04 PM (1 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3220789
Attached To
D6629: Package loader: Uniformize author and message
Event Timeline
Log In to Comment