Page MenuHomeSoftware Heritage

D6629.diff
No OneTemporary

D6629.diff

diff --git a/docs/package-loader-specifications.rst b/docs/package-loader-specifications.rst
--- a/docs/package-loader-specifications.rst
+++ b/docs/package-loader-specifications.rst
@@ -24,17 +24,16 @@
- passed as arg
- ``release_name(​version)``
- =version
- - "swh-loader-package:
- synthetic revision message"
+ - "Synthetic release for archive at {p_info.url}"
- true
- - SWH robot
+ - ""
- passed as arg
-
* - cran
- ``metadata.get(​"Version", passed as arg)``
- ``release_name(​version)``
- =version
- - =version
+ - standard message
- true
- ``metadata.get(​"Maintainer", "")``
- ``metadata.get(​"Date")``
@@ -43,7 +42,7 @@
- passed as arg (eg. ``stretch/contrib/0.7.2-3``)
- ``release_name(​version)``
- =version
- - "Synthetic revision for Debian source package %s version %s"
+ - standard message (using full version)
- true
- ``metadata​.changelog​.person``
- ``metadata​.changelog​.date``
@@ -54,14 +53,14 @@
- HEAD
- "{client}: Deposit {id} in collection {collection}"
- true
- - SWH robot
+ - original author
- ``<codemeta: dateCreated>`` from SWORD XML
- revisions had parents
* - nixguix
- URL
- URL
- URL
- - ""
+ - None
- true
- ""
- None
@@ -70,7 +69,7 @@
- ``metadata​["version"]``
- ``release_name(​version)``
- =version
- - =version
+ - standard message
- true
- from int metadata or ""
- from ext metadata or None
@@ -79,7 +78,7 @@
- as given by opam
- "{opam_package}​.{version}"
- =version
- - =version
+ - standard message
- true
- from metadata
- None
@@ -88,7 +87,7 @@
- ``metadata​["version"]``
- ``release_name(​version)`` or ``release_name(​version, filename)``
- =version
- - "{version}: {metadata[​'comment_text']}" or just version
+ - ``metadata[​'comment_text']`` or standard message
- true
- from int metadata or ""
- from ext metadata or None
@@ -101,6 +100,13 @@
return "releases/%s/%s" % (version, filename)
return "releases/%s" % version
+and "standard message" being::
+
+ msg = (
+ f"Synthetic release for {PACKAGE_MANAGER} source package {name} "
+ f"version {version}"
+ )
+
The ``target_type`` field is always ``dir``, and the target the id of a directory
loaded by unpacking a tarball/zip file/...
diff --git a/swh/loader/package/archive/loader.py b/swh/loader/package/archive/loader.py
--- a/swh/loader/package/archive/loader.py
+++ b/swh/loader/package/archive/loader.py
@@ -14,17 +14,11 @@
import iso8601
from swh.loader.package.loader import BasePackageInfo, PackageLoader, PartialExtID
-from swh.loader.package.utils import release_name
-from swh.model.model import ObjectType, Person, Release, Sha1Git, TimestampWithTimezone
+from swh.loader.package.utils import EMPTY_AUTHOR, release_name
+from swh.model.model import ObjectType, Release, Sha1Git, TimestampWithTimezone
from swh.storage.interface import StorageInterface
logger = logging.getLogger(__name__)
-SWH_PERSON = Person(
- name=b"Software Heritage",
- fullname=b"Software Heritage",
- email=b"robot@softwareheritage.org",
-)
-REVISION_MESSAGE = b"swh-loader-package: synthetic revision message"
@attr.s
@@ -150,11 +144,12 @@
else:
parsed_time = time
normalized_time = TimestampWithTimezone.from_datetime(parsed_time)
+ msg = f"Synthetic release for archive at {p_info.url}"
return Release(
name=p_info.version.encode(),
- message=REVISION_MESSAGE,
+ message=msg.encode(),
date=normalized_time,
- author=SWH_PERSON,
+ author=EMPTY_AUTHOR,
target=directory,
target_type=ObjectType.DIRECTORY,
synthetic=True,
diff --git a/swh/loader/package/archive/tests/test_archive.py b/swh/loader/package/archive/tests/test_archive.py
--- a/swh/loader/package/archive/tests/test_archive.py
+++ b/swh/loader/package/archive/tests/test_archive.py
@@ -14,8 +14,17 @@
from swh.loader.package.archive.loader import ArchiveLoader, ArchivePackageInfo
from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats
-from swh.model.hashutil import hash_to_bytes
-from swh.model.model import Snapshot, SnapshotBranch, TargetType
+from swh.model.hashutil import hash_to_bytes, hash_to_hex
+from swh.model.model import (
+ ObjectType,
+ Person,
+ Release,
+ Snapshot,
+ SnapshotBranch,
+ TargetType,
+ Timestamp,
+ TimestampWithTimezone,
+)
URL = "https://ftp.gnu.org/gnu/8sync/"
GNU_ARTIFACTS = [
@@ -77,7 +86,7 @@
]
_expected_new_releases_first_visit = {
- "c9786c1e3b46f52779c727d3509d66ebf8948d88": (
+ "97c2ada10ca9b7876a8b5b17858b0518309170fd": (
"3aebc29ed1fccc4a6f2f2010fb8e57882406b528"
)
}
@@ -131,12 +140,11 @@
assert actual_load_status["status"] == "eventful"
expected_snapshot_first_visit_id = hash_to_bytes(
- "cdf8f335fa0c81c8ad089870ec14f52b1980eb6c"
+ "af62f6f6d464f9b29f270d1bbefa355af38946c4"
)
- assert (
- hash_to_bytes(actual_load_status["snapshot_id"])
- == expected_snapshot_first_visit_id
+ assert actual_load_status["snapshot_id"] == hash_to_hex(
+ expected_snapshot_first_visit_id
)
assert_last_visit_matches(swh_storage, URL, status="full", type="tar")
@@ -153,6 +161,7 @@
"snapshot": 1,
} == stats
+ release_id = hash_to_bytes(list(_expected_new_releases_first_visit)[0])
expected_snapshot = Snapshot(
id=expected_snapshot_first_visit_id,
branches={
@@ -160,14 +169,30 @@
target_type=TargetType.ALIAS, target=b"releases/0.1.0",
),
b"releases/0.1.0": SnapshotBranch(
- target_type=TargetType.RELEASE,
- target=hash_to_bytes(list(_expected_new_releases_first_visit)[0]),
+ target_type=TargetType.RELEASE, target=release_id,
),
},
)
-
check_snapshot(expected_snapshot, swh_storage)
+ assert swh_storage.release_get([release_id])[0] == Release(
+ id=release_id,
+ name=b"0.1.0",
+ message=(
+ b"Synthetic release for archive at "
+ b"https://ftp.gnu.org/gnu/8sync/8sync-0.1.0.tar.gz"
+ ),
+ target=hash_to_bytes("3aebc29ed1fccc4a6f2f2010fb8e57882406b528"),
+ target_type=ObjectType.DIRECTORY,
+ synthetic=True,
+ author=Person.from_fullname(b""),
+ date=TimestampWithTimezone(
+ timestamp=Timestamp(seconds=944729610, microseconds=0),
+ offset=0,
+ negative_utc=False,
+ ),
+ )
+
expected_contents = map(hash_to_bytes, _expected_new_contents_first_visit)
assert list(swh_storage.content_missing_per_sha1(expected_contents)) == []
diff --git a/swh/loader/package/cran/loader.py b/swh/loader/package/cran/loader.py
--- a/swh/loader/package/cran/loader.py
+++ b/swh/loader/package/cran/loader.py
@@ -30,6 +30,7 @@
@attr.s
class CRANPackageInfo(BasePackageInfo):
raw_info = attr.ib(type=Dict[str, Any])
+ name = attr.ib(type=str)
EXTID_TYPE = "cran-sha256"
MANIFEST_FORMAT = string.Template("$version $url")
@@ -41,6 +42,7 @@
url=url,
filename=path.basename(url),
raw_info=a_metadata,
+ name=a_metadata["package"],
version=a_metadata["version"],
)
@@ -88,9 +90,13 @@
metadata = extract_intrinsic_metadata(uncompressed_path)
date = parse_date(metadata.get("Date"))
author = Person.from_fullname(metadata.get("Maintainer", "").encode())
+ msg = (
+ f"Synthetic release for CRAN source package {p_info.name} "
+ f"version {p_info.version}"
+ )
return Release(
name=p_info.version.encode(),
- message=p_info.version.encode(),
+ message=msg.encode(),
date=date,
author=author,
target_type=ObjectType.DIRECTORY,
diff --git a/swh/loader/package/cran/tests/test_cran.py b/swh/loader/package/cran/tests/test_cran.py
--- a/swh/loader/package/cran/tests/test_cran.py
+++ b/swh/loader/package/cran/tests/test_cran.py
@@ -20,17 +20,27 @@
)
from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats
from swh.model.hashutil import hash_to_bytes
-from swh.model.model import Snapshot, SnapshotBranch, TargetType, TimestampWithTimezone
+from swh.model.model import (
+ ObjectType,
+ Person,
+ Release,
+ Snapshot,
+ SnapshotBranch,
+ TargetType,
+ Timestamp,
+ TimestampWithTimezone,
+)
+
+RELEASE_ID = hash_to_bytes("9a977f6415e6997fd9dd53c6dcb540ff0a7bff26")
SNAPSHOT = Snapshot(
- id=hash_to_bytes("56ed00938d83892bd5b42f2f368ae38a1dbfa718"),
+ id=hash_to_bytes("3787efc620c55b1e18889cfa561d9bcdc62c4cb2"),
branches={
b"HEAD": SnapshotBranch(
target=b"releases/2.22-6", target_type=TargetType.ALIAS
),
b"releases/2.22-6": SnapshotBranch(
- target=hash_to_bytes("42993a72eac50a4a83523c9327a52be3593755a8"),
- target_type=TargetType.RELEASE,
+ target=RELEASE_ID, target_type=TargetType.RELEASE,
),
},
)
@@ -172,7 +182,15 @@
f"{base_url}/src_contrib_1.4.0_Recommended_KernSmooth_{version}.tar.gz" # noqa
)
loader = CRANLoader(
- swh_storage, origin_url, artifacts=[{"url": artifact_url, "version": version,}]
+ swh_storage,
+ origin_url,
+ artifacts=[
+ {
+ "url": artifact_url,
+ "version": version,
+ "package": "Recommended_KernSmooth",
+ }
+ ],
)
actual_load_status = loader.load()
@@ -188,6 +206,28 @@
check_snapshot(SNAPSHOT, swh_storage)
+ assert swh_storage.release_get([RELEASE_ID])[0] == Release(
+ id=RELEASE_ID,
+ name=b"2.22-6",
+ message=(
+ b"Synthetic release for CRAN source package "
+ b"Recommended_KernSmooth version 2.22-6"
+ ),
+ target=hash_to_bytes("ff64177fea3f4a5136b9caf7581a4f7d4cf65296"),
+ target_type=ObjectType.DIRECTORY,
+ synthetic=True,
+ author=Person(
+ fullname=b"Brian Ripley <ripley@stats.ox.ac.uk>",
+ name=b"Brian Ripley",
+ email=b"ripley@stats.ox.ac.uk",
+ ),
+ date=TimestampWithTimezone(
+ timestamp=Timestamp(seconds=991958400, microseconds=0),
+ offset=0,
+ negative_utc=False,
+ ),
+ )
+
visit_stats = get_stats(swh_storage)
assert {
"content": 33,
@@ -218,7 +258,15 @@
f"{base_url}/src_contrib_1.4.0_Recommended_KernSmooth_{version}.tar.gz" # noqa
)
loader = CRANLoader(
- swh_storage, origin_url, artifacts=[{"url": artifact_url, "version": version}]
+ swh_storage,
+ origin_url,
+ artifacts=[
+ {
+ "url": artifact_url,
+ "version": version,
+ "package": "Recommended_KernSmooth",
+ }
+ ],
)
# first visit
@@ -342,7 +390,13 @@
loader = CRANLoader(
swh_storage,
origin_url,
- artifacts=[{"url": artifact_url, "version": version}],
+ artifacts=[
+ {
+ "url": artifact_url,
+ "version": version,
+ "package": "Recommended_KernSmooth",
+ }
+ ],
)
actual_load_status = loader.load()
diff --git a/swh/loader/package/debian/loader.py b/swh/loader/package/debian/loader.py
--- a/swh/loader/package/debian/loader.py
+++ b/swh/loader/package/debian/loader.py
@@ -224,9 +224,9 @@
logger.debug("intrinsic_metadata: %s", intrinsic_metadata)
logger.debug("p_info: %s", p_info)
- msg = "Synthetic revision for Debian source package %s version %s" % (
- p_info.name,
- p_info.full_version,
+ msg = (
+ f"Synthetic release for Debian source package {p_info.name} "
+ f"version {p_info.full_version}"
)
author = prepare_person(intrinsic_metadata.changelog.person)
@@ -235,7 +235,7 @@
# inspired from swh.loader.debian.converters.package_metadata_to_revision
return Release(
name=p_info.version.encode(),
- message=msg.encode("utf-8"),
+ message=msg.encode(),
author=author,
date=date,
target=directory,
diff --git a/swh/loader/package/debian/tests/test_debian.py b/swh/loader/package/debian/tests/test_debian.py
--- a/swh/loader/package/debian/tests/test_debian.py
+++ b/swh/loader/package/debian/tests/test_debian.py
@@ -23,7 +23,16 @@
)
from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats
from swh.model.hashutil import hash_to_bytes
-from swh.model.model import Person, Snapshot, SnapshotBranch, TargetType
+from swh.model.model import (
+ ObjectType,
+ Person,
+ Release,
+ Snapshot,
+ SnapshotBranch,
+ TargetType,
+ Timestamp,
+ TimestampWithTimezone,
+)
logger = logging.getLogger(__name__)
@@ -110,7 +119,7 @@
)
actual_load_status = loader.load()
- expected_snapshot_id = "8bc5d12e2443ab216fdd2f969b25b39e96c20fef"
+ expected_snapshot_id = "20073c91e85b8bcbd2639990e76765d25bd2c0a6"
assert actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id,
@@ -124,18 +133,38 @@
snapshot=hash_to_bytes(expected_snapshot_id),
)
+ release_id = hash_to_bytes("ed191d99e070a33458a4a402becd0b4bba09cd1e")
+
expected_snapshot = Snapshot(
id=hash_to_bytes(expected_snapshot_id),
branches={
b"releases/stretch/contrib/0.7.2-3": SnapshotBranch(
- target_type=TargetType.RELEASE,
- target=hash_to_bytes("5a99736512d381700c5f54d7fdd6b46e136535a2"),
+ target_type=TargetType.RELEASE, target=release_id,
)
},
) # different than the previous loader as no release is done
check_snapshot(expected_snapshot, swh_storage)
+ assert swh_storage.release_get([release_id])[0] == Release(
+ id=release_id,
+ name=b"stretch/contrib/0.7.2-3",
+ message=b"Synthetic release for Debian source package cicero version 0.7.2-3",
+ target=hash_to_bytes("798df511408c53bf842a8e54d4d335537836bdc3"),
+ target_type=ObjectType.DIRECTORY,
+ synthetic=True,
+ author=Person(
+ fullname=b"Samuel Thibault <sthibault@debian.org>",
+ name=b"Samuel Thibault",
+ email=b"sthibault@debian.org",
+ ),
+ date=TimestampWithTimezone(
+ timestamp=Timestamp(seconds=1413730355, microseconds=0),
+ offset=120,
+ negative_utc=False,
+ ),
+ )
+
stats = get_stats(swh_storage)
assert {
"content": 42,
@@ -162,7 +191,7 @@
actual_load_status = loader.load()
- expected_snapshot_id = "8bc5d12e2443ab216fdd2f969b25b39e96c20fef"
+ expected_snapshot_id = "20073c91e85b8bcbd2639990e76765d25bd2c0a6"
assert actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id,
@@ -181,7 +210,7 @@
branches={
b"releases/stretch/contrib/0.7.2-3": SnapshotBranch(
target_type=TargetType.RELEASE,
- target=hash_to_bytes("5a99736512d381700c5f54d7fdd6b46e136535a2"),
+ target=hash_to_bytes("ed191d99e070a33458a4a402becd0b4bba09cd1e"),
)
},
) # different than the previous loader as no release is done
@@ -418,7 +447,7 @@
)
actual_load_status = loader.load()
- expected_snapshot_id = "3d26243c91eb084c350627a5a102cfe039c5b92a"
+ expected_snapshot_id = "3e423d7889ebd8df0ed0373016f035dfed8541cb"
assert actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id,
@@ -437,11 +466,11 @@
branches={
b"releases/stretch/contrib/0.7.2-3": SnapshotBranch(
target_type=TargetType.RELEASE,
- target=hash_to_bytes("5a99736512d381700c5f54d7fdd6b46e136535a2"),
+ target=hash_to_bytes("ed191d99e070a33458a4a402becd0b4bba09cd1e"),
),
b"releases/buster/contrib/0.7.2-4": SnapshotBranch(
target_type=TargetType.RELEASE,
- target=hash_to_bytes("192fc7ccce80f64a0d3cf33d379133af067ec721"),
+ target=hash_to_bytes("d3dff4a416816c36dc284e49c1c9eed52c2d2ef4"),
),
},
)
diff --git a/swh/loader/package/nixguix/loader.py b/swh/loader/package/nixguix/loader.py
--- a/swh/loader/package/nixguix/loader.py
+++ b/swh/loader/package/nixguix/loader.py
@@ -160,7 +160,7 @@
) -> Optional[Release]:
return Release(
name=p_info.version.encode(),
- message=b"",
+ message=None,
author=EMPTY_AUTHOR,
date=None,
target=directory,
diff --git a/swh/loader/package/nixguix/tests/test_nixguix.py b/swh/loader/package/nixguix/tests/test_nixguix.py
--- a/swh/loader/package/nixguix/tests/test_nixguix.py
+++ b/swh/loader/package/nixguix/tests/test_nixguix.py
@@ -28,7 +28,10 @@
MetadataAuthority,
MetadataAuthorityType,
MetadataFetcher,
+ ObjectType,
+ Person,
RawExtrinsicMetadata,
+ Release,
Snapshot,
SnapshotBranch,
TargetType,
@@ -54,14 +57,14 @@
SNAPSHOT1 = Snapshot(
- id=hash_to_bytes("771d13ae4e799755c22d1e05da8fc39cf215de58"),
+ id=hash_to_bytes("efe5145f85af3fc87f34102d8b8481cd5198f4f8"),
branches={
b"evaluation": SnapshotBranch(
target=hash_to_bytes("cc4e04c26672dd74e5fd0fecb78b435fb55368f7"),
target_type=TargetType.REVISION,
),
b"https://github.com/owner-1/repository-1/revision-1.tgz": SnapshotBranch(
- target=hash_to_bytes("24853190589d26d0ea2b6c0330b553ff39176e0c"),
+ target=hash_to_bytes("df7811b9644ed8ef088e2e7add62ed32b0bab15f"),
target_type=TargetType.RELEASE,
),
},
@@ -273,8 +276,39 @@
def test_loader_one_visit(swh_storage, requests_mock_datadir, raw_sources):
loader = NixGuixLoader(swh_storage, sources_url)
- res = loader.load()
- assert res["status"] == "eventful"
+ load_status = loader.load()
+ expected_snapshot_id_hex = "efe5145f85af3fc87f34102d8b8481cd5198f4f8"
+ expected_snapshot_id = hash_to_bytes(expected_snapshot_id_hex)
+ assert load_status == {
+ "status": "eventful",
+ "snapshot_id": expected_snapshot_id_hex,
+ }
+
+ release_id = hash_to_bytes("df7811b9644ed8ef088e2e7add62ed32b0bab15f")
+ expected_snapshot = Snapshot(
+ id=expected_snapshot_id,
+ branches={
+ b"evaluation": SnapshotBranch(
+ target=hash_to_bytes("cc4e04c26672dd74e5fd0fecb78b435fb55368f7"),
+ target_type=TargetType.REVISION,
+ ),
+ b"https://github.com/owner-1/repository-1/revision-1.tgz": SnapshotBranch(
+ target=release_id, target_type=TargetType.RELEASE,
+ ),
+ },
+ )
+ check_snapshot(expected_snapshot, storage=swh_storage)
+
+ assert swh_storage.release_get([release_id])[0] == Release(
+ id=release_id,
+ name=b"https://github.com/owner-1/repository-1/revision-1.tgz",
+ message=None,
+ target=hash_to_bytes("4de2e07d3742718d928e974b8a4c721b9f7b33bf"),
+ target_type=ObjectType.DIRECTORY,
+ synthetic=True,
+ author=Person.from_fullname(b""),
+ date=None,
+ )
stats = get_stats(swh_storage)
assert {
@@ -413,7 +447,7 @@
loader = NixGuixLoader(swh_storage, sources_url)
load_status = loader.load()
- expected_snapshot_id_hex = "c5bba84fd5ac3342566effb86190619092d34e79"
+ expected_snapshot_id_hex = "c1983a0a3f647548e1fb92f30339da6848fe9f7a"
expected_snapshot_id = hash_to_bytes(expected_snapshot_id_hex)
assert load_status == {
"status": "eventful",
@@ -439,11 +473,11 @@
target_type=TargetType.REVISION,
),
b"https://github.com/owner-1/repository-1/revision-1.tgz": SnapshotBranch(
- target=hash_to_bytes("24853190589d26d0ea2b6c0330b553ff39176e0c"),
+ target=hash_to_bytes("df7811b9644ed8ef088e2e7add62ed32b0bab15f"),
target_type=TargetType.RELEASE,
),
b"https://github.com/owner-2/repository-1/revision-1.tgz": SnapshotBranch(
- target=hash_to_bytes("3d44fbe814ba802cfd77f83975e45766d3a2ba85"),
+ target=hash_to_bytes("5cc0115cd643902b837cb6cfbc9f5865bc5a7cb2"),
target_type=TargetType.RELEASE,
),
},
@@ -573,7 +607,7 @@
]
archive_loader = ArchiveLoader(swh_storage, url=gnu_url, artifacts=gnu_artifacts)
actual_load_status = archive_loader.load()
- expected_snapshot_id = "cdf8f335fa0c81c8ad089870ec14f52b1980eb6c"
+ expected_snapshot_id = "af62f6f6d464f9b29f270d1bbefa355af38946c4"
assert actual_load_status["status"] == "eventful"
assert actual_load_status["snapshot_id"] == expected_snapshot_id # noqa
diff --git a/swh/loader/package/npm/loader.py b/swh/loader/package/npm/loader.py
--- a/swh/loader/package/npm/loader.py
+++ b/swh/loader/package/npm/loader.py
@@ -106,8 +106,8 @@
str: origin url (e.g. https://www.npmjs.com/package/<package-name>)
"""
super().__init__(storage=storage, url=url, max_content_size=max_content_size)
- package_name = url.split("https://www.npmjs.com/package/")[1]
- safe_name = quote(package_name, safe="")
+ self.package_name = url.split("https://www.npmjs.com/package/")[1]
+ safe_name = quote(self.package_name, safe="")
self.provider_url = f"https://replicate.npmjs.com/{safe_name}/"
self._info: Dict[str, Any] = {}
self._versions = None
@@ -147,7 +147,10 @@
if not i_metadata:
return None
author = extract_npm_package_author(i_metadata)
- message = i_metadata["version"].encode("ascii")
+ msg = (
+ f"Synthetic release for NPM source package {self.package_name} "
+ f"version {p_info.version}"
+ )
if p_info.date is None:
url = p_info.url
@@ -164,7 +167,7 @@
r = Release(
name=p_info.version.encode(),
- message=message,
+ message=msg.encode(),
author=author,
date=date,
target=directory,
diff --git a/swh/loader/package/npm/tests/test_npm.py b/swh/loader/package/npm/tests/test_npm.py
--- a/swh/loader/package/npm/tests/test_npm.py
+++ b/swh/loader/package/npm/tests/test_npm.py
@@ -19,9 +19,12 @@
from swh.model.model import (
Person,
RawExtrinsicMetadata,
+ Release,
Snapshot,
SnapshotBranch,
TargetType,
+ Timestamp,
+ TimestampWithTimezone,
)
from swh.model.model import MetadataAuthority, MetadataAuthorityType, MetadataFetcher
from swh.model.model import ObjectType as ModelObjectType
@@ -280,13 +283,13 @@
_expected_new_releases_first_visit = normalize_hashes(
{
- "d25e722a32c145b3eb88b416049dd35d27759a87": (
+ "adcc40ee87a3ebb1b5a82edd692cf52aa5099cee": (
"42753c0c2ab00c4501b552ac4671c68f3cf5aece"
),
- "3522e846b97c0b8434c565fe891c0f082a357e5d": (
+ "c781147df0e4963a0f9859134abd28296b702233": (
"3370d20d6f96dc1c9e50f083e2134881db110f4f"
),
- "54f6c1711c6aedb6de3cf2d6347b9f772e343784": (
+ "f544812dac98e7589155be7dfaef64477a408ec0": (
"d7895533ef5edbcffdea3f057d9fef3a1ef845ce"
),
}
@@ -307,7 +310,7 @@
loader = NpmLoader(swh_storage, url)
actual_load_status = loader.load()
- expected_snapshot_id = hash_to_bytes("ddaad89b0b4edb7eefe7c92e9b1166caa776ebbc")
+ expected_snapshot_id = hash_to_bytes("d24e3f10492ade1e9462ec701370fef4a79a40f1")
assert actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id.hex(),
@@ -318,9 +321,9 @@
)
versions = [
- ("0.0.2", "d25e722a32c145b3eb88b416049dd35d27759a87"),
- ("0.0.3", "3522e846b97c0b8434c565fe891c0f082a357e5d"),
- ("0.0.4", "54f6c1711c6aedb6de3cf2d6347b9f772e343784"),
+ ("0.0.2", "adcc40ee87a3ebb1b5a82edd692cf52aa5099cee"),
+ ("0.0.3", "c781147df0e4963a0f9859134abd28296b702233"),
+ ("0.0.4", "f544812dac98e7589155be7dfaef64477a408ec0"),
]
expected_snapshot = Snapshot(
@@ -340,6 +343,27 @@
)
check_snapshot(expected_snapshot, swh_storage)
+ assert swh_storage.release_get(
+ [hash_to_bytes("adcc40ee87a3ebb1b5a82edd692cf52aa5099cee")]
+ )[0] == Release(
+ name=b"0.0.2",
+ message=b"Synthetic release for NPM source package org version 0.0.2",
+ target=hash_to_bytes("42753c0c2ab00c4501b552ac4671c68f3cf5aece"),
+ target_type=ModelObjectType.DIRECTORY,
+ synthetic=True,
+ author=Person(
+ fullname=b"mooz <stillpedant@gmail.com>",
+ name=b"mooz",
+ email=b"stillpedant@gmail.com",
+ ),
+ date=TimestampWithTimezone(
+ timestamp=Timestamp(seconds=1388590833, microseconds=0),
+ offset=0,
+ negative_utc=False,
+ ),
+ id=hash_to_bytes("adcc40ee87a3ebb1b5a82edd692cf52aa5099cee"),
+ )
+
contents = swh_storage.content_get(_expected_new_contents_first_visit)
count = sum(0 if content is None else 1 for content in contents)
assert count == len(_expected_new_contents_first_visit)
@@ -403,7 +427,7 @@
url = package_url(package)
loader = NpmLoader(swh_storage, url)
- expected_snapshot_id = hash_to_bytes("ddaad89b0b4edb7eefe7c92e9b1166caa776ebbc")
+ expected_snapshot_id = hash_to_bytes("d24e3f10492ade1e9462ec701370fef4a79a40f1")
actual_load_status = loader.load()
assert actual_load_status == {
"status": "eventful",
@@ -466,7 +490,7 @@
loader = NpmLoader(swh_storage, url)
actual_load_status = loader.load()
- expected_snapshot_id = hash_to_bytes("7a89bc3cb51ff1d3213b2151c745d82c3b9d69b1")
+ expected_snapshot_id = hash_to_bytes("92ff37da8045f0088ed35bce0bc34e2025202825")
assert actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id.hex(),
@@ -483,11 +507,11 @@
),
b"releases/0.1.0": SnapshotBranch(
target_type=TargetType.RELEASE,
- target=hash_to_bytes("103fa6d0a1abb405468e3590dcf634bcb77f67be"),
+ target=hash_to_bytes("c5e0f0e185660b6bdd694ca5c68babe5bab20e24"),
),
b"releases/0.1.1-alpha.14": SnapshotBranch(
target_type=TargetType.RELEASE,
- target=hash_to_bytes("c00b54143582a4e963e0b86e8dfa58eedd260020"),
+ target=hash_to_bytes("2f89c709eacc974b587e13f90d10a826b23a550e"),
),
},
)
@@ -566,7 +590,7 @@
loader = NpmLoader(swh_storage, url)
actual_load_status = loader.load()
- expected_snapshot_id = hash_to_bytes("7f5e591dd3c4754abca4db1cc18355671e2c014c")
+ expected_snapshot_id = hash_to_bytes("2a7a67725f9c7134f56612281e8d1638f1386118")
assert actual_load_status == {
"status": "eventful",
@@ -582,7 +606,7 @@
),
b"releases/0.0.1": SnapshotBranch(
target_type=TargetType.RELEASE,
- target=hash_to_bytes("199bf0ad020617357d608655e6549e526a65dc36"),
+ target=hash_to_bytes("68b2a100103cecec06b8dd780228bb751f2dc6f3"),
),
},
)
diff --git a/swh/loader/package/opam/loader.py b/swh/loader/package/opam/loader.py
--- a/swh/loader/package/opam/loader.py
+++ b/swh/loader/package/opam/loader.py
@@ -244,10 +244,14 @@
self, p_info: OpamPackageInfo, uncompressed_path: str, directory: Sha1Git,
) -> Optional[Release]:
+ msg = (
+ f"Synthetic release for OPAM source package {self.opam_package} "
+ f"version {p_info.version}"
+ )
return Release(
name=p_info.version.encode(),
author=p_info.author,
- message=str.encode(p_info.version),
+ message=msg.encode(),
date=None,
target=directory,
target_type=ObjectType.DIRECTORY,
diff --git a/swh/loader/package/opam/tests/test_opam.py b/swh/loader/package/opam/tests/test_opam.py
--- a/swh/loader/package/opam/tests/test_opam.py
+++ b/swh/loader/package/opam/tests/test_opam.py
@@ -9,15 +9,15 @@
from swh.loader.tests import assert_last_visit_matches, check_snapshot, get_stats
from swh.model.hashutil import hash_to_bytes
from swh.model.model import (
- MetadataAuthority,
- MetadataAuthorityType,
- MetadataFetcher,
Person,
RawExtrinsicMetadata,
+ Release,
Snapshot,
SnapshotBranch,
TargetType,
)
+from swh.model.model import MetadataAuthority, MetadataAuthorityType, MetadataFetcher
+from swh.model.model import ObjectType as ModelObjectType
from swh.model.swhids import CoreSWHID, ExtendedObjectType, ExtendedSWHID, ObjectType
from swh.storage.interface import PagedResult
@@ -110,23 +110,37 @@
actual_load_status = loader.load()
- expected_snapshot_id = hash_to_bytes("50b5961c27dd4f8b138acce8bac4f90d1e33081f")
+ expected_snapshot_id = hash_to_bytes("e480958fa7851268be2bcc8d01145c0c9624b34b")
assert actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id.hex(),
}
+ release_id = hash_to_bytes("03db7f0d572509f1c7ce18c847db83070e26fd5e")
+
expected_snapshot = Snapshot(
id=expected_snapshot_id,
branches={
b"HEAD": SnapshotBranch(target=b"agrid.0.1", target_type=TargetType.ALIAS,),
b"agrid.0.1": SnapshotBranch(
- target=hash_to_bytes("efcb9ef9d0f2a85312463251732b42f9e45a5c12"),
- target_type=TargetType.RELEASE,
+ target=release_id, target_type=TargetType.RELEASE,
),
},
)
+ assert swh_storage.release_get([release_id])[0] == Release(
+ name=b"0.1",
+ message=b"Synthetic release for OPAM source package agrid version 0.1",
+ target=hash_to_bytes("00412ee5bc601deb462e55addd1004715116785e"),
+ target_type=ModelObjectType.DIRECTORY,
+ synthetic=True,
+ author=Person(
+ fullname=b"OCamlPro <contact@ocamlpro.com>", name=None, email=None
+ ),
+ date=None,
+ id=release_id,
+ )
+
assert_last_visit_matches(
swh_storage, url, status="full", type="opam", snapshot=expected_snapshot_id
)
@@ -167,7 +181,7 @@
actual_load_status = loader.load()
- expected_snapshot_id = hash_to_bytes("f0a974e47999e74d323f1fb9604fde72527bda28")
+ expected_snapshot_id = hash_to_bytes("1a70631bee44c86dded71e0a091b1c91c110f812")
assert actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id.hex(),
@@ -180,15 +194,15 @@
target=b"directories.0.3", target_type=TargetType.ALIAS,
),
b"directories.0.1": SnapshotBranch(
- target=hash_to_bytes("1f839cb1f4720d6b33fdd856e3ff1119497979d9"),
+ target=hash_to_bytes("013d53d7e1aedbe03aaa3d5c0e6d1d780ef2634d"),
target_type=TargetType.RELEASE,
),
b"directories.0.2": SnapshotBranch(
- target=hash_to_bytes("4133834d966381804347efbc41e35dd2bdd48962"),
+ target=hash_to_bytes("4fdcc3606c0af33cb4d733b70074e79f03e928a1"),
target_type=TargetType.RELEASE,
),
b"directories.0.3": SnapshotBranch(
- target=hash_to_bytes("2f20cabfbacfe447b80dc2a4eb14d461775100c8"),
+ target=hash_to_bytes("5de72a60f81649157d267773c30e897b7005dcdb"),
target_type=TargetType.RELEASE,
),
},
@@ -222,7 +236,7 @@
actual_load_status = loader.load()
- expected_snapshot_id = hash_to_bytes("987425c6fe94d3972c4c4e97ee27a6a7c8b68e82")
+ expected_snapshot_id = hash_to_bytes("96246035587354a71f429d5b9b8dcc98afad3708")
assert actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id.hex(),
@@ -251,7 +265,7 @@
assert branch_name == expected_branch_name
assert package_info == expected_package_info
- release_id = hash_to_bytes("8d0612cdf172e5dff3d876ca2bbc0f6003cc36cc")
+ release_id = hash_to_bytes("4904ad9d0f3b3f84cec2b899d0d05c682b0efdcb")
expected_snapshot = Snapshot(
id=hash_to_bytes(actual_load_status["snapshot_id"]),
@@ -298,7 +312,7 @@
assert actual_load_status["status"] == "eventful"
- expected_release_id = hash_to_bytes("8d0612cdf172e5dff3d876ca2bbc0f6003cc36cc")
+ expected_release_id = hash_to_bytes("4904ad9d0f3b3f84cec2b899d0d05c682b0efdcb")
expected_snapshot = Snapshot(
id=hash_to_bytes(actual_load_status["snapshot_id"]),
diff --git a/swh/loader/package/pypi/loader.py b/swh/loader/package/pypi/loader.py
--- a/swh/loader/package/pypi/loader.py
+++ b/swh/loader/package/pypi/loader.py
@@ -41,17 +41,21 @@
class PyPIPackageInfo(BasePackageInfo):
raw_info = attr.ib(type=Dict[str, Any])
+ name = attr.ib(type=str)
comment_text = attr.ib(type=Optional[str])
sha256 = attr.ib(type=str)
upload_time = attr.ib(type=str)
@classmethod
- def from_metadata(cls, metadata: Dict[str, Any], version: str) -> "PyPIPackageInfo":
+ def from_metadata(
+ cls, metadata: Dict[str, Any], name: str, version: str
+ ) -> "PyPIPackageInfo":
return cls(
url=metadata["url"],
filename=metadata["filename"],
version=version,
raw_info=metadata,
+ name=name,
comment_text=metadata.get("comment_text"),
sha256=metadata["digests"]["sha256"],
upload_time=metadata["upload_time"],
@@ -116,7 +120,9 @@
):
continue
- p_info = PyPIPackageInfo.from_metadata(meta, version=version)
+ p_info = PyPIPackageInfo.from_metadata(
+ meta, name=self.info()["info"]["name"], version=version
+ )
res.append((version, p_info))
if len(res) == 1:
@@ -134,17 +140,22 @@
return None
# from intrinsic metadata
- version_ = i_metadata.get("version", "")
+ version_ = i_metadata.get("version", p_info.version)
author_ = author(i_metadata)
- # from extrinsic metadata
- message = p_info.comment_text or ""
- message = "%s: %s" % (version_, message) if message else version_
+ if p_info.comment_text:
+ msg = p_info.comment_text
+ else:
+ msg = (
+ f"Synthetic release for PyPI source package {p_info.name} "
+ f"version {version_}"
+ )
+
date = TimestampWithTimezone.from_iso8601(p_info.upload_time)
return Release(
name=p_info.version.encode(),
- message=message.encode(),
+ message=msg.encode(),
author=author_,
date=date,
target=directory,
diff --git a/swh/loader/package/pypi/tests/test_pypi.py b/swh/loader/package/pypi/tests/test_pypi.py
--- a/swh/loader/package/pypi/tests/test_pypi.py
+++ b/swh/loader/package/pypi/tests/test_pypi.py
@@ -329,7 +329,7 @@
assert actual_load_status["status"] == "eventful"
assert actual_load_status["snapshot_id"] is not None
- expected_release_id = hash_to_bytes("a1e10745d375be66c1b65e55c0c15fe98776b53c")
+ expected_release_id = hash_to_bytes("e05d81600f3db1f905d23ab2a06ea64460c7e3f4")
expected_snapshot = Snapshot(
id=hash_to_bytes(actual_load_status["snapshot_id"]),
@@ -338,12 +338,11 @@
target=b"releases/1.2.0", target_type=TargetType.ALIAS,
),
b"releases/1.1.0": SnapshotBranch(
- target=hash_to_bytes("9478c9981887fdf5ada3f1fcb20c81069cdf4c44"),
+ target=hash_to_bytes("ed4132a0160d97752a6ce5716722fb937a2e00b1"),
target_type=TargetType.RELEASE,
),
b"releases/1.2.0": SnapshotBranch(
- target=hash_to_bytes("a1e10745d375be66c1b65e55c0c15fe98776b53c"),
- target_type=TargetType.RELEASE,
+ target=expected_release_id, target_type=TargetType.RELEASE,
),
},
)
@@ -397,7 +396,7 @@
loader = PyPILoader(swh_storage, url)
actual_load_status = loader.load()
- expected_snapshot_id = hash_to_bytes("eee24d5b0c156ebb4ece0c810c9dce636ebe881f")
+ expected_snapshot_id = hash_to_bytes("1838a3d6fff760338ab14b95c43c2dabcbb03c5a")
assert actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id.hex(),
@@ -411,7 +410,7 @@
id=hash_to_bytes(expected_snapshot_id),
branches={
b"releases/1.2.0": SnapshotBranch(
- target=hash_to_bytes("a1e10745d375be66c1b65e55c0c15fe98776b53c"),
+ target=hash_to_bytes("e05d81600f3db1f905d23ab2a06ea64460c7e3f4"),
target_type=TargetType.RELEASE,
),
b"HEAD": SnapshotBranch(
@@ -443,7 +442,7 @@
loader = PyPILoader(swh_storage, url)
actual_load_status = loader.load()
- expected_snapshot_id = hash_to_bytes("62d957f2b5cdc515bea0a46252a3ab29ee271636")
+ expected_snapshot_id = hash_to_bytes("7e34abda294fb80e6d4e64637ae43fed112079ca")
assert actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id.hex(),
@@ -457,11 +456,11 @@
id=expected_snapshot_id,
branches={
b"releases/1.1.0": SnapshotBranch(
- target=hash_to_bytes("9478c9981887fdf5ada3f1fcb20c81069cdf4c44"),
+ target=hash_to_bytes("ed4132a0160d97752a6ce5716722fb937a2e00b1"),
target_type=TargetType.RELEASE,
),
b"releases/1.2.0": SnapshotBranch(
- target=hash_to_bytes("a1e10745d375be66c1b65e55c0c15fe98776b53c"),
+ target=hash_to_bytes("e05d81600f3db1f905d23ab2a06ea64460c7e3f4"),
target_type=TargetType.RELEASE,
),
b"HEAD": SnapshotBranch(
@@ -492,7 +491,7 @@
loader = PyPILoader(swh_storage, url)
actual_load_status = loader.load()
- snapshot_id = hash_to_bytes("62d957f2b5cdc515bea0a46252a3ab29ee271636")
+ snapshot_id = hash_to_bytes("7e34abda294fb80e6d4e64637ae43fed112079ca")
assert actual_load_status == {
"status": "eventful",
"snapshot_id": snapshot_id.hex(),
@@ -505,11 +504,11 @@
id=snapshot_id,
branches={
b"releases/1.1.0": SnapshotBranch(
- target=hash_to_bytes("9478c9981887fdf5ada3f1fcb20c81069cdf4c44"),
+ target=hash_to_bytes("ed4132a0160d97752a6ce5716722fb937a2e00b1"),
target_type=TargetType.RELEASE,
),
b"releases/1.2.0": SnapshotBranch(
- target=hash_to_bytes("a1e10745d375be66c1b65e55c0c15fe98776b53c"),
+ target=hash_to_bytes("e05d81600f3db1f905d23ab2a06ea64460c7e3f4"),
target_type=TargetType.RELEASE,
),
b"HEAD": SnapshotBranch(
@@ -560,7 +559,7 @@
visit1_actual_load_status = loader.load()
visit1_stats = get_stats(swh_storage)
- expected_snapshot_id = hash_to_bytes("62d957f2b5cdc515bea0a46252a3ab29ee271636")
+ expected_snapshot_id = hash_to_bytes("7e34abda294fb80e6d4e64637ae43fed112079ca")
assert visit1_actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id.hex(),
@@ -589,7 +588,7 @@
visit2_stats = get_stats(swh_storage)
assert visit2_actual_load_status["status"] == "eventful", visit2_actual_load_status
- expected_snapshot_id2 = hash_to_bytes("6a8a84e7f765bed4362315fb054adb2466598636")
+ expected_snapshot_id2 = hash_to_bytes("6636693213eab9000b8eee1c5bbb3f1b675a4c70")
assert visit2_actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id2.hex(),
@@ -603,15 +602,15 @@
id=expected_snapshot_id2,
branches={
b"releases/1.1.0": SnapshotBranch(
- target=hash_to_bytes("9478c9981887fdf5ada3f1fcb20c81069cdf4c44"),
+ target=hash_to_bytes("ed4132a0160d97752a6ce5716722fb937a2e00b1"),
target_type=TargetType.RELEASE,
),
b"releases/1.2.0": SnapshotBranch(
- target=hash_to_bytes("a1e10745d375be66c1b65e55c0c15fe98776b53c"),
+ target=hash_to_bytes("e05d81600f3db1f905d23ab2a06ea64460c7e3f4"),
target_type=TargetType.RELEASE,
),
b"releases/1.3.0": SnapshotBranch(
- target=hash_to_bytes("d46442e99bb6e05df5f75a7f0f7f61a4f2098147"),
+ target=hash_to_bytes("a21b09cbec8e31f47307f196bb1f939effc26e11"),
target_type=TargetType.RELEASE,
),
b"HEAD": SnapshotBranch(
@@ -665,7 +664,7 @@
loader = PyPILoader(swh_storage, url)
actual_load_status = loader.load()
- expected_snapshot_id = hash_to_bytes("a136ee226316276c347d7be3da07df5828605927")
+ expected_snapshot_id = hash_to_bytes("a65c03a837b24720fa95622de07074e279eddd0d")
assert actual_load_status == {
"status": "eventful",
"snapshot_id": expected_snapshot_id.hex(),
@@ -679,11 +678,11 @@
id=expected_snapshot_id,
branches={
b"releases/1.1.0/nexter-1.1.0.zip": SnapshotBranch(
- target=hash_to_bytes("9478c9981887fdf5ada3f1fcb20c81069cdf4c44"),
+ target=hash_to_bytes("18d0087b1e1a3a31070d54bf3e9edbd44ab01cb5"),
target_type=TargetType.RELEASE,
),
b"releases/1.1.0/nexter-1.1.0.tar.gz": SnapshotBranch(
- target=hash_to_bytes("b3391cb4007fb6872c4dfab476a7cfe7443a1bb4"),
+ target=hash_to_bytes("b2b379b3eb61adcde22e10788b1fc5f985e938d2"),
target_type=TargetType.RELEASE,
),
},
@@ -734,6 +733,7 @@
url=url,
filename="GermlineFilter-1.2.tar.gz",
version="1.2",
+ name="GermlineFilter",
directory_extrinsic_metadata=[],
raw_info={},
comment_text="",
@@ -762,7 +762,10 @@
release = loader.build_release(p_info, str(tmp_path), directory)
# without comment_text and version in PKG-INFO, the standard message is used
- assert release.message == b""
+ assert (
+ release.message
+ == b"Synthetic release for PyPI source package GermlineFilter version 1.2"
+ )
def test_filter_out_invalid_sdists(swh_storage, requests_mock):
@@ -782,6 +785,7 @@
requests_mock.get(
json_url,
json={
+ "info": {"name": project_name,},
"releases": {
version: [
{

File Metadata

Mime Type
text/plain
Expires
Sun, Aug 24, 6:04 PM (1 w, 3 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3220789

Event Timeline