Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7124717
D5789.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
8 KB
Subscribers
None
D5789.diff
View Options
diff --git a/swh/loader/package/archive/loader.py b/swh/loader/package/archive/loader.py
--- a/swh/loader/package/archive/loader.py
+++ b/swh/loader/package/archive/loader.py
@@ -8,7 +8,7 @@
import logging
from os import path
import string
-from typing import Any, Dict, Iterator, Optional, Sequence, Tuple, Union
+from typing import Any, Dict, Iterator, Mapping, Optional, Sequence, Tuple, Union
import attr
import iso8601
@@ -84,6 +84,7 @@
artifacts: Sequence[Dict[str, Any]],
extid_manifest_format: Optional[str] = None,
max_content_size: Optional[int] = None,
+ snapshot_append: bool = False,
):
f"""Loader constructor.
@@ -107,6 +108,8 @@
extid_manifest_format: template string used to format a manifest,
which is hashed to get the extid of a package.
Defaults to {ArchivePackageInfo.MANIFEST_FORMAT!r}
+ snapshot_append: if :const:`True`, append latest snapshot content to
+ the new snapshot created by the loader
"""
super().__init__(storage=storage, url=url, max_content_size=max_content_size)
@@ -116,6 +119,7 @@
if extid_manifest_format is None
else string.Template(extid_manifest_format)
)
+ self.snapshot_append = snapshot_append
def get_versions(self) -> Sequence[str]:
versions = []
@@ -164,3 +168,9 @@
directory=directory,
synthetic=True,
)
+
+ def extra_branches(self) -> Dict[bytes, Mapping[str, Any]]:
+ if not self.snapshot_append:
+ return {}
+ last_snapshot = self.last_snapshot()
+ return last_snapshot.to_dict()["branches"] if last_snapshot else {}
diff --git a/swh/loader/package/archive/tasks.py b/swh/loader/package/archive/tasks.py
--- a/swh/loader/package/archive/tasks.py
+++ b/swh/loader/package/archive/tasks.py
@@ -9,7 +9,9 @@
@shared_task(name=__name__ + ".LoadArchive")
-def load_archive_files(*, url=None, artifacts=None):
+def load_archive_files(*, url=None, artifacts=None, snapshot_append=False):
"""Load archive's artifacts (e.g gnu, etc...)"""
- loader = ArchiveLoader.from_configfile(url=url, artifacts=artifacts)
+ loader = ArchiveLoader.from_configfile(
+ url=url, artifacts=artifacts, snapshot_append=snapshot_append
+ )
return loader.load()
diff --git a/swh/loader/package/archive/tests/test_archive.py b/swh/loader/package/archive/tests/test_archive.py
--- a/swh/loader/package/archive/tests/test_archive.py
+++ b/swh/loader/package/archive/tests/test_archive.py
@@ -22,7 +22,14 @@
"length": 221837,
"filename": "8sync-0.1.0.tar.gz",
"version": "0.1.0",
- }
+ },
+ {
+ "time": 1480991830,
+ "url": "https://ftp.gnu.org/gnu/8sync/8sync-0.2.0.tar.gz",
+ "length": 238466,
+ "filename": "8sync-0.2.0.tar.gz",
+ "version": "0.2.0",
+ },
]
_expected_new_contents_first_visit = [
@@ -115,7 +122,7 @@
"""With no prior visit, load a gnu project ends up with 1 snapshot
"""
- loader = ArchiveLoader(swh_storage, URL, artifacts=GNU_ARTIFACTS)
+ loader = ArchiveLoader(swh_storage, URL, artifacts=GNU_ARTIFACTS[:1])
actual_load_status = loader.load()
assert actual_load_status["status"] == "eventful"
@@ -173,7 +180,7 @@
"""
url = URL
- loader = ArchiveLoader(swh_storage, url, artifacts=GNU_ARTIFACTS)
+ loader = ArchiveLoader(swh_storage, url, artifacts=GNU_ARTIFACTS[:1])
actual_load_status = loader.load()
assert actual_load_status["status"] == "eventful"
@@ -229,13 +236,7 @@
]
assert len(urls) == 1
- artifact2 = {
- "time": 1480991830,
- "url": "https://ftp.gnu.org/gnu/8sync/8sync-0.2.0.tar.gz",
- "length": 238466,
- "filename": "8sync-0.2.0.tar.gz",
- "version": "0.2.0",
- }
+ artifact2 = GNU_ARTIFACTS[1]
loader2 = ArchiveLoader(swh_storage, url, [artifact1, artifact2])
stats2 = get_stats(swh_storage)
@@ -341,3 +342,77 @@
with pytest.raises(KeyError):
p_info.extid(manifest_format=string.Template("$a $unknown_key"))
+
+
+def test_archive_snapshot_append(swh_storage, requests_mock_datadir):
+ # first loading with a first artifact
+ artifact1 = GNU_ARTIFACTS[0]
+ loader = ArchiveLoader(swh_storage, URL, [artifact1], snapshot_append=True)
+ actual_load_status = loader.load()
+ assert actual_load_status["status"] == "eventful"
+ assert actual_load_status["snapshot_id"] is not None
+ assert_last_visit_matches(swh_storage, URL, status="full", type="tar")
+
+ # check expected snapshot
+ snapshot = loader.last_snapshot()
+ assert len(snapshot.branches) == 2
+ branch_artifact1_name = f"releases/{artifact1['version']}".encode()
+ assert b"HEAD" in snapshot.branches
+ assert branch_artifact1_name in snapshot.branches
+ assert snapshot.branches[b"HEAD"].target == branch_artifact1_name
+
+ # second loading with a second artifact
+ artifact2 = GNU_ARTIFACTS[1]
+ loader = ArchiveLoader(swh_storage, URL, [artifact2], snapshot_append=True)
+ actual_load_status = loader.load()
+ assert actual_load_status["status"] == "eventful"
+ assert actual_load_status["snapshot_id"] is not None
+ assert_last_visit_matches(swh_storage, URL, status="full", type="tar")
+
+ # check expected snapshot, should contain a new branch and the
+ # branch for the first artifact
+ snapshot = loader.last_snapshot()
+ assert len(snapshot.branches) == 3
+ branch_artifact2_name = f"releases/{artifact2['version']}".encode()
+ assert b"HEAD" in snapshot.branches
+ assert branch_artifact2_name in snapshot.branches
+ assert branch_artifact1_name in snapshot.branches
+ assert snapshot.branches[b"HEAD"].target == branch_artifact2_name
+
+
+def test_archive_snapshot_append_branch_override(swh_storage, requests_mock_datadir):
+ # first loading for a first artifact
+ artifact1 = GNU_ARTIFACTS[0]
+ loader = ArchiveLoader(swh_storage, URL, [artifact1], snapshot_append=True)
+ actual_load_status = loader.load()
+ assert actual_load_status["status"] == "eventful"
+ assert actual_load_status["snapshot_id"] is not None
+ assert_last_visit_matches(swh_storage, URL, status="full", type="tar")
+
+ # check expected snapshot
+ snapshot = loader.last_snapshot()
+ assert len(snapshot.branches) == 2
+ branch_artifact1_name = f"releases/{artifact1['version']}".encode()
+ assert branch_artifact1_name in snapshot.branches
+ branch_target_first_visit = snapshot.branches[branch_artifact1_name].target
+
+ # second loading for a second artifact with same version as the first one
+ # but with different tarball content
+ artifact2 = dict(GNU_ARTIFACTS[0])
+ artifact2["url"] = GNU_ARTIFACTS[1]["url"]
+ artifact2["time"] = GNU_ARTIFACTS[1]["time"]
+ artifact2["length"] = GNU_ARTIFACTS[1]["length"]
+ loader = ArchiveLoader(swh_storage, URL, [artifact2], snapshot_append=True)
+ actual_load_status = loader.load()
+ assert actual_load_status["status"] == "eventful"
+ assert actual_load_status["snapshot_id"] is not None
+ assert_last_visit_matches(swh_storage, URL, status="full", type="tar")
+
+ # check expected snapshot, should contain the same branch as previously
+ # but with different target
+ snapshot = loader.last_snapshot()
+ assert len(snapshot.branches) == 2
+ assert branch_artifact1_name in snapshot.branches
+ branch_target_second_visit = snapshot.branches[branch_artifact1_name].target
+
+ assert branch_target_first_visit != branch_target_second_visit
diff --git a/swh/loader/package/archive/tests/test_tasks.py b/swh/loader/package/archive/tests/test_tasks.py
--- a/swh/loader/package/archive/tests/test_tasks.py
+++ b/swh/loader/package/archive/tests/test_tasks.py
@@ -19,3 +19,20 @@
assert res.successful()
assert mock_load.called
assert res.result == {"status": "eventful"}
+
+
+def test_tasks_archive_loader_snapshot_append(
+ mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config
+):
+ mock_load = mocker.patch("swh.loader.package.archive.loader.ArchiveLoader.load")
+ mock_load.return_value = {"status": "eventful"}
+
+ res = swh_scheduler_celery_app.send_task(
+ "swh.loader.package.archive.tasks.LoadArchive",
+ kwargs=dict(url="https://gnu.org/", artifacts=[], snapshot_append=True),
+ )
+ assert res
+ res.wait()
+ assert res.successful()
+ assert mock_load.called
+ assert res.result == {"status": "eventful"}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Dec 21 2024, 5:53 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3221905
Attached To
D5789: package/archive: Add snapshot_append parameter to ArchiveLoader
Event Timeline
Log In to Comment