Page Menu | Home | Software Heritage

D5789.diff
No One | Temporary

D5789.diff

diff --git a/swh/loader/package/archive/loader.py b/swh/loader/package/archive/loader.py
--- a/swh/loader/package/archive/loader.py
+++ b/swh/loader/package/archive/loader.py
@@ -8,7 +8,7 @@
import logging
from os import path
import string
-from typing import Any, Dict, Iterator, Optional, Sequence, Tuple, Union
+from typing import Any, Dict, Iterator, Mapping, Optional, Sequence, Tuple, Union
import attr
import iso8601
@@ -84,6 +84,7 @@
artifacts: Sequence[Dict[str, Any]],
extid_manifest_format: Optional[str] = None,
max_content_size: Optional[int] = None,
+ snapshot_append: bool = False,
):
f"""Loader constructor.
@@ -107,6 +108,8 @@
extid_manifest_format: template string used to format a manifest,
which is hashed to get the extid of a package.
Defaults to {ArchivePackageInfo.MANIFEST_FORMAT!r}
+ snapshot_append: if :const:`True`, append latest snapshot content to
+ the new snapshot created by the loader
"""
super().__init__(storage=storage, url=url, max_content_size=max_content_size)
@@ -116,6 +119,7 @@
if extid_manifest_format is None
else string.Template(extid_manifest_format)
)
+ self.snapshot_append = snapshot_append
def get_versions(self) -> Sequence[str]:
versions = []
@@ -164,3 +168,9 @@
directory=directory,
synthetic=True,
)
+
+ def extra_branches(self) -> Dict[bytes, Mapping[str, Any]]:
+ if not self.snapshot_append:
+ return {}
+ last_snapshot = self.last_snapshot()
+ return last_snapshot.to_dict()["branches"] if last_snapshot else {}
diff --git a/swh/loader/package/archive/tasks.py b/swh/loader/package/archive/tasks.py
--- a/swh/loader/package/archive/tasks.py
+++ b/swh/loader/package/archive/tasks.py
@@ -9,7 +9,9 @@
@shared_task(name=__name__ + ".LoadArchive")
-def load_archive_files(*, url=None, artifacts=None):
+def load_archive_files(*, url=None, artifacts=None, snapshot_append=False):
"""Load archive's artifacts (e.g gnu, etc...)"""
- loader = ArchiveLoader.from_configfile(url=url, artifacts=artifacts)
+ loader = ArchiveLoader.from_configfile(
+ url=url, artifacts=artifacts, snapshot_append=snapshot_append
+ )
return loader.load()
diff --git a/swh/loader/package/archive/tests/test_archive.py b/swh/loader/package/archive/tests/test_archive.py
--- a/swh/loader/package/archive/tests/test_archive.py
+++ b/swh/loader/package/archive/tests/test_archive.py
@@ -22,7 +22,14 @@
"length": 221837,
"filename": "8sync-0.1.0.tar.gz",
"version": "0.1.0",
- }
+ },
+ {
+ "time": 1480991830,
+ "url": "https://ftp.gnu.org/gnu/8sync/8sync-0.2.0.tar.gz",
+ "length": 238466,
+ "filename": "8sync-0.2.0.tar.gz",
+ "version": "0.2.0",
+ },
]
_expected_new_contents_first_visit = [
@@ -115,7 +122,7 @@
"""With no prior visit, load a gnu project ends up with 1 snapshot
"""
- loader = ArchiveLoader(swh_storage, URL, artifacts=GNU_ARTIFACTS)
+ loader = ArchiveLoader(swh_storage, URL, artifacts=GNU_ARTIFACTS[:1])
actual_load_status = loader.load()
assert actual_load_status["status"] == "eventful"
@@ -173,7 +180,7 @@
"""
url = URL
- loader = ArchiveLoader(swh_storage, url, artifacts=GNU_ARTIFACTS)
+ loader = ArchiveLoader(swh_storage, url, artifacts=GNU_ARTIFACTS[:1])
actual_load_status = loader.load()
assert actual_load_status["status"] == "eventful"
@@ -229,13 +236,7 @@
]
assert len(urls) == 1
- artifact2 = {
- "time": 1480991830,
- "url": "https://ftp.gnu.org/gnu/8sync/8sync-0.2.0.tar.gz",
- "length": 238466,
- "filename": "8sync-0.2.0.tar.gz",
- "version": "0.2.0",
- }
+ artifact2 = GNU_ARTIFACTS[1]
loader2 = ArchiveLoader(swh_storage, url, [artifact1, artifact2])
stats2 = get_stats(swh_storage)
@@ -341,3 +342,77 @@
with pytest.raises(KeyError):
p_info.extid(manifest_format=string.Template("$a $unknown_key"))
+
+
+def test_archive_snapshot_append(swh_storage, requests_mock_datadir):
+ # first loading with a first artifact
+ artifact1 = GNU_ARTIFACTS[0]
+ loader = ArchiveLoader(swh_storage, URL, [artifact1], snapshot_append=True)
+ actual_load_status = loader.load()
+ assert actual_load_status["status"] == "eventful"
+ assert actual_load_status["snapshot_id"] is not None
+ assert_last_visit_matches(swh_storage, URL, status="full", type="tar")
+
+ # check expected snapshot
+ snapshot = loader.last_snapshot()
+ assert len(snapshot.branches) == 2
+ branch_artifact1_name = f"releases/{artifact1['version']}".encode()
+ assert b"HEAD" in snapshot.branches
+ assert branch_artifact1_name in snapshot.branches
+ assert snapshot.branches[b"HEAD"].target == branch_artifact1_name
+
+ # second loading with a second artifact
+ artifact2 = GNU_ARTIFACTS[1]
+ loader = ArchiveLoader(swh_storage, URL, [artifact2], snapshot_append=True)
+ actual_load_status = loader.load()
+ assert actual_load_status["status"] == "eventful"
+ assert actual_load_status["snapshot_id"] is not None
+ assert_last_visit_matches(swh_storage, URL, status="full", type="tar")
+
+ # check expected snapshot, should contain a new branch and the
+ # branch for the first artifact
+ snapshot = loader.last_snapshot()
+ assert len(snapshot.branches) == 3
+ branch_artifact2_name = f"releases/{artifact2['version']}".encode()
+ assert b"HEAD" in snapshot.branches
+ assert branch_artifact2_name in snapshot.branches
+ assert branch_artifact1_name in snapshot.branches
+ assert snapshot.branches[b"HEAD"].target == branch_artifact2_name
+
+
+def test_archive_snapshot_append_branch_override(swh_storage, requests_mock_datadir):
+ # first loading for a first artifact
+ artifact1 = GNU_ARTIFACTS[0]
+ loader = ArchiveLoader(swh_storage, URL, [artifact1], snapshot_append=True)
+ actual_load_status = loader.load()
+ assert actual_load_status["status"] == "eventful"
+ assert actual_load_status["snapshot_id"] is not None
+ assert_last_visit_matches(swh_storage, URL, status="full", type="tar")
+
+ # check expected snapshot
+ snapshot = loader.last_snapshot()
+ assert len(snapshot.branches) == 2
+ branch_artifact1_name = f"releases/{artifact1['version']}".encode()
+ assert branch_artifact1_name in snapshot.branches
+ branch_target_first_visit = snapshot.branches[branch_artifact1_name].target
+
+ # second loading for a second artifact with same version as the first one
+ # but with different tarball content
+ artifact2 = dict(GNU_ARTIFACTS[0])
+ artifact2["url"] = GNU_ARTIFACTS[1]["url"]
+ artifact2["time"] = GNU_ARTIFACTS[1]["time"]
+ artifact2["length"] = GNU_ARTIFACTS[1]["length"]
+ loader = ArchiveLoader(swh_storage, URL, [artifact2], snapshot_append=True)
+ actual_load_status = loader.load()
+ assert actual_load_status["status"] == "eventful"
+ assert actual_load_status["snapshot_id"] is not None
+ assert_last_visit_matches(swh_storage, URL, status="full", type="tar")
+
+ # check expected snapshot, should contain the same branch as previously
+ # but with different target
+ snapshot = loader.last_snapshot()
+ assert len(snapshot.branches) == 2
+ assert branch_artifact1_name in snapshot.branches
+ branch_target_second_visit = snapshot.branches[branch_artifact1_name].target
+
+ assert branch_target_first_visit != branch_target_second_visit
diff --git a/swh/loader/package/archive/tests/test_tasks.py b/swh/loader/package/archive/tests/test_tasks.py
--- a/swh/loader/package/archive/tests/test_tasks.py
+++ b/swh/loader/package/archive/tests/test_tasks.py
@@ -19,3 +19,20 @@
assert res.successful()
assert mock_load.called
assert res.result == {"status": "eventful"}
+
+
+def test_tasks_archive_loader_snapshot_append(
+ mocker, swh_scheduler_celery_app, swh_scheduler_celery_worker, swh_config
+):
+ mock_load = mocker.patch("swh.loader.package.archive.loader.ArchiveLoader.load")
+ mock_load.return_value = {"status": "eventful"}
+
+ res = swh_scheduler_celery_app.send_task(
+ "swh.loader.package.archive.tasks.LoadArchive",
+ kwargs=dict(url="https://gnu.org/", artifacts=[], snapshot_append=True),
+ )
+ assert res
+ res.wait()
+ assert res.successful()
+ assert mock_load.called
+ assert res.result == {"status": "eventful"}

File Metadata

Mime Type
text/plain
Expires
Dec 21 2024, 5:53 PM (11 w, 4 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3221905

Event Timeline