diff --git a/requirements-swh.txt b/requirements-swh.txt --- a/requirements-swh.txt +++ b/requirements-swh.txt @@ -1,4 +1,4 @@ -swh.storage >= 0.1 +swh.storage >= 0.5.0 swh.model >= 0.3 swh.scheduler >= 0.0.39 swh.loader.core >= 0.2 diff --git a/swh/loader/svn/loader.py b/swh/loader/svn/loader.py --- a/swh/loader/svn/loader.py +++ b/swh/loader/svn/loader.py @@ -124,6 +124,7 @@ self.swh_revision = swh_revision self.max_content_length = self.config["max_content_size"] self.snapshot = None + self.last_visit = None def pre_cleanup(self): """Cleanup potential dangling files from prior runs (e.g. OOM killed @@ -166,13 +167,14 @@ self.svnrepo.clean_fs(local_dirname) return h - def swh_latest_snapshot_revision(self, origin_url, previous_swh_revision=None): + def swh_latest_snapshot_revision( + self, origin_url: str, previous_swh_revision: Optional[bytes] = None + ): """Look for latest snapshot revision and returns it if any. Args: - origin_url (str): Origin identifier - previous_swh_revision: (optional) id of a possible - previous swh revision + origin_url: Origin identifier + previous_swh_revision: id of a possible previous swh revision Returns: dict: The latest known point in time. Dict with keys: @@ -180,14 +182,20 @@ 'revision': latest visited revision 'snapshot': latest snapshot - If None is found, return an empty dict. + If nothing matching criteria is found, return an empty dict. 
""" storage = self.storage if not previous_swh_revision: # check latest snapshot's revision - visit = storage.origin_visit_get_latest(origin_url, require_snapshot=True) - if visit: - latest_snap = snapshot_get_all_branches(storage, visit["snapshot"]) + if self.last_visit is None: + return {} + visit_id = self.last_visit.visit + assert visit_id is not None + visit_status = storage.origin_visit_status_get_latest( + origin_url, visit_id, require_snapshot=True + ) + if visit_status: + latest_snap = snapshot_get_all_branches(storage, visit_status.snapshot) if latest_snap: branches = latest_snap.get("branches") if not branches: @@ -452,6 +460,7 @@ def prepare_origin_visit(self, *args, **kwargs): self.origin = Origin(url=self.origin_url if self.origin_url else self.svn_url) + self.last_visit = self.storage.origin_visit_get_latest(self.origin.url) def prepare(self, *args, **kwargs): if self.swh_revision: diff --git a/swh/loader/svn/tests/test_loader.py b/swh/loader/svn/tests/test_loader.py --- a/swh/loader/svn/tests/test_loader.py +++ b/swh/loader/svn/tests/test_loader.py @@ -6,6 +6,8 @@ import copy import os +from typing import Optional + from swh.loader.core.tests import BaseLoaderTest from swh.loader.svn.loader import ( DEFAULT_BRANCH, @@ -14,7 +16,47 @@ build_swh_snapshot, ) from swh.model import hashutil -from swh.model.model import Origin, Snapshot +from swh.model.model import Origin, OriginVisitStatus, Snapshot + + +def assert_last_visit_ok( + storage, + url: str, + status: str, + type: Optional[str] = None, + snapshot: Optional[bytes] = None, +) -> OriginVisitStatus: + """Ensure the latest visit and its visit status match the expected values. + + This returns the last visit_status for that given origin + + Args: + url: Origin url + status: expected status + type: expected visit type, checked only when provided + + Raises: + AssertionError in case visit or visit status is not found + + Returns: + the visit status for further check during the remaining part of the test. 
+ + """ + visit = storage.origin_visit_get_latest(url) + assert visit is not None, f"Visit should exist for origin {url}" + if type: + assert visit["type"] == type + + visit_id = visit["visit"] + visit_status = storage.origin_visit_status_get_latest(url, visit_id) + assert ( + visit_status is not None + ), f"Visit status should exist for origin {url}, visit {visit_id}" + assert visit_status.status == status + if snapshot is not None: + assert visit_status.snapshot == snapshot + + return visit_status def test_build_swh_snapshot(): @@ -205,9 +247,13 @@ self.assertCountSnapshots(1) self.assertEqual(self.loader.visit_status(), "full") - visit = self.storage.origin_visit_get_latest(self.repo_url) - self.assertEqual(visit["snapshot"], GOURMET_SNAPSHOT) - self.assertEqual(visit["status"], "full") + assert_last_visit_ok( + self.storage, + self.repo_url, + status="full", + type="svn", + snapshot=GOURMET_SNAPSHOT, + ) _LAST_SNP_REV = { @@ -253,9 +299,14 @@ self.assertCountReleases(0) self.assertCountSnapshots(1) self.assertEqual(self.loader.visit_status(), "full") - visit = self.storage.origin_visit_get_latest(self.repo_url) - self.assertEqual(visit["snapshot"], GOURMET_FLAG_SNAPSHOT) - self.assertEqual(visit["status"], "full") + + assert_last_visit_ok( + self.storage, + self.repo_url, + status="full", + type="svn", + snapshot=GOURMET_FLAG_SNAPSHOT, + ) class SvnLoaderTest3(BaseSvnLoaderTest): @@ -296,9 +347,10 @@ self.assertCountSnapshots(0) self.assertEqual(self.loader.visit_status(), "partial") - visit = self.storage.origin_visit_get_latest(self.repo_url) - self.assertEqual(visit["snapshot"], None) - self.assertEqual(visit["status"], "partial") + visit_status = assert_last_visit_ok( + self.storage, self.repo_url, status="partial", type="svn" + ) + assert visit_status.snapshot is None class SvnLoaderTest4(BaseSvnLoaderTest): @@ -344,9 +396,13 @@ self.assertCountSnapshots(1) self.assertEqual(self.loader.visit_status(), "full") - visit = 
self.storage.origin_visit_get_latest(self.repo_url) - self.assertEqual(visit["snapshot"], GOURMET_UPDATES_SNAPSHOT) - self.assertEqual(visit["status"], "full") + assert_last_visit_ok( + self.storage, + self.repo_url, + status="full", + type="svn", + snapshot=GOURMET_UPDATES_SNAPSHOT, + ) class SvnLoaderTest5(BaseSvnLoaderTest): @@ -365,11 +421,14 @@ start_from_scratch=True, ) - def test_load(self): + def test_load_twice(self): """Load an existing repository from scratch yields same swh objects """ - # when + # do not clean up repository at the end of the load (so we can run twice the + # ingestion) + self.loader.debug = True + assert self.loader.load() == {"status": "eventful"} # then @@ -398,9 +457,25 @@ self.assertCountSnapshots(1) self.assertEqual(self.loader.visit_status(), "full") - visit = self.storage.origin_visit_get_latest(self.repo_url) - self.assertEqual(visit["snapshot"], GOURMET_UPDATES_SNAPSHOT) - self.assertEqual(visit["status"], "full") + assert_last_visit_ok( + self.storage, + self.repo_url, + status="full", + type="svn", + snapshot=GOURMET_UPDATES_SNAPSHOT, + ) + + self.loader.debug = False # clean now + + assert self.loader.load() == {"status": "eventful"} + + assert_last_visit_ok( + self.storage, + self.repo_url, + status="full", + type="svn", + snapshot=GOURMET_UPDATES_SNAPSHOT, + ) class SvnLoaderTest6(BaseSvnLoaderTest): @@ -469,9 +544,13 @@ self.assertCountSnapshots(1) self.assertEqual(self.loader.visit_status(), "full") - visit = self.storage.origin_visit_get_latest(self.repo_url) - self.assertEqual(visit["snapshot"], GOURMET_UPDATES_SNAPSHOT) - self.assertEqual(visit["status"], "full") + assert_last_visit_ok( + self.storage, + self.repo_url, + status="full", + type="svn", + snapshot=GOURMET_UPDATES_SNAPSHOT, + ) class SvnLoaderTest7(BaseSvnLoaderTest): @@ -535,9 +614,13 @@ self.assertCountSnapshots(1) self.assertEqual(self.loader.visit_status(), "full") - visit = self.storage.origin_visit_get_latest(self.repo_url) - 
self.assertEqual(visit["snapshot"], GOURMET_UPDATES_SNAPSHOT) - self.assertEqual(visit["status"], "full") + assert_last_visit_ok( + self.storage, + self.repo_url, + status="full", + type="svn", + snapshot=GOURMET_UPDATES_SNAPSHOT, + ) class SvnLoaderTest8(BaseSvnLoaderTest): @@ -622,9 +705,13 @@ self.assertCountSnapshots(1) self.assertEqual(self.loader.visit_status(), "full") - visit = self.storage.origin_visit_get_latest(self.repo_url) - self.assertEqual(visit["snapshot"], GOURMET_UPDATES_SNAPSHOT) - self.assertEqual(visit["status"], "full") + assert_last_visit_ok( + self.storage, + self.repo_url, + status="full", + type="svn", + snapshot=GOURMET_UPDATES_SNAPSHOT, + ) class SvnLoaderTest9(BaseSvnLoaderTest): @@ -655,9 +742,13 @@ self.assertCountSnapshots(1) self.assertEqual(self.loader.visit_status(), "full") - visit = self.storage.origin_visit_get_latest(self.repo_url) - self.assertEqual(visit["snapshot"], MEDIAWIKI_SNAPSHOT) - self.assertEqual(visit["status"], "full") + assert_last_visit_ok( + self.storage, + self.repo_url, + status="full", + type="svn", + snapshot=MEDIAWIKI_SNAPSHOT, + ) class SvnLoaderTest10(BaseSvnLoaderTest): # noqa @@ -689,9 +780,13 @@ self.assertCountSnapshots(1) self.assertEqual(self.loader.visit_status(), "full") - visit = self.storage.origin_visit_get_latest(self.repo_url) - self.assertEqual(visit["snapshot"], PYANG_SNAPSHOT) - self.assertEqual(visit["status"], "full") + assert_last_visit_ok( + self.storage, + self.repo_url, + status="full", + type="svn", + snapshot=PYANG_SNAPSHOT, + ) class SvnLoaderTest11(BaseSvnLoaderTest): @@ -753,9 +848,13 @@ self.assertCountSnapshots(1) self.assertEqual(self.loader.visit_status(), "partial") - visit = self.storage.origin_visit_get_latest(self.repo_url) - self.assertEqual(visit["snapshot"], GOURMET_EXTERNALS_SNAPSHOT) - self.assertEqual(visit["status"], "partial") + assert_last_visit_ok( + self.storage, + self.repo_url, + status="partial", + type="svn", + snapshot=GOURMET_EXTERNALS_SNAPSHOT, + ) 
class SvnLoaderTest12(BaseSvnLoaderTest): @@ -813,9 +912,13 @@ self.assertCountSnapshots(1) self.assertEqual(self.loader.visit_status(), "full") - visit = self.storage.origin_visit_get_latest(self.repo_url) - self.assertEqual(visit["snapshot"], GOURMET_EDGE_CASES_SNAPSHOT) - self.assertEqual(visit["status"], "full") + assert_last_visit_ok( + self.storage, + self.repo_url, + status="full", + type="svn", + snapshot=GOURMET_EDGE_CASES_SNAPSHOT, + ) class SvnLoaderTest13(BaseSvnLoaderTest): @@ -870,9 +973,13 @@ self.assertCountSnapshots(1) self.assertEqual(self.loader.visit_status(), "full") - visit = self.storage.origin_visit_get_latest(self.repo_url) - self.assertEqual(visit["snapshot"], GOURMET_WRONG_LINKS_SNAPSHOT) - self.assertEqual(visit["status"], "full") + assert_last_visit_ok( + self.storage, + self.repo_url, + status="full", + type="svn", + snapshot=GOURMET_WRONG_LINKS_SNAPSHOT, + ) class SvnLoaderTestFromRemoteDump(SvnLoaderTest, SvnLoaderFromRemoteDump): @@ -908,11 +1015,21 @@ base_storage_stat = base_loader.storage.stat_counters() self.assertEqual(dump_storage_stat, base_storage_stat) - visit = dump_loader.storage.origin_visit_get_latest(self.repo_url) - self.assertEqual(visit["snapshot"], GOURMET_SNAPSHOT) + assert_last_visit_ok( + dump_loader.storage, + self.repo_url, + status="full", + type="svn", + snapshot=GOURMET_SNAPSHOT, + ) - visit = base_loader.storage.origin_visit_get_latest(self.repo_url) - self.assertEqual(visit["snapshot"], GOURMET_SNAPSHOT) + assert_last_visit_ok( + base_loader.storage, + self.repo_url, + status="full", + type="svn", + snapshot=GOURMET_SNAPSHOT, + ) class SvnLoaderTest14(BaseSvnLoaderTest): @@ -958,8 +1075,10 @@ self.assertEqual(self.loader.visit_status(), "full") - visit = self.storage.origin_visit_get_latest(self.repo_url) - self.assertEqual( - visit["snapshot"], hashutil.hash_to_bytes(expected_snapshot_id) + assert_last_visit_ok( + self.storage, + self.repo_url, + status="full", + type="svn", + 
snapshot=hashutil.hash_to_bytes(expected_snapshot_id), ) - self.assertEqual(visit["status"], "full")