diff --git a/swh/loader/tests/__init__.py b/swh/loader/tests/__init__.py --- a/swh/loader/tests/__init__.py +++ b/swh/loader/tests/__init__.py @@ -160,17 +160,17 @@ assert expected_snapshot == actual_snaphot - branches_by_target_type = defaultdict(list) + objects_by_target_type = defaultdict(list) object_to_branch = {} for branch, target in actual_snaphot.branches.items(): if (target.target_type, branch) in allowed_empty: # safe for those elements to not be checked for existence continue - branches_by_target_type[target.target_type].append(target.target) + objects_by_target_type[target.target_type].append(target.target) object_to_branch[target.target] = branch # check that alias references target something that exists, otherwise raise - aliases: List[bytes] = branches_by_target_type.get(TargetType.ALIAS, []) + aliases: List[bytes] = objects_by_target_type.get(TargetType.ALIAS, []) for alias in aliases: if alias not in actual_snaphot.branches: raise InconsistentAliasBranchError( @@ -178,7 +178,7 @@ f"should be in {list(actual_snaphot.branches)}" ) - revs = branches_by_target_type.get(TargetType.REVISION) + revs = objects_by_target_type.get(TargetType.REVISION) if revs: revisions = list(storage.revision_get(revs)) not_found = [rev_id for rev_id, rev in zip(revs, revisions) if rev is None] @@ -189,11 +189,14 @@ raise InexistentObjectsError( f"Branch/Revision(s) {missing_objs} should exist in storage" ) + # retrieve information from revision + for rev in revisions: + objects_by_target_type[TargetType.DIRECTORY].append(rev["directory"]) + object_to_branch[rev["directory"]] = rev["id"] - rels = branches_by_target_type.get(TargetType.RELEASE) + rels = objects_by_target_type.get(TargetType.RELEASE) if rels: - releases = list(storage.release_get(rels)) - not_found = [rel_id for rel_id, rel in zip(rels, releases) if rel is None] + not_found = list(storage.release_missing(rels)) if not_found: missing_objs = ", ".join( str((object_to_branch[rel], rel.hex())) for rel in 
not_found @@ -202,6 +205,18 @@ f"Branch/Release(s) {missing_objs} should exist in storage" ) + dirs = objects_by_target_type.get(TargetType.DIRECTORY) + if dirs: + not_found = list(storage.directory_missing(dirs)) + if not_found: + missing_objs = ", ".join( + str((object_to_branch[dir_].hex(), dir_.hex())) for dir_ in not_found + ) + raise InexistentObjectsError( + f"Missing directories {missing_objs}: " + "(revision exists, directory target does not)" + ) + # for retro compat, returned the dict, remove when clients are migrated return snapshot_dict diff --git a/swh/loader/tests/test_init.py b/swh/loader/tests/test_init.py --- a/swh/loader/tests/test_init.py +++ b/swh/loader/tests/test_init.py @@ -361,11 +361,13 @@ 2. snapshot id is not correct, it's not found in the storage 3. snapshot reference an alias which does not exist 4. snapshot is found in storage, targeted revision does not exist - 5. snapshot is found in storage, targeted release does not exist + 5. snapshot is found in storage, targeted revision exists but the directory the + revision targets does not exist + 6. snapshot is found in storage, targeted release does not exist The following are not dealt with yet: - 6. snapshot is found in storage, targeted directory does not exist - 7. snapshot is found in storage, targeted content does not exist + 7. snapshot is found in storage, nested targeted directories do not exist + 8. snapshot is found in storage, nested targeted contents do not exist """ snap_id_hex = "2498dbf535f882bc7f9a18fb16c9ad27fda7bab7" @@ -422,6 +424,11 @@ with pytest.raises(InconsistentAliasBranchError, match="Alias branch HEAD"): check_snapshot(snapshot0, swh_storage) + # precondition: the DIRECTORY object must not be in storage yet + + not_yet = list(swh_storage.directory_missing([DIRECTORY.id])) + assert len(not_yet) == 1 + # 4. 
snapshot is found in storage, targeted revision does not exist snapshot1 = Snapshot( id=hash_to_bytes("456666f535f882bc7f9a18fb16c9ad27fda7bab7"), @@ -433,13 +440,42 @@ }, ) + not_yet = list(swh_storage.directory_missing([DIRECTORY.id])) + assert len(not_yet) == 1 + swh_storage.snapshot_add([snapshot1]) with pytest.raises(InexistentObjectsError, match="Branch/Revision"): check_snapshot(snapshot1, swh_storage) + # 5. snapshot is found in storage, targeted revision exists but the directory the + # revision targets does not exist + + not_found = list(swh_storage.directory_missing([REVISION.directory])) + assert len(not_found) == 1 + swh_storage.revision_add([REVISION.to_dict()]) + snapshot2 = Snapshot( + id=hash_to_bytes("987123f535f882bc7f9a18fb16c9ad27fda7bab7"), + branches={ + b"alias": SnapshotBranch(target=b"HEAD", target_type=TargetType.ALIAS,), + b"HEAD": SnapshotBranch( + target=REVISION.id, target_type=TargetType.REVISION, + ), + }, + ) + + swh_storage.snapshot_add([snapshot2.to_dict()]) + with pytest.raises(InexistentObjectsError, match="Missing directories"): + check_snapshot(snapshot2, swh_storage) + + assert DIRECTORY.id == REVISION.directory + not_found = list(swh_storage.directory_add([DIRECTORY])) + + # 6. snapshot is found in storage, targeted release does not exist + + snapshot3 = Snapshot( id=hash_to_bytes("789666f535f882bc7f9a18fb16c9ad27fda7bab7"), branches={ b"alias": SnapshotBranch(target=b"HEAD", target_type=TargetType.ALIAS,), @@ -452,7 +488,7 @@ }, ) - swh_storage.snapshot_add([snapshot2]) + swh_storage.snapshot_add([snapshot3]) with pytest.raises(InexistentObjectsError, match="Branch/Release"): - check_snapshot(snapshot2, swh_storage) + check_snapshot(snapshot3, swh_storage)