diff --git a/swh/vault/tests/test_to_disk.py b/swh/vault/tests/test_to_disk.py --- a/swh/vault/tests/test_to_disk.py +++ b/swh/vault/tests/test_to_disk.py @@ -70,16 +70,16 @@ "target": content.sha1_git, }, { - "status": None, + "status": "blah", "target": b"c" * 20, }, ] - with pytest.raises(AssertionError, match="unexpected status None"): + with pytest.raises(AssertionError, match="unexpected status 'blah'"): list(get_filtered_files_content(swh_storage, files_data)) -def test_directory_builder(swh_storage, tmp_path): +def _fill_storage(swh_storage, exclude_cnt3=False): cnt1 = Content.from_data(b"foo bar") cnt2 = Content.from_data(b"bar baz") cnt3 = Content.from_data(b"baz qux") @@ -115,9 +115,18 @@ ), ) ) - swh_storage.content_add([cnt1, cnt2, cnt3]) + if exclude_cnt3: + swh_storage.content_add([cnt1, cnt2]) + else: + swh_storage.content_add([cnt1, cnt2, cnt3]) swh_storage.directory_add([dir1, dir2]) + return dir2 + + +def test_directory_builder(swh_storage, tmp_path): + dir2 = _fill_storage(swh_storage) + root = tmp_path / "root" builder = DirectoryBuilder(swh_storage, bytes(root), dir2.id) @@ -132,3 +141,22 @@ root / "subdirectory" / "content2", root / "content3", } + + assert (root / "subdirectory" / "content1").open().read() == "foo bar" + assert (root / "subdirectory" / "content2").open().read() == "bar baz" + assert (root / "content3").open().read() == "baz qux" + + +def test_directory_builder_missing_content(swh_storage, tmp_path): + dir2 = _fill_storage(swh_storage, exclude_cnt3=True) + + root = tmp_path / "root" + builder = DirectoryBuilder(swh_storage, bytes(root), dir2.id) + + assert not root.exists() + + builder.build() + + assert root.is_dir() + + assert "This content is missing" in (root / "content3").open().read() diff --git a/swh/vault/to_disk.py b/swh/vault/to_disk.py --- a/swh/vault/to_disk.py +++ b/swh/vault/to_disk.py @@ -13,6 +13,11 @@ from swh.storage.algos.dir_iterators import dir_iterator from swh.storage.interface import StorageInterface +MISSING_MESSAGE = ( + b"This content is missing from the Software Heritage archive " + b"(or from the mirror used while retrieving it)." +) + SKIPPED_MESSAGE = ( b"This content has not been retrieved in the " b"Software Heritage archive due to its size." @@ -42,17 +47,19 @@ """ for file_data in files_data: status = file_data["status"] - if status == "absent": - content = SKIPPED_MESSAGE - elif status == "hidden": - content = HIDDEN_MESSAGE - elif status == "visible": + if status == "visible": sha1 = file_data["sha1"] data = storage.content_get_data(sha1) if data is None: content = SKIPPED_MESSAGE else: content = data + elif status == "absent": + content = SKIPPED_MESSAGE + elif status == "hidden": + content = HIDDEN_MESSAGE + elif status is None: + content = MISSING_MESSAGE else: assert False, ( f"unexpected status {status!r} "