Changeset View
Changeset View
Standalone View
Standalone View
swh/vault/to_disk.py
# Copyright (C) 2016-2020 The Software Heritage developers | # Copyright (C) 2016-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import collections | import collections | ||||
import functools | import functools | ||||
import os | import os | ||||
from typing import Any, Dict, Iterator, List | from typing import Any, Dict, Iterator, List | ||||
from swh.model import hashutil | from swh.model import hashutil | ||||
from swh.model.from_disk import DentryPerms, mode_to_perms | from swh.model.from_disk import DentryPerms, mode_to_perms | ||||
from swh.storage.algos.dir_iterators import dir_iterator | from swh.storage.algos.dir_iterators import dir_iterator | ||||
from swh.storage.interface import StorageInterface | from swh.storage.interface import StorageInterface | ||||
MISSING_MESSAGE = ( | |||||
b"This content is missing from the Software Heritage archive " | |||||
b"(or from the mirror used while retrieving it)." | |||||
) | |||||
SKIPPED_MESSAGE = ( | SKIPPED_MESSAGE = ( | ||||
b"This content has not been retrieved in the " | b"This content has not been retrieved in the " | ||||
b"Software Heritage archive due to its size." | b"Software Heritage archive due to its size." | ||||
) | ) | ||||
HIDDEN_MESSAGE = b"This content is hidden." | HIDDEN_MESSAGE = b"This content is hidden." | ||||
Show All 13 Lines | Yields: | ||||
The contents can be replaced by a specific message to indicate that | The contents can be replaced by a specific message to indicate that | ||||
they could not be retrieved (either due to privacy policy or because | they could not be retrieved (either due to privacy policy or because | ||||
their sizes were too big for us to archive them). | their sizes were too big for us to archive them). | ||||
""" | """ | ||||
for file_data in files_data: | for file_data in files_data: | ||||
status = file_data["status"] | status = file_data["status"] | ||||
if status == "absent": | if status == "visible": | ||||
content = SKIPPED_MESSAGE | |||||
elif status == "hidden": | |||||
content = HIDDEN_MESSAGE | |||||
elif status == "visible": | |||||
sha1 = file_data["sha1"] | sha1 = file_data["sha1"] | ||||
data = storage.content_get_data(sha1) | data = storage.content_get_data(sha1) | ||||
if data is None: | if data is None: | ||||
content = SKIPPED_MESSAGE | content = SKIPPED_MESSAGE | ||||
else: | else: | ||||
content = data | content = data | ||||
elif status == "absent": | |||||
content = SKIPPED_MESSAGE | |||||
elif status == "hidden": | |||||
content = HIDDEN_MESSAGE | |||||
elif status is None: | |||||
content = MISSING_MESSAGE | |||||
else: | else: | ||||
assert False, ( | assert False, ( | ||||
f"unexpected status {status!r} " | f"unexpected status {status!r} " | ||||
f"for content {hashutil.hash_to_hex(file_data['target'])}" | f"for content {hashutil.hash_to_hex(file_data['target'])}" | ||||
) | ) | ||||
yield {"content": content, **file_data} | yield {"content": content, **file_data} | ||||
▲ Show 20 Lines • Show All 76 Lines • Show Last 20 Lines |