Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/loader.py
Show All 25 Lines | from typing import ( | ||||
TypeVar, | TypeVar, | ||||
) | ) | ||||
import attr | import attr | ||||
from requests.exceptions import ContentDecodingError | from requests.exceptions import ContentDecodingError | ||||
import sentry_sdk | import sentry_sdk | ||||
from swh.core.tarball import uncompress | from swh.core.tarball import uncompress | ||||
from swh.loader.core import discovery | |||||
from swh.loader.core.loader import BaseLoader | from swh.loader.core.loader import BaseLoader | ||||
from swh.loader.exception import NotFound | from swh.loader.exception import NotFound | ||||
from swh.loader.package.utils import download | from swh.loader.package.utils import download | ||||
from swh.model import from_disk | from swh.model import from_disk | ||||
from swh.model.hashutil import hash_to_hex | from swh.model.hashutil import hash_to_hex | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
ExtID, | ExtID, | ||||
MetadataAuthority, | MetadataAuthority, | ||||
▲ Show 20 Lines • Show All 770 Lines • ▼ Show 20 Lines | ) -> Tuple[str, from_disk.Directory]: | ||||
directory = from_disk.Directory.from_disk( | directory = from_disk.Directory.from_disk( | ||||
path=uncompressed_path.encode("utf-8"), | path=uncompressed_path.encode("utf-8"), | ||||
max_content_length=self.max_content_size, | max_content_length=self.max_content_size, | ||||
) | ) | ||||
contents, skipped_contents, directories = from_disk.iter_directory(directory) | contents, skipped_contents, directories = from_disk.iter_directory(directory) | ||||
# Instead of sending everything from the bottom up to the storage, | |||||
# use a Merkle graph discovery algorithm to filter out known objects. | |||||
contents, skipped_contents, directories = discovery.filter_known_objects( | |||||
self.storage, contents, skipped_contents, directories | |||||
) | |||||
logger.debug("Number of skipped contents: %s", len(skipped_contents)) | logger.debug("Number of skipped contents: %s", len(skipped_contents)) | ||||
self.storage.skipped_content_add(skipped_contents) | self.storage.skipped_content_add(skipped_contents) | ||||
logger.debug("Number of contents: %s", len(contents)) | logger.debug("Number of contents: %s", len(contents)) | ||||
self.storage.content_add(contents) | self.storage.content_add(contents) | ||||
logger.debug("Number of directories: %s", len(directories)) | logger.debug("Number of directories: %s", len(directories)) | ||||
self.storage.directory_add(directories) | self.storage.directory_add(directories) | ||||
▲ Show 20 Lines • Show All 284 Lines • Show Last 20 Lines |