Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/loader.py
Show All 40 Lines | from swh.model.model import ( | ||||
OriginVisitStatus, | OriginVisitStatus, | ||||
MetadataAuthority, | MetadataAuthority, | ||||
MetadataFetcher, | MetadataFetcher, | ||||
MetadataTargetType, | MetadataTargetType, | ||||
RawExtrinsicMetadata, | RawExtrinsicMetadata, | ||||
) | ) | ||||
from swh.model.identifiers import SWHID | from swh.model.identifiers import SWHID | ||||
from swh.storage import get_storage | from swh.storage import get_storage | ||||
from swh.storage.interface import StorageInterface | |||||
from swh.storage.utils import now | from swh.storage.utils import now | ||||
from swh.storage.algos.snapshot import snapshot_get_latest | from swh.storage.algos.snapshot import snapshot_get_latest | ||||
from swh.loader.package.utils import download | from swh.loader.package.utils import download | ||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
▲ Show 20 Lines • Show All 64 Lines • ▼ Show 20 Lines | def __init__(self, url): | ||||
Args: | Args: | ||||
url (str): Origin url to load data from | url (str): Origin url to load data from | ||||
""" | """ | ||||
# This expects to use the environment variable SWH_CONFIG_FILENAME | # This expects to use the environment variable SWH_CONFIG_FILENAME | ||||
self.config = SWHConfig.parse_config_file() | self.config = SWHConfig.parse_config_file() | ||||
self._check_configuration() | self._check_configuration() | ||||
self.storage = get_storage(**self.config["storage"]) | self.storage: StorageInterface = get_storage(**self.config["storage"]) | ||||
self.url = url | self.url = url | ||||
self.visit_date = datetime.datetime.now(tz=datetime.timezone.utc) | self.visit_date = datetime.datetime.now(tz=datetime.timezone.utc) | ||||
self.max_content_size = self.config["max_content_size"] | self.max_content_size = self.config["max_content_size"] | ||||
def _check_configuration(self): | def _check_configuration(self): | ||||
"""Checks the minimal configuration required is set for the loader. | """Checks the minimal configuration required is set for the loader. | ||||
If some required configuration is missing, exception detailing the | If some required configuration is missing, exception detailing the | ||||
▲ Show 20 Lines • Show All 543 Lines • ▼ Show 20 Lines | ) -> None: | ||||
self.storage.raw_extrinsic_metadata_add(metadata_objects) | self.storage.raw_extrinsic_metadata_add(metadata_objects) | ||||
def _create_authorities(self, authorities: Iterable[MetadataAuthority]) -> None:
    """Add the given metadata authorities to storage, without duplicates.

    Authorities are deduplicated on their ``(type, url)`` pair; when several
    share the same pair, the last one seen is kept.

    Args:
        authorities: authorities to add. Any iterable is accepted,
            including one-shot iterators and generators.

    ``self.storage.metadata_authority_add`` is called with a list, and only
    when at least one authority remains after deduplication.
    """
    deduplicated_authorities = {
        (authority.type, authority.url): authority for authority in authorities
    }
    # Test the deduplicated mapping rather than the argument: an iterator or
    # generator is always truthy (and is already consumed at this point), so
    # checking ``authorities`` itself would not detect an empty input.
    if deduplicated_authorities:
        self.storage.metadata_authority_add(list(deduplicated_authorities.values()))
def _create_fetchers(self, fetchers: Iterable[MetadataFetcher]) -> None:
    """Add the given metadata fetchers to storage, without duplicates.

    Fetchers are deduplicated on their ``(name, version)`` pair; when several
    share the same pair, the last one seen is kept.

    Args:
        fetchers: fetchers to add. Any iterable is accepted, including
            one-shot iterators and generators.

    ``self.storage.metadata_fetcher_add`` is called with a list, and only
    when at least one fetcher remains after deduplication.
    """
    deduplicated_fetchers = {
        (fetcher.name, fetcher.version): fetcher for fetcher in fetchers
    }
    # Test the deduplicated mapping rather than the argument: an iterator or
    # generator is always truthy (and is already consumed at this point), so
    # checking ``fetchers`` itself would not detect an empty input.
    if deduplicated_fetchers:
        self.storage.metadata_fetcher_add(list(deduplicated_fetchers.values()))