Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/loader.py
Show All 10 Lines | |||||
import os | import os | ||||
import string | import string | ||||
import sys | import sys | ||||
import tempfile | import tempfile | ||||
from typing import ( | from typing import ( | ||||
Any, | Any, | ||||
Dict, | Dict, | ||||
Generic, | Generic, | ||||
Iterable, | |||||
Iterator, | Iterator, | ||||
List, | List, | ||||
Mapping, | Mapping, | ||||
Optional, | Optional, | ||||
Sequence, | Sequence, | ||||
Set, | Set, | ||||
Tuple, | Tuple, | ||||
TypeVar, | TypeVar, | ||||
▲ Show 20 Lines • Show All 704 Lines • ▼ Show 20 Lines | def load(self) -> Dict: | ||||
errors.append(f"{error}: {e}") | errors.append(f"{error}: {e}") | ||||
sentry_sdk.capture_exception(e) | sentry_sdk.capture_exception(e) | ||||
status_visit = "failed" | status_visit = "failed" | ||||
status_load = "failed" | status_load = "failed" | ||||
if snapshot: | if snapshot: | ||||
try: | try: | ||||
metadata_objects = self.build_extrinsic_snapshot_metadata(snapshot.id) | metadata_objects = self.build_extrinsic_snapshot_metadata(snapshot.id) | ||||
self._load_metadata_objects(metadata_objects) | self.load_metadata_objects(metadata_objects) | ||||
except Exception as e: | except Exception as e: | ||||
error = ( | error = ( | ||||
f"Failed to load extrinsic snapshot metadata for {self.origin.url}" | f"Failed to load extrinsic snapshot metadata for {self.origin.url}" | ||||
) | ) | ||||
logger.exception(error) | logger.exception(error) | ||||
errors.append(f"{error}: {e}") | errors.append(f"{error}: {e}") | ||||
sentry_sdk.capture_exception(e) | sentry_sdk.capture_exception(e) | ||||
status_visit = "partial" | status_visit = "partial" | ||||
status_load = "failed" | status_load = "failed" | ||||
try: | try: | ||||
metadata_objects = self.build_extrinsic_origin_metadata() | metadata_objects = self.build_extrinsic_origin_metadata() | ||||
self._load_metadata_objects(metadata_objects) | self.load_metadata_objects(metadata_objects) | ||||
except Exception as e: | except Exception as e: | ||||
error = f"Failed to load extrinsic origin metadata for {self.origin.url}" | error = f"Failed to load extrinsic origin metadata for {self.origin.url}" | ||||
logger.exception(error) | logger.exception(error) | ||||
errors.append(f"{error}: {e}") | errors.append(f"{error}: {e}") | ||||
sentry_sdk.capture_exception(e) | sentry_sdk.capture_exception(e) | ||||
status_visit = "partial" | status_visit = "partial" | ||||
status_load = "failed" | status_load = "failed" | ||||
▲ Show 20 Lines • Show All 72 Lines • ▼ Show 20 Lines | ) -> Optional[Tuple[Sha1Git, Sha1Git]]: | ||||
discovery_date=self.visit_date, | discovery_date=self.visit_date, | ||||
authority=SWH_METADATA_AUTHORITY, | authority=SWH_METADATA_AUTHORITY, | ||||
fetcher=self.get_metadata_fetcher(), | fetcher=self.get_metadata_fetcher(), | ||||
format="original-artifacts-json", | format="original-artifacts-json", | ||||
metadata=json.dumps(metadata).encode(), | metadata=json.dumps(metadata).encode(), | ||||
origin=self.origin.url, | origin=self.origin.url, | ||||
release=release.swhid(), | release=release.swhid(), | ||||
) | ) | ||||
self._load_metadata_objects([original_artifact_metadata]) | self.load_metadata_objects([original_artifact_metadata]) | ||||
logger.debug("Release: %s", release) | logger.debug("Release: %s", release) | ||||
self.storage.release_add([release]) | self.storage.release_add([release]) | ||||
assert directory.hash | assert directory.hash | ||||
return (release.id, directory.hash) | return (release.id, directory.hash) | ||||
def _load_snapshot( | def _load_snapshot( | ||||
▲ Show 20 Lines • Show All 52 Lines • ▼ Show 20 Lines | def get_loader_version(self) -> str: | ||||
module_name_parts = module_name.split(".") | module_name_parts = module_name.split(".") | ||||
# Iterate rootward through the package hierarchy until we find a parent of this | # Iterate rootward through the package hierarchy until we find a parent of this | ||||
# loader's module with a __version__ attribute. | # loader's module with a __version__ attribute. | ||||
for prefix_size in range(len(module_name_parts), 0, -1): | for prefix_size in range(len(module_name_parts), 0, -1): | ||||
package_name = ".".join(module_name_parts[0:prefix_size]) | package_name = ".".join(module_name_parts[0:prefix_size]) | ||||
module = sys.modules[package_name] | module = sys.modules[package_name] | ||||
if hasattr(module, "__version__"): | if hasattr(module, "__version__"): | ||||
return module.__version__ # type: ignore | return module.__version__ | ||||
# If this loader's class has no parent package with a __version__, | # If this loader's class has no parent package with a __version__, | ||||
# it should implement it itself. | # it should implement it itself. | ||||
raise NotImplementedError( | raise NotImplementedError( | ||||
f"Could not dynamically find the version of {self.get_loader_name()}." | f"Could not dynamically find the version of {self.get_loader_name()}." | ||||
) | ) | ||||
def get_metadata_fetcher(self) -> MetadataFetcher: | def get_metadata_fetcher(self) -> MetadataFetcher: | ||||
▲ Show 20 Lines • Show All 121 Lines • ▼ Show 20 Lines | def _load_extrinsic_directory_metadata( | ||||
self, | self, | ||||
p_info: TPackageInfo, | p_info: TPackageInfo, | ||||
release_id: Sha1Git, | release_id: Sha1Git, | ||||
directory_id: Sha1Git, | directory_id: Sha1Git, | ||||
) -> None: | ) -> None: | ||||
metadata_objects = self.build_extrinsic_directory_metadata( | metadata_objects = self.build_extrinsic_directory_metadata( | ||||
p_info, release_id, directory_id | p_info, release_id, directory_id | ||||
) | ) | ||||
self._load_metadata_objects(metadata_objects) | self.load_metadata_objects(metadata_objects) | ||||
def _load_metadata_objects( | |||||
self, metadata_objects: List[RawExtrinsicMetadata] | |||||
) -> None: | |||||
if not metadata_objects: | |||||
# If this package loader doesn't write metadata, no need to require | |||||
# an implementation for get_metadata_authority. | |||||
return | |||||
self._create_authorities(mo.authority for mo in metadata_objects) | |||||
self._create_fetchers(mo.fetcher for mo in metadata_objects) | |||||
self.storage.raw_extrinsic_metadata_add(metadata_objects) | |||||
def _create_authorities(self, authorities: Iterable[MetadataAuthority]) -> None: | |||||
deduplicated_authorities = { | |||||
(authority.type, authority.url): authority for authority in authorities | |||||
} | |||||
if authorities: | |||||
self.storage.metadata_authority_add(list(deduplicated_authorities.values())) | |||||
def _create_fetchers(self, fetchers: Iterable[MetadataFetcher]) -> None: | |||||
deduplicated_fetchers = { | |||||
(fetcher.name, fetcher.version): fetcher for fetcher in fetchers | |||||
} | |||||
if fetchers: | |||||
self.storage.metadata_fetcher_add(list(deduplicated_fetchers.values())) | |||||
def _load_extids(self, extids: Set[ExtID]) -> None: | def _load_extids(self, extids: Set[ExtID]) -> None: | ||||
if not extids: | if not extids: | ||||
return | return | ||||
try: | try: | ||||
self.storage.extid_add(list(extids)) | self.storage.extid_add(list(extids)) | ||||
except Exception as e: | except Exception as e: | ||||
logger.exception("Failed to load new ExtIDs for %s", self.origin.url) | logger.exception("Failed to load new ExtIDs for %s", self.origin.url) | ||||
Show All 19 Lines |