Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/loader.py
Show All 38 Lines | |||||
from swh.model.hashutil import hash_to_hex | from swh.model.hashutil import hash_to_hex | ||||
from swh.model.identifiers import ( | from swh.model.identifiers import ( | ||||
CoreSWHID, | CoreSWHID, | ||||
ExtendedObjectType, | ExtendedObjectType, | ||||
ExtendedSWHID, | ExtendedSWHID, | ||||
ObjectType, | ObjectType, | ||||
) | ) | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
ExtID, | |||||
MetadataAuthority, | MetadataAuthority, | ||||
MetadataAuthorityType, | MetadataAuthorityType, | ||||
MetadataFetcher, | MetadataFetcher, | ||||
Origin, | Origin, | ||||
OriginVisit, | OriginVisit, | ||||
OriginVisitStatus, | OriginVisitStatus, | ||||
RawExtrinsicMetadata, | RawExtrinsicMetadata, | ||||
Revision, | Revision, | ||||
▲ Show 20 Lines • Show All 528 Lines • ▼ Show 20 Lines | def load(self) -> Dict: | ||||
if last_snapshot is None: | if last_snapshot is None: | ||||
last_snapshot_targets: Set[Sha1Git] = set() | last_snapshot_targets: Set[Sha1Git] = set() | ||||
else: | else: | ||||
last_snapshot_targets = { | last_snapshot_targets = { | ||||
branch.target for branch in last_snapshot.branches.values() | branch.target for branch in last_snapshot.branches.values() | ||||
} | } | ||||
new_extids: Set[ExtID] = set() | |||||
tmp_revisions: Dict[str, List[Tuple[str, Sha1Git]]] = { | tmp_revisions: Dict[str, List[Tuple[str, Sha1Git]]] = { | ||||
version: [] for version in versions | version: [] for version in versions | ||||
} | } | ||||
for (version, branch_name, p_info) in packages_info: | for (version, branch_name, p_info) in packages_info: | ||||
logger.debug("package_info: %s", p_info) | logger.debug("package_info: %s", p_info) | ||||
# Check if the package was already loaded, using its ExtID | # Check if the package was already loaded, using its ExtID | ||||
revision_id = self.resolve_revision_from_extids( | revision_id = self.resolve_revision_from_extids( | ||||
Show All 29 Lines | def load(self) -> Dict: | ||||
"Failed loading branch %s for %s", branch_name, self.url | "Failed loading branch %s for %s", branch_name, self.url | ||||
) | ) | ||||
failed_branches.append(branch_name) | failed_branches.append(branch_name) | ||||
continue | continue | ||||
if revision_id is None: | if revision_id is None: | ||||
continue | continue | ||||
partial_extid = p_info.extid() | |||||
if partial_extid is not None: | |||||
(extid_type, extid) = partial_extid | |||||
revision_swhid = CoreSWHID( | |||||
object_type=ObjectType.REVISION, object_id=revision_id | |||||
) | |||||
new_extids.add( | |||||
ExtID(extid_type=extid_type, extid=extid, target=revision_swhid) | |||||
) | |||||
tmp_revisions[version].append((branch_name, revision_id)) | tmp_revisions[version].append((branch_name, revision_id)) | ||||
if load_exceptions: | if load_exceptions: | ||||
status_visit = "partial" | status_visit = "partial" | ||||
if not tmp_revisions: | if not tmp_revisions: | ||||
# We could not load any revisions; fail completely | # We could not load any revisions; fail completely | ||||
return self.finalize_visit( | return self.finalize_visit( | ||||
Show All 40 Lines | def load(self) -> Dict: | ||||
except Exception as e: | except Exception as e: | ||||
logger.exception( | logger.exception( | ||||
"Failed to load extrinsic origin metadata for %s", self.url | "Failed to load extrinsic origin metadata for %s", self.url | ||||
) | ) | ||||
sentry_sdk.capture_exception(e) | sentry_sdk.capture_exception(e) | ||||
status_visit = "partial" | status_visit = "partial" | ||||
status_load = "failed" | status_load = "failed" | ||||
self._load_extids(new_extids) | |||||
return self.finalize_visit( | return self.finalize_visit( | ||||
snapshot=snapshot, | snapshot=snapshot, | ||||
visit=visit, | visit=visit, | ||||
failed_branches=failed_branches, | failed_branches=failed_branches, | ||||
status_visit=status_visit, | status_visit=status_visit, | ||||
status_load=status_load, | status_load=status_load, | ||||
) | ) | ||||
▲ Show 20 Lines • Show All 296 Lines • ▼ Show 20 Lines | def _create_authorities(self, authorities: Iterable[MetadataAuthority]) -> None: | ||||
self.storage.metadata_authority_add(list(deduplicated_authorities.values())) | self.storage.metadata_authority_add(list(deduplicated_authorities.values())) | ||||
def _create_fetchers(self, fetchers: Iterable[MetadataFetcher]) -> None:
    """Register metadata fetchers with the storage, one per (name, version).

    Args:
        fetchers: fetchers to register. Entries sharing the same
            ``(name, version)`` pair are collapsed into one, the last one
            seen winning. Any iterable is accepted, including one-shot
            iterators and generators.
    """
    deduplicated_fetchers = {
        (fetcher.name, fetcher.version): fetcher for fetcher in fetchers
    }
    # Test the deduplicated mapping rather than ``fetchers`` itself: an
    # iterator or generator is always truthy (and is already exhausted at this
    # point), so checking it would send an empty list to the storage.
    if deduplicated_fetchers:
        self.storage.metadata_fetcher_add(list(deduplicated_fetchers.values()))
def _load_extids(self, extids: Set[ExtID]) -> None:
    """Send the given ExtIDs to the storage on a best-effort basis.

    Nothing is sent when ``extids`` is empty. Any exception raised by the
    storage is logged and reported to Sentry, then swallowed.

    Args:
        extids: the ExtID objects to add to the storage.
    """
    if extids:
        try:
            self.storage.extid_add(list(extids))
        except Exception as exc:
            # Not fatal: the only consequence is that the next visit will
            # load the same versions again.
            logger.exception("Failed to load new ExtIDs for %s", self.url)
            sentry_sdk.capture_exception(exc)