Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/loader.py
Show All 30 Lines | |||||
from requests.exceptions import ContentDecodingError | from requests.exceptions import ContentDecodingError | ||||
import sentry_sdk | import sentry_sdk | ||||
from swh.core.tarball import uncompress | from swh.core.tarball import uncompress | ||||
from swh.loader.core.loader import BaseLoader | from swh.loader.core.loader import BaseLoader | ||||
from swh.loader.exception import NotFound | from swh.loader.exception import NotFound | ||||
from swh.loader.package.utils import download | from swh.loader.package.utils import download | ||||
from swh.model import from_disk | from swh.model import from_disk | ||||
from swh.model.collections import ImmutableDict | |||||
from swh.model.hashutil import hash_to_hex | from swh.model.hashutil import hash_to_hex | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
ExtID, | ExtID, | ||||
MetadataAuthority, | MetadataAuthority, | ||||
MetadataAuthorityType, | MetadataAuthorityType, | ||||
MetadataFetcher, | MetadataFetcher, | ||||
Origin, | Origin, | ||||
OriginVisit, | OriginVisit, | ||||
OriginVisitStatus, | OriginVisitStatus, | ||||
RawExtrinsicMetadata, | RawExtrinsicMetadata, | ||||
Revision, | Revision, | ||||
Sha1Git, | Sha1Git, | ||||
Snapshot, | Snapshot, | ||||
TargetType, | |||||
) | ) | ||||
from swh.model.swhids import CoreSWHID, ExtendedObjectType, ExtendedSWHID, ObjectType | from swh.model.swhids import CoreSWHID, ExtendedObjectType, ExtendedSWHID, ObjectType | ||||
from swh.storage.algos.snapshot import snapshot_get_latest | from swh.storage.algos.snapshot import snapshot_get_latest | ||||
from swh.storage.interface import StorageInterface | from swh.storage.interface import StorageInterface | ||||
from swh.storage.utils import now | from swh.storage.utils import now | ||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
▲ Show 20 Lines • Show All 152 Lines • ▼ Show 20 Lines | def get_default_version(self) -> str: | ||||
return "" | return "" | ||||
def last_snapshot(self) -> Optional[Snapshot]: | def last_snapshot(self) -> Optional[Snapshot]: | ||||
"""Retrieve the last snapshot out of the last visit. | """Retrieve the last snapshot out of the last visit. | ||||
""" | """ | ||||
return snapshot_get_latest(self.storage, self.url) | return snapshot_get_latest(self.storage, self.url) | ||||
def known_artifacts( | |||||
self, snapshot: Optional[Snapshot] | |||||
) -> Dict[Sha1Git, Optional[ImmutableDict[str, object]]]: | |||||
"""Retrieve the known releases/artifact for the origin. | |||||
Args | |||||
snapshot: snapshot for the visit | |||||
Returns: | |||||
Dict of keys revision id (bytes), values a metadata Dict. | |||||
""" | |||||
if not snapshot: | |||||
return {} | |||||
# retrieve only revisions (e.g the alias we do not want here) | |||||
revs = [ | |||||
rev.target | |||||
for rev in snapshot.branches.values() | |||||
if rev and rev.target_type == TargetType.REVISION | |||||
] | |||||
known_revisions = self.storage.revision_get(revs) | |||||
return { | |||||
revision.id: revision.metadata for revision in known_revisions if revision | |||||
} | |||||
def new_packageinfo_to_extid(self, p_info: TPackageInfo) -> Optional[PartialExtID]: | def new_packageinfo_to_extid(self, p_info: TPackageInfo) -> Optional[PartialExtID]: | ||||
return p_info.extid() | return p_info.extid() | ||||
def _get_known_extids( | def _get_known_extids( | ||||
self, packages_info: List[TPackageInfo] | self, packages_info: List[TPackageInfo] | ||||
) -> Dict[PartialExtID, List[CoreSWHID]]: | ) -> Dict[PartialExtID, List[CoreSWHID]]: | ||||
"""Compute the ExtIDs from new PackageInfo objects, searches which are already | """Compute the ExtIDs from new PackageInfo objects, searches which are already | ||||
loaded in the archive, and returns them if any.""" | loaded in the archive, and returns them if any.""" | ||||
▲ Show 20 Lines • Show All 253 Lines • ▼ Show 20 Lines | def load(self) -> Dict: | ||||
sentry_sdk.capture_exception(e) | sentry_sdk.capture_exception(e) | ||||
return {"status": "failed"} | return {"status": "failed"} | ||||
# Get the previous snapshot for this origin. It is then used to see which | # Get the previous snapshot for this origin. It is then used to see which | ||||
# of the package's versions are already loaded in the archive. | # of the package's versions are already loaded in the archive. | ||||
try: | try: | ||||
last_snapshot = self.last_snapshot() | last_snapshot = self.last_snapshot() | ||||
logger.debug("last snapshot: %s", last_snapshot) | logger.debug("last snapshot: %s", last_snapshot) | ||||
known_artifacts = self.known_artifacts(last_snapshot) | |||||
logger.debug("known artifacts: %s", known_artifacts) | |||||
except Exception as e: | except Exception as e: | ||||
logger.exception("Failed to get previous state for %s", self.url) | logger.exception("Failed to get previous state for %s", self.url) | ||||
sentry_sdk.capture_exception(e) | sentry_sdk.capture_exception(e) | ||||
return self.finalize_visit( | return self.finalize_visit( | ||||
snapshot=snapshot, | snapshot=snapshot, | ||||
visit=visit, | visit=visit, | ||||
failed_branches=failed_branches, | failed_branches=failed_branches, | ||||
status_visit="failed", | status_visit="failed", | ||||
▲ Show 20 Lines • Show All 469 Lines • Show Last 20 Lines |