Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/loader.py
Show All 20 Lines | from typing import ( | ||||
List, | List, | ||||
Mapping, | Mapping, | ||||
Optional, | Optional, | ||||
Sequence, | Sequence, | ||||
Set, | Set, | ||||
Tuple, | Tuple, | ||||
TypeVar, | TypeVar, | ||||
) | ) | ||||
import warnings | |||||
import attr | import attr | ||||
from requests.exceptions import ContentDecodingError | from requests.exceptions import ContentDecodingError | ||||
import sentry_sdk | import sentry_sdk | ||||
from swh.core.tarball import uncompress | from swh.core.tarball import uncompress | ||||
from swh.loader.core.loader import BaseLoader | from swh.loader.core.loader import BaseLoader | ||||
from swh.loader.exception import NotFound | from swh.loader.exception import NotFound | ||||
from swh.loader.package.utils import download | from swh.loader.package.utils import download | ||||
from swh.model import from_disk | from swh.model import from_disk | ||||
from swh.model.hashutil import hash_to_hex | from swh.model.hashutil import hash_to_hex | ||||
from swh.model.model import ( | from swh.model.model import ( | ||||
ExtID, | ExtID, | ||||
MetadataAuthority, | MetadataAuthority, | ||||
MetadataAuthorityType, | MetadataAuthorityType, | ||||
MetadataFetcher, | MetadataFetcher, | ||||
) | |||||
from swh.model.model import ( | |||||
Origin, | Origin, | ||||
OriginVisit, | OriginVisit, | ||||
OriginVisitStatus, | OriginVisitStatus, | ||||
RawExtrinsicMetadata, | RawExtrinsicMetadata, | ||||
Release, | |||||
Revision, | Revision, | ||||
Sha1Git, | Sha1Git, | ||||
Snapshot, | Snapshot, | ||||
) | ) | ||||
from swh.model.model import ObjectType as ModelObjectType | |||||
from swh.model.swhids import CoreSWHID, ExtendedObjectType, ExtendedSWHID, ObjectType | from swh.model.swhids import CoreSWHID, ExtendedObjectType, ExtendedSWHID, ObjectType | ||||
from swh.storage.algos.snapshot import snapshot_get_latest | from swh.storage.algos.snapshot import snapshot_get_latest | ||||
from swh.storage.interface import StorageInterface | from swh.storage.interface import StorageInterface | ||||
from swh.storage.utils import now | from swh.storage.utils import now | ||||
logger = logging.getLogger(__name__) | logger = logging.getLogger(__name__) | ||||
▲ Show 20 Lines • Show All 52 Lines • ▼ Show 20 Lines | class BasePackageInfo: | ||||
# See <https://github.com/python-attrs/attrs/issues/38> | # See <https://github.com/python-attrs/attrs/issues/38> | ||||
directory_extrinsic_metadata = attr.ib( | directory_extrinsic_metadata = attr.ib( | ||||
type=List[RawExtrinsicMetadataCore], default=[], kw_only=True, | type=List[RawExtrinsicMetadataCore], default=[], kw_only=True, | ||||
) | ) | ||||
""":term:`extrinsic metadata` collected by the loader, that will be attached to the | """:term:`extrinsic metadata` collected by the loader, that will be attached to the | ||||
loaded directory and added to the Metadata storage.""" | loaded directory and added to the Metadata storage.""" | ||||
# TODO: add support for metadata for revisions and contents | # TODO: add support for metadata for releases and contents | ||||
def extid(self) -> Optional[PartialExtID]: | def extid(self) -> Optional[PartialExtID]: | ||||
"""Returns a unique intrinsic identifier of this package info, | """Returns a unique intrinsic identifier of this package info, | ||||
or None if this package info is not 'deduplicatable' (meaning that | or None if this package info is not 'deduplicatable' (meaning that | ||||
we will always load it, instead of checking the ExtID storage | we will always load it, instead of checking the ExtID storage | ||||
to see if we already did)""" | to see if we already did)""" | ||||
if self.MANIFEST_FORMAT is None: | if self.MANIFEST_FORMAT is None: | ||||
return None | return None | ||||
▲ Show 20 Lines • Show All 57 Lines • ▼ Show 20 Lines | def get_package_info(self, version: str) -> Iterator[Tuple[str, TPackageInfo]]: | ||||
yield from {} | yield from {} | ||||
def build_revision(
    self, p_info: TPackageInfo, uncompressed_path: str, directory: Sha1Git
) -> Optional[Revision]:
    """Build a revision for one package artifact (deprecated hook).

    The revision is meant to be built from the archive metadata (extrinsic
    artifact metadata) and the intrinsic metadata.

    This method is deprecated, :meth:`build_release` should be implemented
    instead. The default :meth:`build_release` still calls this method as a
    fallback, which is why the base implementation raises.

    Args:
        p_info: Package information
        uncompressed_path: Artifact uncompressed path on disk
        directory: Identifier of the directory loaded from the artifact

    Returns:
        A revision object, or None (per the ``Optional`` return annotation).

    Raises:
        NotImplementedError: always, unless overridden by a subclass.
    """
    raise NotImplementedError("build_revision")
def build_release(
    self,
    version: str,
    p_info: TPackageInfo,
    uncompressed_path: str,
    directory: Sha1Git,
) -> Optional[Release]:
    """Build the release from the archive metadata (extrinsic
    artifact metadata) and the intrinsic metadata.

    This default implementation is a compatibility shim: it emits a
    ``DeprecationWarning``, delegates to :meth:`build_revision` and converts
    the resulting revision into a release with ``rev2rel``.

    Args:
        version: Version of the package; passed to ``rev2rel`` together with
          the revision
        p_info: Package information
        uncompressed_path: Artifact uncompressed path on disk
        directory: Identifier of the directory loaded from the artifact

    Returns:
        The release, or None when :meth:`build_revision` returned None.
    """
    warnings.warn(
        f"{self.get_loader_name()} is missing a build_release() method. "
        "Falling back to `build_revision` + automatic conversion to release.",
        DeprecationWarning,
    )
    revision = self.build_revision(p_info, uncompressed_path, directory)
    # Nothing to convert when the legacy hook produced no revision.
    return None if revision is None else rev2rel(revision, version)
def get_default_version(self) -> str: | def get_default_version(self) -> str: | ||||
"""Retrieve the latest release version if any. | """Retrieve the latest release version if any. | ||||
Returns: | Returns: | ||||
Latest version | Latest version | ||||
""" | """ | ||||
Show All 28 Lines | ) -> Dict[PartialExtID, List[CoreSWHID]]: | ||||
for (extid_type, extids) in new_extids.items(): | for (extid_type, extids) in new_extids.items(): | ||||
for extid in self.storage.extid_get_from_extid(extid_type, extids): | for extid in self.storage.extid_get_from_extid(extid_type, extids): | ||||
if extid is not None: | if extid is not None: | ||||
key = (extid.extid_type, extid.extid) | key = (extid.extid_type, extid.extid) | ||||
known_extids.setdefault(key, []).append(extid.target) | known_extids.setdefault(key, []).append(extid.target) | ||||
return known_extids | return known_extids | ||||
def resolve_object_from_extids(
    self,
    known_extids: Dict[PartialExtID, List[CoreSWHID]],
    p_info: TPackageInfo,
    whitelist: Set[Sha1Git],
) -> Optional[CoreSWHID]:
    """Resolve the revision/release from known ExtIDs and a package info object.

    If the artifact has already been downloaded, this will return the
    existing release (or revision) targeting that uncompressed artifact directory.
    Otherwise, this returns None.

    Args:
        known_extids: Dict built from a list of ExtID, with the target as value
        p_info: Package information
        whitelist: Any ExtID with target not in this set is filtered out

    Returns:
        None or release/revision SWHID. A release target is preferred over a
        revision target when both are known.
    """
    new_extid = p_info.extid()
    if new_extid is None:
        # This package info is not deduplicatable; it must always be loaded.
        return None

    extid_targets = []
    for extid_target in known_extids.get(new_extid, []):
        if extid_target.object_id not in whitelist:
            # There is a known ExtID for this package, but its target is not
            # in the snapshot.
            # This can happen for three reasons:
            #
            # 1. a loader crashed after writing the ExtID, but before writing
            #    the snapshot
            # 2. some other loader loaded the same artifact, but produced
            #    a different revision, causing an additional ExtID object
            #    to be written. We will probably find this loader's ExtID
            #    in a future iteration of this loop.
            #    Note that for now, this is impossible, as each loader has a
            #    completely different extid_type, but this is an implementation
            #    detail of each loader.
            # 3. we took a snapshot, then the package disappeared,
            #    then we took another snapshot, and the package reappeared
            #
            # In case of 1, we must actually load the package now,
            # so let's do it.
            # TODO: detect when we are in case 3 using release_missing
            # or revision_missing instead of the snapshot.
            continue
        elif extid_target.object_type in (ObjectType.RELEASE, ObjectType.REVISION):
            extid_targets.append(extid_target)
        else:
            # Note that this case should never be reached unless there is a
            # collision between a revision hash and some non-revision object's
            # hash, but better safe than sorry.
            # The hash to report is the target's object_id; object_type is an
            # enum member and cannot be hex-encoded.
            logger.warning(
                "%s is in the whitelist, but is not a revision/release.",
                hash_to_hex(extid_target.object_id),
            )

    if extid_targets:
        # This is a known package version, as we have an extid to reference it.
        # Let's return one of them.

        # If there is a release extid, return it.
        release_extid_targets = [
            extid_target
            for extid_target in extid_targets
            if extid_target.object_type == ObjectType.RELEASE
        ]
        if release_extid_targets:
            assert len(release_extid_targets) == 1, release_extid_targets
            return release_extid_targets[0]

        # If there is no release extid (ie. if the package was only loaded with
        # older versions of this loader, which produced revision objects instead
        # of releases), return a revision extid.
        assert len(extid_targets) == 1, extid_targets
        assert extid_targets[0].object_type == ObjectType.REVISION, extid_targets
        return extid_targets[0]
    else:
        # No target found (this is probably a new package version)
        return None
def download_package( | def download_package( | ||||
self, p_info: TPackageInfo, tmpdir: str | self, p_info: TPackageInfo, tmpdir: str | ||||
) -> List[Tuple[str, Mapping]]: | ) -> List[Tuple[str, Mapping]]: | ||||
"""Download artifacts for a specific package. All downloads happen in | """Download artifacts for a specific package. All downloads happen in | ||||
the tmpdir folder. | the tmpdir folder. | ||||
Default implementation expects the artifacts package info to be | Default implementation expects the artifacts package info to be | ||||
▲ Show 20 Lines • Show All 220 Lines • ▼ Show 20 Lines | def load(self) -> Dict: | ||||
if last_snapshot is None: | if last_snapshot is None: | ||||
last_snapshot_targets: Set[Sha1Git] = set() | last_snapshot_targets: Set[Sha1Git] = set() | ||||
else: | else: | ||||
last_snapshot_targets = { | last_snapshot_targets = { | ||||
branch.target for branch in last_snapshot.branches.values() | branch.target for branch in last_snapshot.branches.values() | ||||
} | } | ||||
new_extids: Set[ExtID] = set() | new_extids: Set[ExtID] = set() | ||||
tmp_revisions: Dict[str, List[Tuple[str, Sha1Git]]] = { | tmp_releases: Dict[str, List[Tuple[str, Sha1Git]]] = { | ||||
version: [] for version in versions | version: [] for version in versions | ||||
} | } | ||||
errors = [] | errors = [] | ||||
for (version, branch_name, p_info) in packages_info: | for (version, branch_name, p_info) in packages_info: | ||||
logger.debug("package_info: %s", p_info) | logger.debug("package_info: %s", p_info) | ||||
# Check if the package was already loaded, using its ExtID | # Check if the package was already loaded, using its ExtID | ||||
revision_id = self.resolve_revision_from_extids( | swhid = self.resolve_object_from_extids( | ||||
known_extids, p_info, last_snapshot_targets | known_extids, p_info, last_snapshot_targets | ||||
) | ) | ||||
if revision_id is None: | if swhid is not None and swhid.object_type == ObjectType.REVISION: | ||||
# No matching revision found in the last snapshot, load it. | # This package was already loaded, but by an older version | ||||
# of this loader, which produced revisions instead of releases. | |||||
# Let's fetch the revision's data, and "upgrade" it into a release. | |||||
(rev,) = self.storage.revision_get([swhid.object_id]) | |||||
if not rev: | |||||
logger.error( | |||||
"Failed to upgrade branch %s from revision to " | |||||
"release, %s is missing from the storage. " | |||||
"Falling back to re-loading from the origin.", | |||||
branch_name, | |||||
swhid, | |||||
) | |||||
else: | |||||
rev = None | |||||
if swhid is None or (swhid.object_type == ObjectType.REVISION and not rev): | |||||
# No matching revision or release found in the last snapshot, load it. | |||||
release_id = None | |||||
try: | try: | ||||
res = self._load_revision(p_info, origin) | res = self._load_release(version, p_info, origin) | ||||
if res: | if res: | ||||
(revision_id, directory_id) = res | (release_id, directory_id) = res | ||||
assert revision_id | assert release_id | ||||
assert directory_id | assert directory_id | ||||
self._load_extrinsic_directory_metadata( | self._load_extrinsic_directory_metadata( | ||||
p_info, revision_id, directory_id | p_info, release_id, directory_id | ||||
) | ) | ||||
self.storage.flush() | self.storage.flush() | ||||
status_load = "eventful" | status_load = "eventful" | ||||
except Exception as e: | except Exception as e: | ||||
self.storage.clear_buffers() | self.storage.clear_buffers() | ||||
load_exceptions.append(e) | load_exceptions.append(e) | ||||
sentry_sdk.capture_exception(e) | sentry_sdk.capture_exception(e) | ||||
error = f"Failed to load branch {branch_name} for {self.url}" | error = f"Failed to load branch {branch_name} for {self.url}" | ||||
logger.exception(error) | logger.exception(error) | ||||
failed_branches.append(branch_name) | failed_branches.append(branch_name) | ||||
errors.append(f"{error}: {e}") | errors.append(f"{error}: {e}") | ||||
continue | continue | ||||
if revision_id is None: | if release_id is None: | ||||
continue | continue | ||||
add_extid = True | |||||
elif swhid.object_type == ObjectType.REVISION: | |||||
# If 'rev' was None, the previous block would have run. | |||||
assert rev is not None | |||||
rel = rev2rel(rev, version) | |||||
self.storage.release_add([rel]) | |||||
logger.debug("Upgraded %s to %s", swhid, rel.swhid()) | |||||
release_id = rel.id | |||||
# Create a new extid for this package, so the next run of this loader | |||||
# will be able to find the new release, and use it (instead of the | |||||
# old revision) | |||||
add_extid = True | |||||
elif swhid.object_type == ObjectType.RELEASE: | |||||
# This package was already loaded, nothing to do. | |||||
release_id = swhid.object_id | |||||
add_extid = False | |||||
else: | |||||
assert False, f"Unexpected object type: {swhid}" | |||||
assert release_id is not None | |||||
if add_extid: | |||||
partial_extid = p_info.extid() | partial_extid = p_info.extid() | ||||
if partial_extid is not None: | if partial_extid is not None: | ||||
(extid_type, extid) = partial_extid | (extid_type, extid) = partial_extid | ||||
revision_swhid = CoreSWHID( | release_swhid = CoreSWHID( | ||||
object_type=ObjectType.REVISION, object_id=revision_id | object_type=ObjectType.RELEASE, object_id=release_id | ||||
) | ) | ||||
new_extids.add( | new_extids.add( | ||||
ExtID(extid_type=extid_type, extid=extid, target=revision_swhid) | ExtID(extid_type=extid_type, extid=extid, target=release_swhid) | ||||
) | ) | ||||
tmp_revisions[version].append((branch_name, revision_id)) | tmp_releases[version].append((branch_name, release_id)) | ||||
if load_exceptions: | if load_exceptions: | ||||
status_visit = "partial" | status_visit = "partial" | ||||
if not tmp_revisions: | if not tmp_releases: | ||||
# We could not load any revisions; fail completely | # We could not load any releases; fail completely | ||||
return self.finalize_visit( | return self.finalize_visit( | ||||
snapshot=snapshot, | snapshot=snapshot, | ||||
visit=visit, | visit=visit, | ||||
failed_branches=failed_branches, | failed_branches=failed_branches, | ||||
status_visit="failed", | status_visit="failed", | ||||
status_load="failed", | status_load="failed", | ||||
errors=errors, | errors=errors, | ||||
) | ) | ||||
try: | try: | ||||
# Retrieve the default release version (the "latest" one) | # Retrieve the default release version (the "latest" one) | ||||
default_version = self.get_default_version() | default_version = self.get_default_version() | ||||
logger.debug("default version: %s", default_version) | logger.debug("default version: %s", default_version) | ||||
# Retrieve extra branches | # Retrieve extra branches | ||||
extra_branches = self.extra_branches() | extra_branches = self.extra_branches() | ||||
logger.debug("extra branches: %s", extra_branches) | logger.debug("extra branches: %s", extra_branches) | ||||
snapshot = self._load_snapshot( | snapshot = self._load_snapshot( | ||||
default_version, tmp_revisions, extra_branches | default_version, tmp_releases, extra_branches | ||||
) | ) | ||||
self.storage.flush() | self.storage.flush() | ||||
except Exception as e: | except Exception as e: | ||||
error = f"Failed to build snapshot for origin {self.url}" | error = f"Failed to build snapshot for origin {self.url}" | ||||
logger.exception(error) | logger.exception(error) | ||||
errors.append(f"{error}: {e}") | errors.append(f"{error}: {e}") | ||||
sentry_sdk.capture_exception(e) | sentry_sdk.capture_exception(e) | ||||
status_visit = "failed" | status_visit = "failed" | ||||
▲ Show 20 Lines • Show All 51 Lines • ▼ Show 20 Lines | ) -> Tuple[str, from_disk.Directory]: | ||||
logger.debug("Number of contents: %s", len(contents)) | logger.debug("Number of contents: %s", len(contents)) | ||||
self.storage.content_add(contents) | self.storage.content_add(contents) | ||||
logger.debug("Number of directories: %s", len(directories)) | logger.debug("Number of directories: %s", len(directories)) | ||||
self.storage.directory_add(directories) | self.storage.directory_add(directories) | ||||
return (uncompressed_path, directory) | return (uncompressed_path, directory) | ||||
def _load_release(
    self, version: str, p_info: TPackageInfo, origin
) -> Optional[Tuple[Sha1Git, Sha1Git]]:
    """Does all the loading of a release itself:

    * downloads a package and uncompresses it
    * loads it from disk
    * adds contents, directories, and release to self.storage
    * returns (release_id, directory_id)

    Args:
        version: Package version, forwarded to :meth:`build_release`
        p_info: Package information
        origin: Not used by this method

    Returns:
        ``(release_id, directory_id)``, or None when :meth:`build_release`
        did not produce a release.

    Raises
        exception when unable to download or uncompress artifacts
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        dl_artifacts = self.download_package(p_info, tmpdir)
        (uncompressed_path, directory) = self._load_directory(dl_artifacts, tmpdir)
        release = self.build_release(
            version, p_info, uncompressed_path, directory=directory.hash
        )

    if not release:
        # Some artifacts are missing intrinsic metadata
        # skipping those
        return None

    # Keep only the metadata half of each (filepath, metadata) pair.
    artifacts_metadata = [metadata for (_, metadata) in dl_artifacts]

    # The original-artifacts metadata is attached to the directory the
    # release points to, so the release must target a directory.
    assert release.target is not None, release
    assert release.target_type == ModelObjectType.DIRECTORY, release

    original_artifact_metadata = RawExtrinsicMetadata(
        target=ExtendedSWHID(
            object_type=ExtendedObjectType.DIRECTORY, object_id=release.target
        ),
        discovery_date=self.visit_date,
        authority=SWH_METADATA_AUTHORITY,
        fetcher=self.get_metadata_fetcher(),
        format="original-artifacts-json",
        metadata=json.dumps(artifacts_metadata).encode(),
        origin=self.url,
        release=release.swhid(),
    )
    self._load_metadata_objects([original_artifact_metadata])

    logger.debug("Release: %s", release)
    self.storage.release_add([release])

    assert directory.hash
    return (release.id, directory.hash)
def _load_snapshot( | def _load_snapshot( | ||||
self, | self, | ||||
default_version: str, | default_version: str, | ||||
revisions: Dict[str, List[Tuple[str, bytes]]], | releases: Dict[str, List[Tuple[str, bytes]]], | ||||
extra_branches: Dict[bytes, Mapping[str, Any]], | extra_branches: Dict[bytes, Mapping[str, Any]], | ||||
) -> Optional[Snapshot]: | ) -> Optional[Snapshot]: | ||||
"""Build snapshot out of the current revisions stored and extra branches. | """Build snapshot out of the current releases stored and extra branches. | ||||
Then load it in the storage. | Then load it in the storage. | ||||
""" | """ | ||||
logger.debug("revisions: %s", revisions) | logger.debug("releases: %s", releases) | ||||
# Build and load the snapshot | # Build and load the snapshot | ||||
branches = {} # type: Dict[bytes, Mapping[str, Any]] | branches = {} # type: Dict[bytes, Mapping[str, Any]] | ||||
for version, branch_name_revisions in revisions.items(): | for version, branch_name_releases in releases.items(): | ||||
if version == default_version and len(branch_name_revisions) == 1: | if version == default_version and len(branch_name_releases) == 1: | ||||
# only 1 branch (no ambiguity), we can create an alias | # only 1 branch (no ambiguity), we can create an alias | ||||
# branch 'HEAD' | # branch 'HEAD' | ||||
branch_name, _ = branch_name_revisions[0] | branch_name, _ = branch_name_releases[0] | ||||
# except for some corner case (deposit) | # except for some corner case (deposit) | ||||
if branch_name != "HEAD": | if branch_name != "HEAD": | ||||
branches[b"HEAD"] = { | branches[b"HEAD"] = { | ||||
"target_type": "alias", | "target_type": "alias", | ||||
"target": branch_name.encode("utf-8"), | "target": branch_name.encode("utf-8"), | ||||
} | } | ||||
for branch_name, target in branch_name_revisions: | for branch_name, target in branch_name_releases: | ||||
branches[branch_name.encode("utf-8")] = { | branches[branch_name.encode("utf-8")] = { | ||||
"target_type": "revision", | "target_type": "release", | ||||
ardumont: or sthg? | |||||
Done Inline Actionslater; this needs a few more changes. vlorentz: later; this needs a few more changes. | |||||
Not Done Inline Actionssure ;) ardumont: sure ;) | |||||
"target": target, | "target": target, | ||||
} | } | ||||
# Deal with extra-branches | # Deal with extra-branches | ||||
for name, branch_target in extra_branches.items(): | for name, branch_target in extra_branches.items(): | ||||
if name in branches: | if name in branches: | ||||
logger.error("Extra branch '%s' has been ignored", name) | logger.error("Extra branch '%s' has been ignored", name) | ||||
else: | else: | ||||
▲ Show 20 Lines • Show All 109 Lines • ▼ Show 20 Lines | ) -> List[RawExtrinsicMetadata]: | ||||
metadata=item.metadata, | metadata=item.metadata, | ||||
origin=self.url, | origin=self.url, | ||||
) | ) | ||||
) | ) | ||||
return metadata_objects | return metadata_objects | ||||
def build_extrinsic_directory_metadata(
    self, p_info: TPackageInfo, release_id: Sha1Git, directory_id: Sha1Git,
) -> List[RawExtrinsicMetadata]:
    """Build the metadata objects for ``p_info.directory_extrinsic_metadata``.

    Each item becomes one ``RawExtrinsicMetadata`` targeting the directory,
    with the release and the origin URL recorded as context.

    Args:
        p_info: Package information carrying the extrinsic metadata items
        release_id: Identifier of the release recorded as context
        directory_id: Identifier of the directory the metadata is attached to

    Returns:
        One metadata object per item, in the same order; an empty list when
        the package info carries no extrinsic metadata.
    """
    extrinsic_items = p_info.directory_extrinsic_metadata
    if not extrinsic_items:
        # If this package loader doesn't write metadata, no need to require
        # an implementation for get_metadata_authority.
        return []

    authority = self.get_metadata_authority()
    fetcher = self.get_metadata_fetcher()

    return [
        RawExtrinsicMetadata(
            target=ExtendedSWHID(
                object_type=ExtendedObjectType.DIRECTORY, object_id=directory_id
            ),
            # Fall back on the visit date when the item has no date of its own.
            discovery_date=item.discovery_date or self.visit_date,
            authority=authority,
            fetcher=fetcher,
            format=item.format,
            metadata=item.metadata,
            origin=self.url,
            release=CoreSWHID(
                object_type=ObjectType.RELEASE, object_id=release_id
            ),
        )
        for item in extrinsic_items
    ]
def _load_extrinsic_directory_metadata(
    self, p_info: TPackageInfo, release_id: Sha1Git, directory_id: Sha1Git,
) -> None:
    """Build the directory's extrinsic metadata objects and load them.

    Args:
        p_info: Package information carrying the extrinsic metadata items
        release_id: Identifier of the release recorded as context
        directory_id: Identifier of the directory the metadata is attached to
    """
    self._load_metadata_objects(
        self.build_extrinsic_directory_metadata(p_info, release_id, directory_id)
    )
def _load_metadata_objects( | def _load_metadata_objects( | ||||
self, metadata_objects: List[RawExtrinsicMetadata] | self, metadata_objects: List[RawExtrinsicMetadata] | ||||
) -> None: | ) -> None: | ||||
if not metadata_objects: | if not metadata_objects: | ||||
# If this package loader doesn't write metadata, no need to require | # If this package loader doesn't write metadata, no need to require | ||||
Show All 24 Lines | def _load_extids(self, extids: Set[ExtID]) -> None: | ||||
return | return | ||||
try: | try: | ||||
self.storage.extid_add(list(extids)) | self.storage.extid_add(list(extids)) | ||||
except Exception as e: | except Exception as e: | ||||
logger.exception("Failed to load new ExtIDs for %s", self.url) | logger.exception("Failed to load new ExtIDs for %s", self.url) | ||||
sentry_sdk.capture_exception(e) | sentry_sdk.capture_exception(e) | ||||
# No big deal, it just means the next visit will load the same versions | # No big deal, it just means the next visit will load the same versions | ||||
# again. | # again. | ||||
def rev2rel(rev: Revision, version: str) -> Release:
    """Converts a revision to a release.

    The release is named after ``version`` and targets the revision's
    directory. The revision's message, author, date and synthetic flag are
    carried over; no other revision field is copied.

    Args:
        rev: Revision to convert
        version: Version string, encoded to bytes and used as the release name

    Returns:
        The new release object.
    """
    release_name = version.encode()
    return Release(
        name=release_name,
        target_type=ModelObjectType.DIRECTORY,
        target=rev.directory,
        author=rev.author,
        date=rev.date,
        message=rev.message,
        synthetic=rev.synthetic,
    )
or sthg?