Changeset View
Changeset View
Standalone View
Standalone View
swh/loader/package/loader.py
Show First 20 Lines • Show All 98 Lines • ▼ Show 20 Lines | class BasePackageInfo: | ||||
Returns: | Returns: | ||||
The identity for that dict entry | The identity for that dict entry | ||||
""" | """ | ||||
url = attr.ib(type=str) | url = attr.ib(type=str) | ||||
filename = attr.ib(type=Optional[str]) | filename = attr.ib(type=Optional[str]) | ||||
version = attr.ib(type=str) | |||||
"""Version name/number.""" | |||||
MANIFEST_FORMAT: Optional[string.Template] = None | MANIFEST_FORMAT: Optional[string.Template] = None | ||||
"""If not None, used by the default extid() implementation to format a manifest, | """If not None, used by the default extid() implementation to format a manifest, | ||||
before hashing it to produce an ExtID.""" | before hashing it to produce an ExtID.""" | ||||
EXTID_TYPE: str = "package-manifest-sha256" | EXTID_TYPE: str = "package-manifest-sha256" | ||||
# The following attribute has kw_only=True in order to allow subclasses | # The following attribute has kw_only=True in order to allow subclasses | ||||
▲ Show 20 Lines • Show All 71 Lines • ▼ Show 20 Lines | def get_package_info(self, version: str) -> Iterator[Tuple[str, TPackageInfo]]: | ||||
Returns: | Returns: | ||||
(branch name, package metadata) | (branch name, package metadata) | ||||
""" | """ | ||||
yield from {} | yield from {} | ||||
def build_release( | def build_release( | ||||
self, | self, p_info: TPackageInfo, uncompressed_path: str, directory: Sha1Git | ||||
version: str, | |||||
p_info: TPackageInfo, | |||||
uncompressed_path: str, | |||||
directory: Sha1Git, | |||||
) -> Optional[Release]: | ) -> Optional[Release]: | ||||
"""Build the release from the archive metadata (extrinsic | """Build the release from the archive metadata (extrinsic | ||||
artifact metadata) and the intrinsic metadata. | artifact metadata) and the intrinsic metadata. | ||||
Args: | Args: | ||||
p_info: Package information | p_info: Package information | ||||
uncompressed_path: Artifact uncompressed path on disk | uncompressed_path: Artifact uncompressed path on disk | ||||
""" | """ | ||||
▲ Show 20 Lines • Show All 337 Lines • ▼ Show 20 Lines | def load(self) -> Dict: | ||||
visit=visit, | visit=visit, | ||||
failed_branches=failed_branches, | failed_branches=failed_branches, | ||||
status_visit="failed", | status_visit="failed", | ||||
status_load="failed", | status_load="failed", | ||||
errors=[str(e)], | errors=[str(e)], | ||||
) | ) | ||||
# Get the metadata of each version's package | # Get the metadata of each version's package | ||||
packages_info: List[Tuple[str, str, TPackageInfo]] = [ | packages_info: List[Tuple[str, TPackageInfo]] = [ | ||||
(version, branch_name, p_info) | (branch_name, p_info) | ||||
for version in versions | for version in versions | ||||
for (branch_name, p_info) in self.get_package_info(version) | for (branch_name, p_info) in self.get_package_info(version) | ||||
] | ] | ||||
# Compute the ExtID of each of these packages | # Compute the ExtID of each of these packages | ||||
known_extids = self._get_known_extids( | known_extids = self._get_known_extids([p_info for (_, p_info) in packages_info]) | ||||
[p_info for (_, _, p_info) in packages_info] | |||||
) | |||||
if last_snapshot is None: | if last_snapshot is None: | ||||
last_snapshot_targets: Set[Sha1Git] = set() | last_snapshot_targets: Set[Sha1Git] = set() | ||||
else: | else: | ||||
last_snapshot_targets = { | last_snapshot_targets = { | ||||
branch.target for branch in last_snapshot.branches.values() | branch.target for branch in last_snapshot.branches.values() | ||||
} | } | ||||
new_extids: Set[ExtID] = set() | new_extids: Set[ExtID] = set() | ||||
tmp_releases: Dict[str, List[Tuple[str, Sha1Git]]] = { | tmp_releases: Dict[str, List[Tuple[str, Sha1Git]]] = { | ||||
version: [] for version in versions | version: [] for version in versions | ||||
} | } | ||||
errors = [] | errors = [] | ||||
for (version, branch_name, p_info) in packages_info: | for (branch_name, p_info) in packages_info: | ||||
logger.debug("package_info: %s", p_info) | logger.debug("package_info: %s", p_info) | ||||
# Check if the package was already loaded, using its ExtID | # Check if the package was already loaded, using its ExtID | ||||
swhid = self.resolve_object_from_extids( | swhid = self.resolve_object_from_extids( | ||||
known_extids, p_info, last_snapshot_targets | known_extids, p_info, last_snapshot_targets | ||||
) | ) | ||||
if swhid is not None and swhid.object_type == ObjectType.REVISION: | if swhid is not None and swhid.object_type == ObjectType.REVISION: | ||||
Show All 13 Lines | def load(self) -> Dict: | ||||
rev = None | rev = None | ||||
if swhid is None or (swhid.object_type == ObjectType.REVISION and not rev): | if swhid is None or (swhid.object_type == ObjectType.REVISION and not rev): | ||||
# No matching revision or release found in the last snapshot, load it. | # No matching revision or release found in the last snapshot, load it. | ||||
release_id = None | release_id = None | ||||
try: | try: | ||||
res = self._load_release(version, p_info, origin) | res = self._load_release(p_info, origin) | ||||
if res: | if res: | ||||
(release_id, directory_id) = res | (release_id, directory_id) = res | ||||
assert release_id | assert release_id | ||||
assert directory_id | assert directory_id | ||||
self._load_extrinsic_directory_metadata( | self._load_extrinsic_directory_metadata( | ||||
p_info, release_id, directory_id | p_info, release_id, directory_id | ||||
) | ) | ||||
self.storage.flush() | self.storage.flush() | ||||
Show All 10 Lines | def load(self) -> Dict: | ||||
if release_id is None: | if release_id is None: | ||||
continue | continue | ||||
add_extid = True | add_extid = True | ||||
elif swhid.object_type == ObjectType.REVISION: | elif swhid.object_type == ObjectType.REVISION: | ||||
# If 'rev' was None, the previous block would have run. | # If 'rev' was None, the previous block would have run. | ||||
assert rev is not None | assert rev is not None | ||||
rel = rev2rel(rev, version) | rel = rev2rel(rev, p_info.version) | ||||
self.storage.release_add([rel]) | self.storage.release_add([rel]) | ||||
logger.debug("Upgraded %s to %s", swhid, rel.swhid()) | logger.debug("Upgraded %s to %s", swhid, rel.swhid()) | ||||
release_id = rel.id | release_id = rel.id | ||||
# Create a new extid for this package, so the next run of this loader | # Create a new extid for this package, so the next run of this loader | ||||
# will be able to find the new release, and use it (instead of the | # will be able to find the new release, and use it (instead of the | ||||
# old revision) | # old revision) | ||||
add_extid = True | add_extid = True | ||||
Show All 12 Lines | def load(self) -> Dict: | ||||
(extid_type, extid) = partial_extid | (extid_type, extid) = partial_extid | ||||
release_swhid = CoreSWHID( | release_swhid = CoreSWHID( | ||||
object_type=ObjectType.RELEASE, object_id=release_id | object_type=ObjectType.RELEASE, object_id=release_id | ||||
) | ) | ||||
new_extids.add( | new_extids.add( | ||||
ExtID(extid_type=extid_type, extid=extid, target=release_swhid) | ExtID(extid_type=extid_type, extid=extid, target=release_swhid) | ||||
) | ) | ||||
tmp_releases[version].append((branch_name, release_id)) | tmp_releases[p_info.version].append((branch_name, release_id)) | ||||
if load_exceptions: | if load_exceptions: | ||||
status_visit = "partial" | status_visit = "partial" | ||||
if not tmp_releases: | if not tmp_releases: | ||||
# We could not load any releases; fail completely | # We could not load any releases; fail completely | ||||
return self.finalize_visit( | return self.finalize_visit( | ||||
snapshot=snapshot, | snapshot=snapshot, | ||||
▲ Show 20 Lines • Show All 77 Lines • ▼ Show 20 Lines | ) -> Tuple[str, from_disk.Directory]: | ||||
self.storage.content_add(contents) | self.storage.content_add(contents) | ||||
logger.debug("Number of directories: %s", len(directories)) | logger.debug("Number of directories: %s", len(directories)) | ||||
self.storage.directory_add(directories) | self.storage.directory_add(directories) | ||||
return (uncompressed_path, directory) | return (uncompressed_path, directory) | ||||
def _load_release( | def _load_release( | ||||
self, version: str, p_info: TPackageInfo, origin | self, p_info: TPackageInfo, origin | ||||
) -> Optional[Tuple[Sha1Git, Sha1Git]]: | ) -> Optional[Tuple[Sha1Git, Sha1Git]]: | ||||
"""Does all the loading of a release itself: | """Does all the loading of a release itself: | ||||
* downloads a package and uncompresses it | * downloads a package and uncompresses it | ||||
* loads it from disk | * loads it from disk | ||||
* adds contents, directories, and release to self.storage | * adds contents, directories, and release to self.storage | ||||
* returns (release_id, directory_id) | * returns (release_id, directory_id) | ||||
Raises | Raises | ||||
exception when unable to download or uncompress artifacts | exception when unable to download or uncompress artifacts | ||||
""" | """ | ||||
with tempfile.TemporaryDirectory() as tmpdir: | with tempfile.TemporaryDirectory() as tmpdir: | ||||
dl_artifacts = self.download_package(p_info, tmpdir) | dl_artifacts = self.download_package(p_info, tmpdir) | ||||
(uncompressed_path, directory) = self._load_directory(dl_artifacts, tmpdir) | (uncompressed_path, directory) = self._load_directory(dl_artifacts, tmpdir) | ||||
# FIXME: This should be release. cf. D409 | # FIXME: This should be release. cf. D409 | ||||
release = self.build_release( | release = self.build_release( | ||||
version, p_info, uncompressed_path, directory=directory.hash | p_info, uncompressed_path, directory=directory.hash | ||||
) | ) | ||||
print(release) | |||||
if not release: | if not release: | ||||
# Some artifacts are missing intrinsic metadata | # Some artifacts are missing intrinsic metadata | ||||
# skipping those | # skipping those | ||||
return None | return None | ||||
metadata = [metadata for (filepath, metadata) in dl_artifacts] | metadata = [metadata for (filepath, metadata) in dl_artifacts] | ||||
assert release.target is not None, release | assert release.target is not None, release | ||||
▲ Show 20 Lines • Show All 265 Lines • Show Last 20 Lines |