diff --git a/swh/loader/package/deposit/loader.py b/swh/loader/package/deposit/loader.py --- a/swh/loader/package/deposit/loader.py +++ b/swh/loader/package/deposit/loader.py @@ -253,14 +253,18 @@ # Then usual loading return super().load() - def finalize_visit(self, status_visit: str, **kwargs) -> Dict[str, Any]: + def finalize_visit( + self, status_visit: str, errors: Optional[List[str]] = None, **kwargs + ) -> Dict[str, Any]: r = super().finalize_visit(status_visit=status_visit, **kwargs) success = status_visit == "full" # Update deposit status try: if not success: - self.client.status_update(self.deposit_id, status="failed") + self.client.status_update( + self.deposit_id, status="failed", errors=errors, + ) return r snapshot_id = hash_to_bytes(r["snapshot_id"]) @@ -358,6 +362,7 @@ self, deposit_id: Union[int, str], status: str, + errors: Optional[List[str]] = None, revision_id: Optional[str] = None, directory_id: Optional[str] = None, snapshot_id: Optional[str] = None, @@ -368,7 +373,7 @@ """ url = f"{self.base_url}/{deposit_id}/update/" - payload = {"status": status} + payload: Dict[str, Any] = {"status": status} if revision_id: payload["revision_id"] = revision_id if directory_id: @@ -377,5 +382,7 @@ payload["snapshot_id"] = snapshot_id if origin_url: payload["origin_url"] = origin_url + if errors: + payload["status_detail"] = {"loading": errors} self.do("put", url, json=payload) diff --git a/swh/loader/package/deposit/tests/test_deposit.py b/swh/loader/package/deposit/tests/test_deposit.py --- a/swh/loader/package/deposit/tests/test_deposit.py +++ b/swh/loader/package/deposit/tests/test_deposit.py @@ -152,6 +152,12 @@ body = update_query.json() expected_body = { "status": "failed", + "status_detail": { + "loading": [ + "Failed to load branch HEAD for some-url-2: Fail to query " + "'https://deposit.softwareheritage.org/1/private/666/raw/'. Reason: 404" + ] + }, } assert body == expected_body diff --git a/swh/loader/package/loader.py b/swh/loader/package/loader.py --- a/swh/loader/package/loader.py +++ b/swh/loader/package/loader.py @@ -406,6 +406,7 @@ status_visit: str, status_load: str, failed_branches: List[str], + errors: Optional[List[str]] = None, ) -> Dict[str, Any]: """Finalize the visit: @@ -529,6 +530,7 @@ failed_branches=failed_branches, status_visit="failed", status_load="failed", + errors=[str(e)], ) load_exceptions: List[Exception] = [] @@ -536,21 +538,23 @@ # Get the list of all version names try: versions = self.get_versions() - except NotFound: + except NotFound as e: return self.finalize_visit( snapshot=snapshot, visit=visit, failed_branches=failed_branches, status_visit="not_found", status_load="failed", + errors=[str(e)], ) - except Exception: + except Exception as e: return self.finalize_visit( snapshot=snapshot, visit=visit, failed_branches=failed_branches, status_visit="failed", status_load="failed", + errors=[str(e)], ) # Get the metadata of each version's package @@ -576,6 +580,7 @@ tmp_revisions: Dict[str, List[Tuple[str, Sha1Git]]] = { version: [] for version in versions } + errors = [] for (version, branch_name, p_info) in packages_info: logger.debug("package_info: %s", p_info) @@ -601,10 +606,10 @@ self.storage.clear_buffers() load_exceptions.append(e) sentry_sdk.capture_exception(e) - logger.exception( - "Failed loading branch %s for %s", branch_name, self.url - ) + error = f"Failed to load branch {branch_name} for {self.url}" + logger.exception(error) failed_branches.append(branch_name) + errors.append(f"{error}: {e}") continue if revision_id is None: @@ -633,6 +638,7 @@ failed_branches=failed_branches, status_visit="failed", status_load="failed", + errors=errors, ) try: @@ -648,7 +654,9 @@ ) self.storage.flush() except Exception as e: - logger.exception("Failed to build snapshot for origin %s", self.url) + error = f"Failed to build snapshot for origin {self.url}" + logger.exception(error) + errors.append(f"{error}: {e}") sentry_sdk.capture_exception(e) status_visit = "failed" status_load = "failed" @@ -658,9 +666,9 @@ metadata_objects = self.build_extrinsic_snapshot_metadata(snapshot.id) self._load_metadata_objects(metadata_objects) except Exception as e: - logger.exception( - "Failed to load extrinsic snapshot metadata for %s", self.url - ) + error = f"Failed to load extrinsic snapshot metadata for {self.url}" + logger.exception(error) + errors.append(f"{error}: {e}") sentry_sdk.capture_exception(e) status_visit = "partial" status_load = "failed" @@ -669,9 +677,9 @@ metadata_objects = self.build_extrinsic_origin_metadata() self._load_metadata_objects(metadata_objects) except Exception as e: - logger.exception( - "Failed to load extrinsic origin metadata for %s", self.url - ) + error = f"Failed to load extrinsic origin metadata for {self.url}" + logger.exception(error) + errors.append(f"{error}: {e}") sentry_sdk.capture_exception(e) status_visit = "partial" status_load = "failed" @@ -684,6 +692,7 @@ failed_branches=failed_branches, status_visit=status_visit, status_load=status_load, + errors=errors, ) def _load_directory(