diff --git a/swh/deposit/api/private/deposit_check.py b/swh/deposit/api/private/deposit_check.py --- a/swh/deposit/api/private/deposit_check.py +++ b/swh/deposit/api/private/deposit_check.py @@ -54,7 +54,7 @@ class APIChecks(APIPrivateView, APIGet, DepositReadMixin): - """Dedicated class to read a deposit's raw archives content. + """Dedicated class to trigger the deposit checks on deposit archives and metadata. Only GET is supported. @@ -67,7 +67,7 @@ The deposit to check archives for Returns - tuple (status, error_detail): True, None if all archives + tuple (status, error_or_warning_details): True, None if all archives are ok, (False, ) otherwise. """ @@ -133,8 +133,14 @@ def process_get( self, req: Request, collection_name: str, deposit: Deposit ) -> Tuple[int, Dict, str]: - """Build a unique tarball from the multiple received and stream that - content to the client. + """Trigger the checks on the deposit archives and then on the deposit metadata. If any + problems or warnings are raises, the deposit status is updated accordingly + (verified or rejected) with details. + + Note that if all checks are ok, the deposit status is updated to the verified + status and a loading task is scheduled for the deposit to be ingested. + Otherwise, the deposit is marked as rejected with the problems' details. A json + response is returned to the caller with the deposit details. Args: req: Client request @@ -142,45 +148,48 @@ deposit: Deposit concerned by the reading Returns: - Tuple status, stream of content, content-type + Tuple (status, json response, content-type) """ metadata, _ = self._metadata_get(deposit) - problems: Dict = {} + problems_or_warnings: Dict = {} # will check each deposit's associated request (both of type # archive and metadata) for errors - archives_status, error_detail = self._check_deposit_archives(deposit) - if not archives_status: - assert error_detail is not None - problems.update(error_detail) - - metadata_status, error_detail = check_metadata(metadata) - if not metadata_status: - assert error_detail is not None - problems.update(error_detail) - - deposit_status = archives_status and metadata_status - - # if any problems arose, the deposit is rejected - if not deposit_status: - deposit.status = DEPOSIT_STATUS_REJECTED - deposit.status_detail = problems - response = { - "status": deposit.status, - "details": deposit.status_detail, - } - else: - deposit.status = DEPOSIT_STATUS_VERIFIED - response = { - "status": deposit.status, - } - if not deposit.load_task_id and self.config["checks"]: - url = deposit.origin_url - task = create_oneshot_task_dict( - "load-deposit", url=url, deposit_id=deposit.id, retries_left=3 - ) - load_task_id = self.scheduler.create_tasks([task])[0]["id"] - deposit.load_task_id = load_task_id + archives_status_ok, error_or_warning_details = self._check_deposit_archives( + deposit + ) + if not archives_status_ok: + assert error_or_warning_details is not None + problems_or_warnings.update(error_or_warning_details) + + metadata_status_ok, error_or_warning_details = check_metadata(metadata) + # Ensure in case of error, we do have the rejection details + assert metadata_status_ok or ( + not metadata_status_ok and error_or_warning_details is not None + ) + # we can have warnings even if checks are ok (e.g. missing suggested field) + problems_or_warnings.update(error_or_warning_details or {}) + + deposit_status_ok = archives_status_ok and metadata_status_ok + # if any problems_or_warnings arose, the deposit is rejected + deposit.status = ( + DEPOSIT_STATUS_VERIFIED if deposit_status_ok else DEPOSIT_STATUS_REJECTED + ) + response: Dict = { + "status": deposit.status, + } + if problems_or_warnings: + deposit.status_detail = problems_or_warnings + response["details"] = problems_or_warnings + + # Deposit ok, then we schedule the deposit loading task (if not already done) + if deposit_status_ok and not deposit.load_task_id and self.config["checks"]: + url = deposit.origin_url + task = create_oneshot_task_dict( + "load-deposit", url=url, deposit_id=deposit.id, retries_left=3 + ) + load_task_id = self.scheduler.create_tasks([task])[0]["id"] + deposit.load_task_id = load_task_id deposit.save() diff --git a/swh/deposit/tests/api/test_deposit_private_check.py b/swh/deposit/tests/api/test_deposit_private_check.py --- a/swh/deposit/tests/api/test_deposit_private_check.py +++ b/swh/deposit/tests/api/test_deposit_private_check.py @@ -59,6 +59,14 @@ deposit = Deposit.objects.get(pk=deposit.id) assert deposit.status == DEPOSIT_STATUS_VERIFIED + # Deposit is ok but it's missing suggested fields in its metadata detected by + # the checks + status_detail = deposit.status_detail["metadata"] + assert len(status_detail) == 1 + suggested = status_detail[0] + assert suggested["summary"] == SUGGESTED_FIELDS_MISSING + assert set(suggested["fields"]) == set([METADATA_PROVENANCE_KEY]) + deposit.status = DEPOSIT_STATUS_DEPOSITED deposit.save()