diff --git a/swh/deposit/api/private/deposit_check.py b/swh/deposit/api/private/deposit_check.py --- a/swh/deposit/api/private/deposit_check.py +++ b/swh/deposit/api/private/deposit_check.py @@ -54,7 +54,7 @@ class APIChecks(APIPrivateView, APIGet, DepositReadMixin): - """Dedicated class to read a deposit's raw archives content. + """Dedicated class to trigger the deposit checks on deposit archives and metadata. Only GET is supported. @@ -67,7 +67,7 @@ The deposit to check archives for Returns - tuple (status, error_detail): True, None if all archives + tuple (status, error_or_warning_details): True, None if all archives are ok, (False, ) otherwise. """ @@ -133,8 +133,14 @@ def process_get( self, req: Request, collection_name: str, deposit: Deposit ) -> Tuple[int, Dict, str]: - """Build a unique tarball from the multiple received and stream that - content to the client. + """Trigger the checks on the deposit archives and then on the deposit metadata. If any + problems or warnings are raises, the deposit status is updated accordingly + (verified or rejected) with details. + + Note that if all checks are ok, the deposit status is updated to the verified + status and a loading task is scheduled for the deposit to be ingested. + Otherwise, the deposit is marked as rejected with the problems' details. A json + response is returned to the caller with the deposit details. Args: req: Client request @@ -142,45 +148,48 @@ deposit: Deposit concerned by the reading Returns: - Tuple status, stream of content, content-type + Tuple (status, json response, content-type) """ metadata, _ = self._metadata_get(deposit) - problems: Dict = {} + problems_or_warnings: Dict = {} # will check each deposit's associated request (both of type # archive and metadata) for errors - archives_status, error_detail = self._check_deposit_archives(deposit) - if not archives_status: - assert error_detail is not None - problems.update(error_detail) - - metadata_status, error_detail = check_metadata(metadata) - if not metadata_status: - assert error_detail is not None - problems.update(error_detail) - - deposit_status = archives_status and metadata_status - - # if any problems arose, the deposit is rejected - if not deposit_status: - deposit.status = DEPOSIT_STATUS_REJECTED - deposit.status_detail = problems - response = { - "status": deposit.status, - "details": deposit.status_detail, - } - else: - deposit.status = DEPOSIT_STATUS_VERIFIED - response = { - "status": deposit.status, - } - if not deposit.load_task_id and self.config["checks"]: - url = deposit.origin_url - task = create_oneshot_task_dict( - "load-deposit", url=url, deposit_id=deposit.id, retries_left=3 - ) - load_task_id = self.scheduler.create_tasks([task])[0]["id"] - deposit.load_task_id = load_task_id + archives_status_ok, error_or_warning_details = self._check_deposit_archives( + deposit + ) + if not archives_status_ok: + assert error_or_warning_details is not None + problems_or_warnings.update(error_or_warning_details) + + metadata_status_ok, error_or_warning_details = check_metadata(metadata) + # Ensure in case of error, we do have the rejection details + assert metadata_status_ok or ( + not metadata_status_ok and error_or_warning_details is not None + ) + # we can have warnings even if checks are ok (e.g. missing suggested field) + problems_or_warnings.update(error_or_warning_details or {}) + + deposit_status_ok = archives_status_ok and metadata_status_ok + # if any problems_or_warnings arose, the deposit is rejected + deposit.status = ( + DEPOSIT_STATUS_VERIFIED if deposit_status_ok else DEPOSIT_STATUS_REJECTED + ) + response: Dict = { + "status": deposit.status, + } + if problems_or_warnings: + deposit.status_detail = problems_or_warnings + response["details"] = problems_or_warnings + + # Deposit ok, then we schedule the deposit loading task (if not already done) + if deposit_status_ok and not deposit.load_task_id and self.config["checks"]: + url = deposit.origin_url + task = create_oneshot_task_dict( + "load-deposit", url=url, deposit_id=deposit.id, retries_left=3 + ) + load_task_id = self.scheduler.create_tasks([task])[0]["id"] + deposit.load_task_id = load_task_id deposit.save() diff --git a/swh/deposit/tests/api/test_deposit_private_check.py b/swh/deposit/tests/api/test_deposit_private_check.py --- a/swh/deposit/tests/api/test_deposit_private_check.py +++ b/swh/deposit/tests/api/test_deposit_private_check.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2021 The Software Heritage developers +# Copyright (C) 2017-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -7,7 +7,11 @@ import pytest from rest_framework import status -from swh.deposit.api.checks import MANDATORY_FIELDS_MISSING +from swh.deposit.api.checks import ( + MANDATORY_FIELDS_MISSING, + METADATA_PROVENANCE_KEY, + SUGGESTED_FIELDS_MISSING, +) from swh.deposit.api.private.deposit_check import ( MANDATORY_ARCHIVE_INVALID, MANDATORY_ARCHIVE_MISSING, @@ -55,6 +59,14 @@ deposit = Deposit.objects.get(pk=deposit.id) assert deposit.status == DEPOSIT_STATUS_VERIFIED + # Deposit is ok but it's missing suggested fields in its metadata detected by + # the checks + status_detail = deposit.status_detail["metadata"] + assert len(status_detail) == 1 + suggested = status_detail[0] + assert suggested["summary"] == SUGGESTED_FIELDS_MISSING + assert set(suggested["fields"]) == set([METADATA_PROVENANCE_KEY]) + deposit.status = DEPOSIT_STATUS_DEPOSITED deposit.save() @@ -130,7 +142,7 @@ assert len(details["archive"]) == 1 assert details["archive"][0]["summary"] == MANDATORY_ARCHIVE_UNSUPPORTED # metadata check failure - assert len(details["metadata"]) == 1 + assert len(details["metadata"]) == 2 mandatory = details["metadata"][0] assert mandatory["summary"] == MANDATORY_FIELDS_MISSING assert set(mandatory["fields"]) == set( @@ -139,6 +151,9 @@ "atom:name or atom:title or codemeta:name", ] ) + suggested = details["metadata"][1] + assert suggested["summary"] == SUGGESTED_FIELDS_MISSING + assert set(suggested["fields"]) == set([METADATA_PROVENANCE_KEY]) deposit = Deposit.objects.get(pk=deposit.id) assert deposit.status == DEPOSIT_STATUS_REJECTED