diff --git a/swh/deposit/api/private/deposit_check.py b/swh/deposit/api/private/deposit_check.py --- a/swh/deposit/api/private/deposit_check.py +++ b/swh/deposit/api/private/deposit_check.py @@ -46,19 +46,36 @@ The deposit to check archives for Returns - True if all archives are ok, False otherwise. + tuple (status, error_detail): True, None if all archives + are ok, (False, ) otherwise. """ requests = list(self._deposit_requests( deposit, request_type=ARCHIVE_TYPE)) if len(requests) == 0: # no associated archive is refused - return False + return False, { + 'archive': { + 'summary': 'Deposit without archive is rejected.', + 'id': deposit.id, + } + } + rejected_dr_ids = [] for dr in requests: - check = self._check_archive(dr.archive.path) + _path = dr.archive.path + check = self._check_archive(_path) if not check: - return False - return True + rejected_dr_ids.append(dr.id) + + if rejected_dr_ids: + return False, { + 'archive': { + 'summary': 'Following deposit request ids are rejected ' + 'because their associated archive is not ' + 'readable', + 'ids': rejected_dr_ids, + }} + return True, None def _check_archive(self, archive_path): """Check that a given archive is actually ok for reading. @@ -81,10 +98,11 @@ """Given a deposit, aggregate all metadata requests. Args: - The deposit to check metadata for. + deposit (Deposit): The deposit instance to extract + metadata from. Returns: - True if the deposit's associated metadata are ok, False otherwise. + metadata dict from the deposit. """ metadata = {} @@ -96,22 +114,54 @@ """Check to execute on all metadata for mandatory field presence. Args: - metadata (dict): Metadata to actually check + metadata (dict): Metadata dictionary to check for mandatory fields Returns: - True if metadata is ok, False otherwise. + tuple (status, error_detail): True, None if metadata are + ok (False, ) otherwise. """ - required_fields = (('url',), - ('external_identifier',), - ('name', 'title'), - ('author',)) - - result = all(any(name in field - for field in metadata - for name in possible_names) - for possible_names in required_fields) - return result + required_fields = { + 'url': False, + 'external_identifier': False, + 'author': False, + } + alternate_fields = { + ('name', 'title'): False, # alternate field, at least one + # of them must be present + } + + for field, value in metadata.items(): + for name in required_fields: + if name in field: + required_fields[name] = True + + for possible_names in alternate_fields: + for possible_name in possible_names: + if possible_name in field: + alternate_fields[possible_names] = True + continue + + mandatory_result = [k for k, v in required_fields.items() if not v] + optional_result = [ + k for k, v in alternate_fields.items() if not v] + + if mandatory_result == [] and optional_result == []: + return True, None + detail = [] + if mandatory_result != []: + detail.append({ + 'summary': 'Mandatory fields are missing', + 'fields': mandatory_result + }) + if optional_result != []: + detail.append({ + 'summary': 'Mandatory alternate fields are missing', + 'fields': optional_result, + }) + return False, { + 'metadata': detail + } def _check_url(self, client_domain, metadata): """Check compatibility between client_domain and url field in metadata @@ -119,17 +169,25 @@ Args: client_domain (str): url associated with the deposit's client metadata (dict): Metadata where to find url + Returns: - True if url is ok, False otherwise. + tuple (status, error_detail): True, None if url associated + with the deposit's client is ok, (False, + ) otherwise. """ - metadata_urls = [] + url_fields = [] for field in metadata: - if 'url' in field: - metadata_urls.append(metadata[field]) + url_fields.append(field) + if 'url' in field and client_domain in metadata[field]: + return True, None - return any(client_domain in url - for url in metadata_urls) + return False, { + 'url': { + 'summary': "No url field provided is compatible with the " + "client's domain'", + 'fields': url_fields, + }} def process_get(self, req, collection_name, deposit_id): """Build a unique tarball from the multiple received and stream that @@ -147,30 +205,30 @@ deposit = Deposit.objects.get(pk=deposit_id) client_domain = deposit.client.domain metadata = self._metadata_get(deposit) - problems = [] + problems = {} # will check each deposit's associated request (both of type # archive and metadata) for errors - archives_status = self._check_deposit_archives(deposit) + archives_status, error_detail = self._check_deposit_archives(deposit) if not archives_status: - problems.append('archive(s)') + problems.update(error_detail) - metadata_status = self._check_metadata(metadata) + metadata_status, error_detail = self._check_metadata(metadata) if not metadata_status: - problems.append('metadata') + problems.update(error_detail) - url_status = self._check_url(client_domain, metadata) + url_status, error_detail = self._check_url(client_domain, metadata) if not url_status: - problems.append('url') + problems.update(error_detail) deposit_status = archives_status and metadata_status and url_status # if any problems arose, the deposit is rejected if not deposit_status: deposit.status = DEPOSIT_STATUS_REJECTED + deposit.status_detail = problems response = { 'status': deposit.status, - 'details': 'Some %s failed the checks.' % ( - ' and '.join(problems), ), + 'details': deposit.status_detail, } else: deposit.status = DEPOSIT_STATUS_VERIFIED diff --git a/swh/deposit/migrations/0012_deposit_status_detail.py b/swh/deposit/migrations/0012_deposit_status_detail.py new file mode 100644 --- /dev/null +++ b/swh/deposit/migrations/0012_deposit_status_detail.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11.14 on 2018-07-09 13:08 +from __future__ import unicode_literals + +import django.contrib.postgres.fields.jsonb +from django.db import migrations + + +class Migration(migrations.Migration): + + dependencies = [ + ('deposit', '0011_auto_20180115_1510'), + ] + + operations = [ + migrations.AddField( + model_name='deposit', + name='status_detail', + field=django.contrib.postgres.fields.jsonb.JSONField(null=True), + ), + ] diff --git a/swh/deposit/models.py b/swh/deposit/models.py --- a/swh/deposit/models.py +++ b/swh/deposit/models.py @@ -114,6 +114,7 @@ status = models.TextField( choices=DEPOSIT_STATUS, default=DEPOSIT_STATUS_PARTIAL) + status_detail = JSONField(null=True) # deposit can have one parent parent = models.ForeignKey('self', null=True) @@ -121,14 +122,18 @@ db_table = 'deposit' def __str__(self): - return str({ + d = { 'id': self.id, 'reception_date': self.reception_date, 'collection': self.collection.name, 'external_id': self.external_id, 'client': self.client.username, - 'status': self.status - }) + 'status': self.status, + } + + if self.status in (DEPOSIT_STATUS_LOAD_FAILURE, 'rejected'): + d['status_detail'] = self.status_detail + return str(d) class DepositRequestType(models.Model): diff --git a/swh/deposit/tests/api/test_deposit_check.py b/swh/deposit/tests/api/test_deposit_check.py --- a/swh/deposit/tests/api/test_deposit_check.py +++ b/swh/deposit/tests/api/test_deposit_check.py @@ -58,10 +58,10 @@ """Invalid deposit should fail the checks (-> status rejected) """ - deposit_id = self.create_invalid_deposit() + deposit_id = self.create_deposit_with_invalid_archive() deposit = Deposit.objects.get(pk=deposit_id) - self.assertEquals(deposit.status, DEPOSIT_STATUS_DEPOSITED) + self.assertEquals(DEPOSIT_STATUS_DEPOSITED, deposit.status) url = reverse(PRIVATE_CHECK_DEPOSIT, args=[self.collection.name, deposit.id]) @@ -71,9 +71,30 @@ self.assertEqual(response.status_code, status.HTTP_200_OK) data = json.loads(response.content.decode('utf-8')) self.assertEqual(data['status'], DEPOSIT_STATUS_REJECTED) - self.assertEqual(data['details'], - 'Some archive(s) and metadata and url ' + - 'failed the checks.') + expected_error = { + 'archive': { + 'ids': [20], # FIXME: no order is really guaranteed + 'summary': 'Following deposit request ids are ' + 'rejected because their associated archive' + ' is not readable' + }, + 'metadata': [ + { + 'fields': ['url', 'external_identifier', 'author'], + 'summary': 'Mandatory fields are missing' + }, + { + 'fields': [['name', 'title']], + 'summary': 'Mandatory alternate fields are missing' + }], + 'url': { + 'summary': "No url field provided is compatible with the client" + "'s domain'", + 'fields': [] + } + } + self.assertEqual(data['details'], expected_error) + deposit = Deposit.objects.get(pk=deposit.id) self.assertEquals(deposit.status, DEPOSIT_STATUS_REJECTED) @@ -98,6 +119,7 @@ self.assertEqual(response.status_code, status.HTTP_200_OK) data = json.loads(response.content.decode('utf-8')) + self.assertEqual(data['status'], DEPOSIT_STATUS_VERIFIED) deposit = Deposit.objects.get(pk=deposit.id) self.assertEquals(deposit.status, DEPOSIT_STATUS_VERIFIED) @@ -106,7 +128,7 @@ class CheckMetadata(unittest.TestCase, SWHChecksDeposit): @istest def check_metadata_ok(self): - actual_check = self._check_metadata({ + actual_check, detail = self._check_metadata({ 'url': 'something', 'external_identifier': 'something-else', 'name': 'foo', @@ -114,10 +136,11 @@ }) self.assertTrue(actual_check) + self.assertIsNone(detail) @istest def check_metadata_ok2(self): - actual_check = self._check_metadata({ + actual_check, detail = self._check_metadata({ 'url': 'something', 'external_identifier': 'something-else', 'title': 'bar', @@ -125,23 +148,45 @@ }) self.assertTrue(actual_check) + self.assertIsNone(detail) @istest def check_metadata_ko(self): - actual_check = self._check_metadata({ + """Missing optional field should be caught + + """ + actual_check, error_detail = self._check_metadata({ 'url': 'something', 'external_identifier': 'something-else', 'author': 'someone', }) + expected_error = { + 'metadata': [{ + 'summary': 'Mandatory alternate fields are missing', + 'fields': [('name', 'title')], + }] + } self.assertFalse(actual_check) + self.assertEqual(error_detail, expected_error) @istest def check_metadata_ko2(self): - actual_check = self._check_metadata({ + """Missing mandatory fields should be caught + + """ + actual_check, error_detail = self._check_metadata({ 'url': 'something', 'external_identifier': 'something-else', 'title': 'foobar', }) + expected_error = { + 'metadata': [{ + 'summary': 'Mandatory fields are missing', + 'fields': ['author'], + }] + } + self.assertFalse(actual_check) + self.assertEqual(error_detail, expected_error) diff --git a/swh/deposit/tests/common.py b/swh/deposit/tests/common.py --- a/swh/deposit/tests/common.py +++ b/swh/deposit/tests/common.py @@ -15,7 +15,10 @@ from nose.plugins.attrib import attr from rest_framework import status -from swh.deposit.config import COL_IRI, EM_IRI, EDIT_SE_IRI +from swh.deposit.config import (COL_IRI, EM_IRI, EDIT_SE_IRI, + DEPOSIT_STATUS_PARTIAL, + DEPOSIT_STATUS_VERIFIED, + DEPOSIT_STATUS_DEPOSITED) from swh.deposit.models import DepositClient, DepositCollection, Deposit from swh.deposit.models import DepositRequest from swh.deposit.models import DepositRequestType @@ -121,8 +124,14 @@ self.archive['name'], )) # then - assert response.status_code == status.HTTP_201_CREATED + self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) + _status = response_content['deposit_status'] + if status_partial: + expected_status = DEPOSIT_STATUS_PARTIAL + else: + expected_status = DEPOSIT_STATUS_VERIFIED + self.assertEqual(_status, expected_status) deposit_id = int(response_content['deposit_id']) return deposit_id @@ -158,8 +167,14 @@ HTTP_IN_PROGRESS=status_partial) # then - # assert response.status_code == status.HTTP_201_CREATED + self.assertEqual(response.status_code, status.HTTP_201_CREATED) response_content = parse_xml(BytesIO(response.content)) + _status = response_content['deposit_status'] + if status_partial: + expected_status = DEPOSIT_STATUS_PARTIAL + else: + expected_status = DEPOSIT_STATUS_DEPOSITED + self.assertEqual(_status, expected_status) deposit_id = int(response_content['deposit_id']) return deposit_id @@ -313,7 +328,8 @@ """ - def create_invalid_deposit(self, external_id='some-external-id-1'): + def create_deposit_with_invalid_archive(self, + external_id='some-external-id-1'): url = reverse(COL_IRI, args=[self.collection.name]) data = b'some data which is clearly not a zip file' @@ -338,13 +354,12 @@ def create_deposit_with_status( self, status, external_id='some-external-id-1', swh_id=None): - deposit_id = self.create_invalid_deposit(external_id) + # create an invalid deposit which we will update further down the line + deposit_id = self.create_deposit_with_invalid_archive(external_id) # We cannot create some form of deposit with a given status in - # test context ('rejected' for example). As flipped off the - # checks in the configuration so all deposits have the status - # deposited). Update in place the deposit with such - # status + # test context ('rejected' for example). Update in place the + # deposit with such status to permit some further tests. deposit = Deposit.objects.get(pk=deposit_id) deposit.status = status if swh_id: diff --git a/swh/deposit/tests/loader/test_checker.py b/swh/deposit/tests/loader/test_checker.py --- a/swh/deposit/tests/loader/test_checker.py +++ b/swh/deposit/tests/loader/test_checker.py @@ -33,7 +33,7 @@ @istest def check_deposit_ready(self): - """Check a valid deposit deposited should result in ready state + """Check on a valid 'deposited' deposit should result in 'verified' """ # 1. create a deposit with archive and metadata @@ -46,7 +46,6 @@ # when actual_result = self.checker.check(deposit_check_url=deposit_check_url) - # then deposit = Deposit.objects.get(pk=deposit_id) self.assertEquals(deposit.status, DEPOSIT_STATUS_VERIFIED) @@ -54,11 +53,11 @@ @istest def check_deposit_rejected(self): - """Check an invalid deposit deposited should result in rejected + """Check on invalid 'deposited' deposit should result in 'rejected' """ # 1. create a deposit with archive and metadata - deposit_id = self.create_invalid_deposit() + deposit_id = self.create_deposit_with_invalid_archive() args = [self.collection.name, deposit_id] deposit_check_url = reverse(PRIVATE_CHECK_DEPOSIT, args=args)