diff --git a/swh/deposit/api/deposit_status.py b/swh/deposit/api/deposit_status.py index e4907607..52930a6d 100644 --- a/swh/deposit/api/deposit_status.py +++ b/swh/deposit/api/deposit_status.py @@ -1,107 +1,111 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.shortcuts import render from rest_framework import status from .common import SWHBaseDeposit from ..errors import NOT_FOUND, make_error_response from ..errors import make_error_response_from_dict from ..models import DEPOSIT_STATUS_DETAIL, Deposit def convert_status_detail(status_detail): """Given a status_detail dict, transforms it into a human readable string. Dict has the following form (all first level keys are optional): { 'url': { 'summary': , 'fields': }, 'metadata': [{ 'summary': , 'fields': , }], - 'archive': { + 'archive': [{ 'summary': , - 'fields': [], - } + 'fields': , + }] } Args: status_detail (dict): Returns: Status detail as inlined string. """ if not status_detail: return None msg = [] - if 'metadata' in status_detail: - for data in status_detail['metadata']: - fields = ', '.join(data['fields']) - msg.append('- %s (%s)\n' % (data['summary'], fields)) - - for key in ['url', 'archive']: - if key in status_detail: - _detail = status_detail[key] - fields = _detail.get('fields') - suffix_msg = '' - if fields: - suffix_msg = ' (%s)' % ', '.join(fields) - msg.append('- %s%s\n' % (_detail['summary'], suffix_msg)) + for key in ['metadata', 'archive']: + _detail = status_detail.get(key) + if _detail: + for data in _detail: + suffix_msg = '' + fields = data.get('fields') + if fields: + suffix_msg = ' (%s)' % ', '.join(fields) + msg.append('- %s%s\n' % (data['summary'], suffix_msg)) + + _detail = status_detail.get('url') + if _detail: + fields = _detail.get('fields') + suffix_msg = '' + if fields: + suffix_msg = ' (%s)' % ', '.join(fields) + msg.append('- %s%s\n' % (_detail['summary'], suffix_msg)) if not msg: return None return ''.join(msg) class SWHDepositStatus(SWHBaseDeposit): """Deposit status. What's known as 'State IRI' in the sword specification. HTTP verbs supported: GET """ def get(self, req, collection_name, deposit_id, format=None): checks = self.checks(req, collection_name, deposit_id) if 'error' in checks: return make_error_response_from_dict(req, checks['error']) try: deposit = Deposit.objects.get(pk=deposit_id) if deposit.collection.name != collection_name: raise Deposit.DoesNotExist except Deposit.DoesNotExist: return make_error_response( req, NOT_FOUND, 'deposit %s does not belong to collection %s' % ( deposit_id, collection_name)) status_detail = convert_status_detail(deposit.status_detail) if not status_detail: status_detail = DEPOSIT_STATUS_DETAIL[deposit.status] context = { 'deposit_id': deposit.id, 'status': deposit.status, 'status_detail': status_detail, 'swh_id': None, } if deposit.swh_id: context['swh_id'] = deposit.swh_id return render(req, 'deposit/status.xml', context=context, content_type='application/xml', status=status.HTTP_200_OK) diff --git a/swh/deposit/api/private/deposit_check.py b/swh/deposit/api/private/deposit_check.py index eae73e38..956e8cbc 100644 --- a/swh/deposit/api/private/deposit_check.py +++ b/swh/deposit/api/private/deposit_check.py @@ -1,236 +1,236 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json import re import tarfile import zipfile from rest_framework import status from . import DepositReadMixin from ..common import SWHGetDepositAPI, SWHPrivateAPIView from ...config import DEPOSIT_STATUS_VERIFIED, DEPOSIT_STATUS_REJECTED from ...config import ARCHIVE_TYPE from ...models import Deposit MANDATORY_FIELDS_MISSING = 'Mandatory fields are missing' ALTERNATE_FIELDS_MISSING = 'Mandatory alternate fields are missing' MANDATORY_ARCHIVE_UNREADABLE = 'Deposit was rejected because at least one of its associated archives was not readable' # noqa MANDATORY_ARCHIVE_INVALID = 'Mandatory archive is invalid (e.g contains an archive)' # noqa MANDATORY_ARCHIVE_UNSUPPORTED = 'Mandatory archive type is not supported' MANDATORY_ARCHIVE_MISSING = 'Deposit without archive is rejected' INCOMPATIBLE_URL_FIELDS = "At least one url field must be compatible with the client's domain name" # noqa class SWHChecksDeposit(SWHGetDepositAPI, SWHPrivateAPIView, DepositReadMixin): """Dedicated class to read a deposit's raw archives content. Only GET is supported. """ def _check_deposit_archives(self, deposit): """Given a deposit, check each deposit request of type archive. Args: The deposit to check archives for Returns tuple (status, error_detail): True, None if all archives are ok, (False, ) otherwise. """ requests = list(self._deposit_requests( deposit, request_type=ARCHIVE_TYPE)) if len(requests) == 0: # no associated archive is refused return False, { 'archive': [{ 'summary': MANDATORY_ARCHIVE_MISSING, }] } errors = [] for archive_request in requests: check, error_message = self._check_archive(archive_request) if not check: errors.append({ 'summary': error_message, - 'fields': archive_request.id + 'fields': [archive_request.id] }) if not errors: return True, None return False, { 'archive': errors } def _check_archive(self, archive_request): """Check that a given archive is actually ok: - reading ok - content of the archive at the first level is not only an archive. Args: archive_path (DepositRequest): Archive to check Returns: True if archive is check compliant, False otherwise. """ archive_path = archive_request.archive.path try: if zipfile.is_zipfile(archive_path): with zipfile.ZipFile(archive_path) as f: files = f.namelist() elif tarfile.is_tarfile(archive_path): with tarfile.open(archive_path) as f: files = f.getnames() else: return False, MANDATORY_ARCHIVE_UNSUPPORTED except Exception: return False, MANDATORY_ARCHIVE_UNREADABLE if len(files) > 1: return True, None element = files[0] pattern = re.compile( r'.*\.(zip|tar|tar.gz|.xz|tar.xz|Z|.tar.Z|bz2|tar.bz2)$') if pattern.match(element): # invalid archive in archive return False, MANDATORY_ARCHIVE_INVALID return True, None def _check_metadata(self, metadata): """Check to execute on all metadata for mandatory field presence. Args: metadata (dict): Metadata dictionary to check for mandatory fields Returns: tuple (status, error_detail): True, None if metadata are ok (False, ) otherwise. """ required_fields = { 'url': False, 'external_identifier': False, 'author': False, } alternate_fields = { ('name', 'title'): False, # alternate field, at least one # of them must be present } for field, value in metadata.items(): for name in required_fields: if name in field: required_fields[name] = True for possible_names in alternate_fields: for possible_name in possible_names: if possible_name in field: alternate_fields[possible_names] = True continue mandatory_result = [k for k, v in required_fields.items() if not v] optional_result = [ ' or '.join(k) for k, v in alternate_fields.items() if not v] if mandatory_result == [] and optional_result == []: return True, None detail = [] if mandatory_result != []: detail.append({ 'summary': MANDATORY_FIELDS_MISSING, 'fields': mandatory_result }) if optional_result != []: detail.append({ 'summary': ALTERNATE_FIELDS_MISSING, 'fields': optional_result, }) return False, { 'metadata': detail } def _check_url(self, client_domain, metadata): """Check compatibility between client_domain and url field in metadata Args: client_domain (str): url associated with the deposit's client metadata (dict): Metadata where to find url Returns: tuple (status, error_detail): True, None if url associated with the deposit's client is ok, (False, ) otherwise. """ url_fields = [] for field in metadata: if 'url' in field: if client_domain in metadata[field]: return True, None url_fields.append(field) detail = { 'url': { 'summary': INCOMPATIBLE_URL_FIELDS, } } if url_fields: detail['url']['fields'] = url_fields return False, detail def process_get(self, req, collection_name, deposit_id): """Build a unique tarball from the multiple received and stream that content to the client. Args: req (Request): collection_name (str): Collection owning the deposit deposit_id (id): Deposit concerned by the reading Returns: Tuple status, stream of content, content-type """ deposit = Deposit.objects.get(pk=deposit_id) client_domain = deposit.client.domain metadata = self._metadata_get(deposit) problems = {} # will check each deposit's associated request (both of type # archive and metadata) for errors archives_status, error_detail = self._check_deposit_archives(deposit) if not archives_status: problems.update(error_detail) metadata_status, error_detail = self._check_metadata(metadata) if not metadata_status: problems.update(error_detail) url_status, error_detail = self._check_url(client_domain, metadata) if not url_status: problems.update(error_detail) deposit_status = archives_status and metadata_status and url_status # if any problems arose, the deposit is rejected if not deposit_status: deposit.status = DEPOSIT_STATUS_REJECTED deposit.status_detail = problems response = { 'status': deposit.status, 'details': deposit.status_detail, } else: deposit.status = DEPOSIT_STATUS_VERIFIED response = { 'status': deposit.status, } deposit.save() return status.HTTP_200_OK, json.dumps(response), 'application/json' diff --git a/swh/deposit/tests/api/test_deposit_status.py b/swh/deposit/tests/api/test_deposit_status.py index 180eacb8..ced39896 100644 --- a/swh/deposit/tests/api/test_deposit_status.py +++ b/swh/deposit/tests/api/test_deposit_status.py @@ -1,218 +1,230 @@ # Copyright (C) 2017-2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from django.core.urlresolvers import reverse from io import BytesIO from nose.tools import istest from rest_framework import status from rest_framework.test import APITestCase from swh.deposit.api.deposit_status import convert_status_detail from swh.deposit.config import (COL_IRI, STATE_IRI, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_REJECTED) from swh.deposit.models import Deposit, DEPOSIT_STATUS_DETAIL from swh.deposit.models import DEPOSIT_STATUS_LOAD_SUCCESS from swh.deposit.parsers import parse_xml from ..common import BasicTestCase, WithAuthTestCase, FileSystemCreationRoutine from ..common import CommonCreationRoutine class DepositStatusTestCase(APITestCase, WithAuthTestCase, BasicTestCase, FileSystemCreationRoutine, CommonCreationRoutine): """Status on deposit """ @istest def post_deposit_with_status_check(self): """Binary upload should be accepted """ # given url = reverse(COL_IRI, args=[self.collection.name]) external_id = 'some-external-id-1' # when response = self.client.post( url, content_type='application/zip', # as zip data=self.archive['data'], # + headers CONTENT_LENGTH=self.archive['length'], HTTP_SLUG=external_id, HTTP_CONTENT_MD5=self.archive['md5sum'], HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_IN_PROGRESS='false', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') # then self.assertEqual(response.status_code, status.HTTP_201_CREATED) deposit = Deposit.objects.get(external_id=external_id) status_url = reverse(STATE_IRI, args=[self.collection.name, deposit.id]) # check status status_response = self.client.get(status_url) self.assertEqual(status_response.status_code, status.HTTP_200_OK) r = parse_xml(BytesIO(status_response.content)) self.assertEqual(int(r['deposit_id']), deposit.id) self.assertEqual(r['deposit_status'], DEPOSIT_STATUS_DEPOSITED) self.assertEqual(r['deposit_status_detail'], DEPOSIT_STATUS_DETAIL[DEPOSIT_STATUS_DEPOSITED]) @istest def status_with_swh_id(self): _status = DEPOSIT_STATUS_LOAD_SUCCESS _swh_id = '548b3c0a2bb43e1fca191e24b5803ff6b3bc7c10' # given deposit_id = self.create_deposit_with_status( status=_status, swh_id=_swh_id) url = reverse(STATE_IRI, args=[self.collection.name, deposit_id]) # when status_response = self.client.get(url) # then self.assertEqual(status_response.status_code, status.HTTP_200_OK) r = parse_xml(BytesIO(status_response.content)) self.assertEqual(int(r['deposit_id']), deposit_id) self.assertEqual(r['deposit_status'], _status) self.assertEqual(r['deposit_status_detail'], DEPOSIT_STATUS_DETAIL[DEPOSIT_STATUS_LOAD_SUCCESS]) self.assertEqual(r['deposit_swh_id'], _swh_id) @istest def status_on_unknown_deposit(self): """Asking for the status of unknown deposit returns 404 response""" status_url = reverse(STATE_IRI, args=[self.collection.name, 999]) status_response = self.client.get(status_url) self.assertEqual(status_response.status_code, status.HTTP_404_NOT_FOUND) @istest def status_with_http_accept_header_should_not_break(self): """Asking deposit status with Accept header should return 200 """ deposit_id = self.create_deposit_partial() status_url = reverse(STATE_IRI, args=[ self.collection.name, deposit_id]) response = self.client.get( status_url, HTTP_ACCEPT='text/html,application/xml;q=9,*/*,q=8') self.assertEqual(response.status_code, status.HTTP_200_OK) @istest def convert_status_detail_empty(self): actual_status_detail = convert_status_detail({}) self.assertIsNone(actual_status_detail) actual_status_detail = convert_status_detail({'dummy-keys': []}) self.assertIsNone(actual_status_detail) actual_status_detail = convert_status_detail(None) self.assertIsNone(actual_status_detail) @istest def convert_status_detail(self): status_detail = { 'url': { 'summary': "At least one url field must be compatible with the client\'s domain name. The following url fields failed the check", # noqa 'fields': ['blahurl', 'testurl'], }, 'metadata': [ { 'summary': 'Mandatory fields missing', 'fields': ['url', 'title'], }, { 'summary': 'Alternate fields missing', 'fields': ['name or title', 'url or badurl'] } ], - 'archive': { + 'archive': [{ 'summary': 'Unreadable archive', - 'fields': ['1', '2'], - }, + 'fields': ['1'], + }], } expected_status_detail = '''- Mandatory fields missing (url, title) - Alternate fields missing (name or title, url or badurl) +- Unreadable archive (1) - At least one url field must be compatible with the client's domain name. The following url fields failed the check (blahurl, testurl) -- Unreadable archive (1, 2) ''' # noqa actual_status_detail = convert_status_detail(status_detail) self.assertEqual(actual_status_detail, expected_status_detail) @istest def convert_status_detail_2(self): status_detail = { 'url': { 'summary': 'At least one compatible url field. Failed', 'fields': ['testurl'], }, 'metadata': [ { 'summary': 'Mandatory fields missing', 'fields': ['name'], }, ], + 'archive': [ + { + 'summary': 'Invalid archive', + 'fields': ['2'], + }, + { + 'summary': 'Unsupported archive', + 'fields': ['1'], + } + ], } expected_status_detail = '''- Mandatory fields missing (name) +- Invalid archive (2) +- Unsupported archive (1) - At least one compatible url field. Failed (testurl) ''' actual_status_detail = convert_status_detail(status_detail) self.assertEqual(actual_status_detail, expected_status_detail) @istest def convert_status_detail_3(self): status_detail = { 'url': { 'summary': 'At least one compatible url field', }, } expected_status_detail = '- At least one compatible url field\n' actual_status_detail = convert_status_detail(status_detail) self.assertEqual(actual_status_detail, expected_status_detail) @istest def status_on_deposit_rejected(self): _status = DEPOSIT_STATUS_REJECTED _swh_id = '548b3c0a2bb43e1fca191e24b5803ff6b3bc7c10' _status_detail = {'url': {'summary': 'Wrong url'}} # given deposit_id = self.create_deposit_with_status( status=_status, swh_id=_swh_id, status_detail=_status_detail) url = reverse(STATE_IRI, args=[self.collection.name, deposit_id]) # when status_response = self.client.get(url) # then self.assertEqual(status_response.status_code, status.HTTP_200_OK) r = parse_xml(BytesIO(status_response.content)) self.assertEqual(int(r['deposit_id']), deposit_id) self.assertEqual(r['deposit_status'], _status) self.assertEqual(r['deposit_status_detail'], '- Wrong url') self.assertEqual(r['deposit_swh_id'], _swh_id)