diff --git a/swh/deposit/api/private/deposit_check.py b/swh/deposit/api/private/deposit_check.py index e986c329..b7885022 100644 --- a/swh/deposit/api/private/deposit_check.py +++ b/swh/deposit/api/private/deposit_check.py @@ -1,121 +1,122 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json import zipfile from rest_framework import status from ..common import SWHGetDepositAPI, SWHPrivateAPIView from ...config import DEPOSIT_STATUS_READY, DEPOSIT_STATUS_REJECTED from ...config import ARCHIVE_TYPE, METADATA_TYPE from ...models import Deposit, DepositRequest class SWHChecksDeposit(SWHGetDepositAPI, SWHPrivateAPIView): """Dedicated class to read a deposit's raw archives content. Only GET is supported. """ def deposit_requests(self, deposit): """Given a deposit, yields its associated deposit_request Yields: deposit request """ deposit_requests = DepositRequest.objects.filter( deposit=deposit).order_by('id') for deposit_request in deposit_requests: yield deposit_request def _check_archive(self, archive): """Check that a given archive is actually ok for reading. Args: archive (File): Archive to check Returns: True if archive is successfully read, False otherwise. """ try: zf = zipfile.ZipFile(archive.path) zf.infolist() except Exception as e: return False else: return True def _check_metadata(self, metadata): """Check to execute on metadata. Args: metadata (): Metadata to actually check Returns: True if metadata is ok, False otherwise. """ must_meta = ['url', 'external_identifier', ['name', 'title'], 'author'] # checks only for must metadata on all metadata requests for mm in must_meta: found = False for k in metadata: if isinstance(mm, list): for p in mm: if p in k: found = True break elif mm in k: found = True break if not found: return False return True def process_get(self, req, collection_name, deposit_id): """Build a unique tarball from the multiple received and stream that content to the client. Args: req (Request): collection_name (str): Collection owning the deposit deposit_id (id): Deposit concerned by the reading Returns: Tuple status, stream of content, content-type """ deposit = Deposit.objects.get(pk=deposit_id) all_metadata = {} # will check each deposit request for the deposit for dr in self.deposit_requests(deposit): if dr.type.name == ARCHIVE_TYPE: - deposit_status = self._check_archive(dr.archive) + archives_status = self._check_archive(dr.archive) elif dr.type.name == METADATA_TYPE: # aggregating all metadata requests for check on complete set all_metadata.update(dr.metadata) - if not deposit_status: + if not archives_status: break - deposit_status = self._check_metadata(all_metadata) + metadatas_status = self._check_metadata(all_metadata) + deposit_status = archives_status and metadatas_status # if problem in any deposit requests, the deposit is rejected if not deposit_status: deposit.status = DEPOSIT_STATUS_REJECTED else: deposit.status = DEPOSIT_STATUS_READY deposit.save() return (status.HTTP_200_OK, json.dumps({ 'status': deposit.status }), 'application/json') diff --git a/swh/deposit/tests/api/test_deposit_check.py b/swh/deposit/tests/api/test_deposit_check.py index 851e1317..5e0867f3 100644 --- a/swh/deposit/tests/api/test_deposit_check.py +++ b/swh/deposit/tests/api/test_deposit_check.py @@ -1,98 +1,144 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json +import unittest from django.core.urlresolvers import reverse from nose.tools import istest from nose.plugins.attrib import attr from rest_framework import status from rest_framework.test import APITestCase from ...models import Deposit from ...config import DEPOSIT_STATUS_READY, PRIVATE_CHECK_DEPOSIT from ...config import DEPOSIT_STATUS_READY_FOR_CHECKS, DEPOSIT_STATUS_REJECTED from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine from ..common import FileSystemCreationRoutine +from ...api.private.deposit_check import SWHChecksDeposit @attr('fs') class CheckDepositTest(APITestCase, WithAuthTestCase, BasicTestCase, CommonCreationRoutine, FileSystemCreationRoutine): """Check deposit endpoints. """ def setUp(self): super().setUp() @istest def deposit_ok(self): """Proper deposit should succeed the checks (-> status ready) """ deposit_id = self.create_simple_binary_deposit(status_partial=True) deposit_id = self.update_binary_deposit(deposit_id, status_partial=False) deposit = Deposit.objects.get(pk=deposit_id) self.assertEquals(deposit.status, DEPOSIT_STATUS_READY_FOR_CHECKS) url = reverse(PRIVATE_CHECK_DEPOSIT, args=[self.collection.name, deposit.id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) data = json.loads(response.content.decode('utf-8')) self.assertEqual(data['status'], DEPOSIT_STATUS_READY) deposit = Deposit.objects.get(pk=deposit.id) self.assertEquals(deposit.status, DEPOSIT_STATUS_READY) @istest def deposit_ko(self): """Invalid deposit should fail the checks (-> status rejected) """ deposit_id = self.create_invalid_deposit() deposit = Deposit.objects.get(pk=deposit_id) self.assertEquals(deposit.status, DEPOSIT_STATUS_READY_FOR_CHECKS) url = reverse(PRIVATE_CHECK_DEPOSIT, args=[self.collection.name, deposit.id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) data = json.loads(response.content.decode('utf-8')) self.assertEqual(data['status'], DEPOSIT_STATUS_REJECTED) deposit = Deposit.objects.get(pk=deposit.id) self.assertEquals(deposit.status, DEPOSIT_STATUS_REJECTED) @istest def check_deposit_metadata_ok(self): """Proper deposit should succeed the checks (-> status ready) with all **MUST** metadata using the codemeta metadata test set """ deposit_id = self.create_simple_binary_deposit(status_partial=True) deposit_id_metadata = self.add_metadata_to_deposit(deposit_id) self.assertEquals(deposit_id, deposit_id_metadata) deposit = Deposit.objects.get(pk=deposit_id) self.assertEquals(deposit.status, DEPOSIT_STATUS_READY_FOR_CHECKS) url = reverse(PRIVATE_CHECK_DEPOSIT, args=[self.collection.name, deposit.id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) data = json.loads(response.content.decode('utf-8')) self.assertEqual(data['status'], DEPOSIT_STATUS_READY) deposit = Deposit.objects.get(pk=deposit.id) self.assertEquals(deposit.status, DEPOSIT_STATUS_READY) + + +class CheckMetadata(unittest.TestCase, SWHChecksDeposit): + @istest + def check_metadata_ok(self): + actual_check = self._check_metadata({ + 'url': 'something', + 'external_identifier': 'something-else', + 'name': 'foo', + 'author': 'someone', + }) + + self.assertTrue(actual_check) + + @istest + def check_metadata_ok2(self): + actual_check = self._check_metadata({ + 'url': 'something', + 'external_identifier': 'something-else', + 'title': 'bar', + 'author': 'someone', + }) + + self.assertTrue(actual_check) + + @istest + def check_metadata_ko(self): + actual_check = self._check_metadata({ + 'url': 'something', + 'external_identifier': 'something-else', + 'author': 'someone', + }) + + self.assertFalse(actual_check) + + @istest + def check_metadata_ko2(self): + actual_check = self._check_metadata({ + 'url': 'something', + 'external_identifier': 'something-else', + 'title': 'foobar', + }) + + self.assertFalse(actual_check)