diff --git a/PKG-INFO b/PKG-INFO index 46302db6..d57f7c8e 100644 --- a/PKG-INFO +++ b/PKG-INFO @@ -1,11 +1,11 @@ Metadata-Version: 2.1 Name: swh.deposit -Version: 0.0.39 +Version: 0.0.40 Summary: Software Heritage Deposit Server Home-page: https://forge.softwareheritage.org/source/swh-deposit/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN Provides-Extra: loader diff --git a/swh.deposit.egg-info/PKG-INFO b/swh.deposit.egg-info/PKG-INFO index 46302db6..d57f7c8e 100644 --- a/swh.deposit.egg-info/PKG-INFO +++ b/swh.deposit.egg-info/PKG-INFO @@ -1,11 +1,11 @@ Metadata-Version: 2.1 Name: swh.deposit -Version: 0.0.39 +Version: 0.0.40 Summary: Software Heritage Deposit Server Home-page: https://forge.softwareheritage.org/source/swh-deposit/ Author: Software Heritage developers Author-email: swh-devel@inria.fr License: UNKNOWN Description: UNKNOWN Platform: UNKNOWN Provides-Extra: loader diff --git a/swh/deposit/api/private/deposit_check.py b/swh/deposit/api/private/deposit_check.py index e3875c48..f6961f6a 100644 --- a/swh/deposit/api/private/deposit_check.py +++ b/swh/deposit/api/private/deposit_check.py @@ -1,117 +1,158 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json import zipfile from rest_framework import status from ..common import SWHGetDepositAPI, SWHPrivateAPIView from ...config import DEPOSIT_STATUS_READY, DEPOSIT_STATUS_REJECTED from ...config import ARCHIVE_TYPE, METADATA_TYPE from ...models import Deposit, DepositRequest class SWHChecksDeposit(SWHGetDepositAPI, SWHPrivateAPIView): """Dedicated class to read a deposit's raw archives content. Only GET is supported. """ - - def deposit_requests(self, deposit): + def _deposit_requests(self, deposit, request_type): """Given a deposit, yields its associated deposit_request + Args: + deposit (Deposit): Deposit to list requests for + request_type (str): Archive or metadata type + Yields: - deposit request + deposit requests of type request_type associated to the deposit """ deposit_requests = DepositRequest.objects.filter( + type=self.deposit_request_types[request_type], deposit=deposit).order_by('id') for deposit_request in deposit_requests: yield deposit_request + def _check_deposit_archives(self, deposit): + """Given a deposit, check each deposit request of type archive. + + Args: + The deposit to check archives for + + Returns + True if all archives are ok, False otherwise. + + """ + requests = list(self._deposit_requests( + deposit, request_type=ARCHIVE_TYPE)) + if len(requests) == 0: # no associated archive is refused + return False + + for dr in requests: + check = self._check_archive(dr.archive) + if not check: + return False + return True + def _check_archive(self, archive): """Check that a given archive is actually ok for reading. Args: archive (File): Archive to check Returns: True if archive is successfully read, False otherwise. """ try: zf = zipfile.ZipFile(archive.path) zf.infolist() except Exception as e: return False else: return True + def _check_deposit_metadata(self, deposit): + """Given a deposit, check each deposit request of type metadata. + + Args: + The deposit to check metadata for. + + Returns: + True if the deposit's associated metadata are ok, False otherwise. + + """ + metadata = {} + for dr in self._deposit_requests(deposit, request_type=METADATA_TYPE): + metadata.update(dr.metadata) + + return self._check_metadata(metadata) + def _check_metadata(self, metadata): - """Check to execute on metadata. + """Check to execute on all metadata. Args: metadata (): Metadata to actually check Returns: True if metadata is ok, False otherwise. """ required_fields = (('url',), ('external_identifier',), ('name', 'title'), ('author',)) result = all(any(name in field for field in metadata for name in possible_names) for possible_names in required_fields) return result def process_get(self, req, collection_name, deposit_id): """Build a unique tarball from the multiple received and stream that content to the client. Args: req (Request): collection_name (str): Collection owning the deposit deposit_id (id): Deposit concerned by the reading Returns: Tuple status, stream of content, content-type """ deposit = Deposit.objects.get(pk=deposit_id) - all_metadata = {} - archives_status = False - # will check each deposit request for the deposit - for dr in self.deposit_requests(deposit): - if dr.type.name == ARCHIVE_TYPE: - archives_status = self._check_archive(dr.archive) - elif dr.type.name == METADATA_TYPE: - # aggregating all metadata requests for check on complete set - all_metadata.update(dr.metadata) - if not archives_status: - break - - metadatas_status = self._check_metadata(all_metadata) - deposit_status = archives_status and metadatas_status - # if problem in any deposit requests, the deposit is rejected + problems = [] + # will check each deposit's associated request (both of type + # archive and metadata) for errors + archives_status = self._check_deposit_archives(deposit) + if not archives_status: + problems.append('archive(s)') + + metadata_status = self._check_deposit_metadata(deposit) + if not metadata_status: + problems.append('metadata') + + deposit_status = archives_status and metadata_status + + # if any problems arose, the deposit is rejected if not deposit_status: deposit.status = DEPOSIT_STATUS_REJECTED else: deposit.status = DEPOSIT_STATUS_READY - deposit.save() return (status.HTTP_200_OK, json.dumps({ - 'status': deposit.status + 'status': deposit.status, + 'details': 'Some %s failed the checks.' % ( + ' and '.join(problems), ), }), 'application/json') diff --git a/swh/deposit/tests/api/test_deposit_check.py b/swh/deposit/tests/api/test_deposit_check.py index 5e0867f3..8edf7bec 100644 --- a/swh/deposit/tests/api/test_deposit_check.py +++ b/swh/deposit/tests/api/test_deposit_check.py @@ -1,144 +1,146 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import json import unittest from django.core.urlresolvers import reverse from nose.tools import istest from nose.plugins.attrib import attr from rest_framework import status from rest_framework.test import APITestCase from ...models import Deposit from ...config import DEPOSIT_STATUS_READY, PRIVATE_CHECK_DEPOSIT from ...config import DEPOSIT_STATUS_READY_FOR_CHECKS, DEPOSIT_STATUS_REJECTED from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine from ..common import FileSystemCreationRoutine from ...api.private.deposit_check import SWHChecksDeposit @attr('fs') class CheckDepositTest(APITestCase, WithAuthTestCase, BasicTestCase, CommonCreationRoutine, FileSystemCreationRoutine): """Check deposit endpoints. """ def setUp(self): super().setUp() @istest def deposit_ok(self): """Proper deposit should succeed the checks (-> status ready) """ deposit_id = self.create_simple_binary_deposit(status_partial=True) deposit_id = self.update_binary_deposit(deposit_id, status_partial=False) deposit = Deposit.objects.get(pk=deposit_id) self.assertEquals(deposit.status, DEPOSIT_STATUS_READY_FOR_CHECKS) url = reverse(PRIVATE_CHECK_DEPOSIT, args=[self.collection.name, deposit.id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) data = json.loads(response.content.decode('utf-8')) self.assertEqual(data['status'], DEPOSIT_STATUS_READY) deposit = Deposit.objects.get(pk=deposit.id) self.assertEquals(deposit.status, DEPOSIT_STATUS_READY) @istest def deposit_ko(self): """Invalid deposit should fail the checks (-> status rejected) """ deposit_id = self.create_invalid_deposit() deposit = Deposit.objects.get(pk=deposit_id) self.assertEquals(deposit.status, DEPOSIT_STATUS_READY_FOR_CHECKS) url = reverse(PRIVATE_CHECK_DEPOSIT, args=[self.collection.name, deposit.id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) data = json.loads(response.content.decode('utf-8')) self.assertEqual(data['status'], DEPOSIT_STATUS_REJECTED) + self.assertEqual(data['details'], + 'Some archive(s) and metadata failed the checks.') deposit = Deposit.objects.get(pk=deposit.id) self.assertEquals(deposit.status, DEPOSIT_STATUS_REJECTED) @istest def check_deposit_metadata_ok(self): """Proper deposit should succeed the checks (-> status ready) with all **MUST** metadata using the codemeta metadata test set """ deposit_id = self.create_simple_binary_deposit(status_partial=True) deposit_id_metadata = self.add_metadata_to_deposit(deposit_id) self.assertEquals(deposit_id, deposit_id_metadata) deposit = Deposit.objects.get(pk=deposit_id) self.assertEquals(deposit.status, DEPOSIT_STATUS_READY_FOR_CHECKS) url = reverse(PRIVATE_CHECK_DEPOSIT, args=[self.collection.name, deposit.id]) response = self.client.get(url) self.assertEqual(response.status_code, status.HTTP_200_OK) data = json.loads(response.content.decode('utf-8')) self.assertEqual(data['status'], DEPOSIT_STATUS_READY) deposit = Deposit.objects.get(pk=deposit.id) self.assertEquals(deposit.status, DEPOSIT_STATUS_READY) class CheckMetadata(unittest.TestCase, SWHChecksDeposit): @istest def check_metadata_ok(self): actual_check = self._check_metadata({ 'url': 'something', 'external_identifier': 'something-else', 'name': 'foo', 'author': 'someone', }) self.assertTrue(actual_check) @istest def check_metadata_ok2(self): actual_check = self._check_metadata({ 'url': 'something', 'external_identifier': 'something-else', 'title': 'bar', 'author': 'someone', }) self.assertTrue(actual_check) @istest def check_metadata_ko(self): actual_check = self._check_metadata({ 'url': 'something', 'external_identifier': 'something-else', 'author': 'someone', }) self.assertFalse(actual_check) @istest def check_metadata_ko2(self): actual_check = self._check_metadata({ 'url': 'something', 'external_identifier': 'something-else', 'title': 'foobar', }) self.assertFalse(actual_check) diff --git a/version.txt b/version.txt index b67d3fa4..3598bf6b 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -v0.0.39-0-g5214a45 \ No newline at end of file +v0.0.40-0-g58d4a62 \ No newline at end of file