diff --git a/swh/deposit/api/private/deposit_check.py b/swh/deposit/api/private/deposit_check.py new file mode 100644 index 00000000..21eae3d0 --- /dev/null +++ b/swh/deposit/api/private/deposit_check.py @@ -0,0 +1,104 @@ +# Copyright (C) 2017 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import json +import zipfile + +from rest_framework import status + + +from ..common import SWHGetDepositAPI, SWHPrivateAPIView +from ...config import DEPOSIT_STATUS_READY, DEPOSIT_STATUS_REJECTED +from ...config import ARCHIVE_TYPE +from ...models import Deposit, DepositRequest + + +class SWHChecksDeposit(SWHGetDepositAPI, SWHPrivateAPIView): + """Dedicated class to read a deposit's raw archives content. + + Only GET is supported. + + """ + def deposit_requests(self, deposit): + """Given a deposit, yields its associated deposit_request + + Yields: + deposit request + + """ + deposit_requests = DepositRequest.objects.filter( + deposit=deposit).order_by('id') + + for deposit_request in deposit_requests: + yield deposit_request + + def _check_archive(self, archive): + """Check that a given archive is actually ok for reading. + + Args: + archive (File): Archive to check + + Returns: + True if archive is successfully read, False otherwise. + + """ + try: + zf = zipfile.ZipFile(archive.path) + zf.infolist() + except Exception as e: + return False + else: + return True + + def _check_metadata(self, metadata): + """Check to execute on metadata. + + Args: + metadata (): Metadata to actually check + + Returns: + True if metadata is ok, False otherwise. + + """ + # FIXME: Define checks to implement + return True + + def process_get(self, req, collection_name, deposit_id): + """Build a unique tarball from the multiple received and stream that + content to the client. + + Args: + req (Request): + collection_name (str): Collection owning the deposit + deposit_id (id): Deposit concerned by the reading + + Returns: + Tuple status, stream of content, content-type + + """ + deposit = Deposit.objects.get(pk=deposit_id) + # will check each deposit request for the deposit + for dr in self.deposit_requests(deposit): + if dr.type.name == ARCHIVE_TYPE: + deposit_status = self._check_archive(dr.archive) + else: + deposit_status = self._check_metadata(dr.metadata) + + if not deposit_status: + break + + # if problem in any deposit requests, the deposit is rejected + if not deposit_status: + deposit.status = DEPOSIT_STATUS_REJECTED + else: + deposit.status = DEPOSIT_STATUS_READY + + deposit.save() + + return (status.HTTP_200_OK, + json.dumps({ + 'status': deposit.status + }), + 'application/json') diff --git a/swh/deposit/api/urls.py b/swh/deposit/api/urls.py index 69fd612b..98250ca9 100644 --- a/swh/deposit/api/urls.py +++ b/swh/deposit/api/urls.py @@ -1,91 +1,97 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information """swh URL Configuration The `urlpatterns` list routes URLs to views. For more information please see: https://docs.djangoproject.com/en/1.10/topics/http/urls/ Examples: Function views 1. Add an import: from my_app import views 2. Add a URL to urlpatterns: url(r'^$', views.home, name='home') Class-based views 1. Add an import: from other_app.views import Home 2. Add a URL to urlpatterns: url(r'^$', Home.as_view(), name='home') Including another URLconf 1. Import the include() function: from django.conf.urls import url, include 2. Add a URL to urlpatterns: url(r'^blog/', include('blog.urls')) """ from django.conf.urls import url from ..config import EDIT_SE_IRI, EM_IRI, CONT_FILE_IRI from ..config import SD_IRI, COL_IRI, STATE_IRI, PRIVATE_GET_RAW_CONTENT from ..config import PRIVATE_PUT_DEPOSIT, PRIVATE_GET_DEPOSIT_METADATA +from ..config import PRIVATE_CHECK_DEPOSIT from .deposit import SWHDeposit from .deposit_status import SWHDepositStatus from .deposit_update import SWHUpdateMetadataDeposit from .deposit_update import SWHUpdateArchiveDeposit from .deposit_content import SWHDepositContent from .service_document import SWHServiceDocument from .private.deposit_read import SWHDepositReadArchives from .private.deposit_read import SWHDepositReadMetadata from .private.deposit_update_status import SWHUpdateStatusDeposit - +from .private.deposit_check import SWHChecksDeposit urlpatterns = [ # PUBLIC API # SD IRI - Service Document IRI # -> GET url(r'^servicedocument/', SWHServiceDocument.as_view(), name=SD_IRI), # Col IRI - Collection IRI # -> POST url(r'^(?P[^/]+)/$', SWHDeposit.as_view(), name=COL_IRI), # EM IRI - Atom Edit Media IRI (update archive IRI) # -> PUT (update-in-place existing archive) # -> POST (add new archive) url(r'^(?P[^/]+)/(?P[^/]+)/media/$', SWHUpdateArchiveDeposit.as_view(), name=EM_IRI), # Edit IRI - Atom Entry Edit IRI (update metadata IRI) # SE IRI - Sword Edit IRI ;; possibly same as Edit IRI # -> PUT (update in place) # -> POST (add new metadata) url(r'^(?P[^/]+)/(?P[^/]+)/metadata/$', SWHUpdateMetadataDeposit.as_view(), name=EDIT_SE_IRI), # State IRI # -> GET url(r'^(?P[^/]+)/(?P[^/]+)/status/$', SWHDepositStatus.as_view(), name=STATE_IRI), # Cont/File IRI # -> GET url(r'^(?P[^/]+)/(?P[^/]+)/content/$', SWHDepositContent.as_view(), name=CONT_FILE_IRI), # specification is not clear about # FILE-IRI, we assume it's the same as # the CONT-IRI one # PRIVATE API # Retrieve deposit's raw archives' content # -> GET url(r'^(?P[^/]+)/(?P[^/]+)/raw/$', SWHDepositReadArchives.as_view(), name=PRIVATE_GET_RAW_CONTENT), # Update deposit's status # -> PUT url(r'^(?P[^/]+)/(?P[^/]+)/update/$', SWHUpdateStatusDeposit.as_view(), name=PRIVATE_PUT_DEPOSIT), # Retrieve metadata information on a specific deposit # -> GET url(r'^(?P[^/]+)/(?P[^/]+)/meta/$', SWHDepositReadMetadata.as_view(), name=PRIVATE_GET_DEPOSIT_METADATA), + # Check archive and metadata information on a specific deposit + # -> GET + url(r'^(?P[^/]+)/(?P[^/]+)/check/$', + SWHChecksDeposit.as_view(), + name=PRIVATE_CHECK_DEPOSIT), ] diff --git a/swh/deposit/tests/api/test_deposit_check.py b/swh/deposit/tests/api/test_deposit_check.py new file mode 100644 index 00000000..e03a4181 --- /dev/null +++ b/swh/deposit/tests/api/test_deposit_check.py @@ -0,0 +1,71 @@ +# Copyright (C) 2017 The Software Heritage developers +# See the AUTHORS file at the top-level directory of this distribution +# License: GNU General Public License version 3, or any later version +# See top-level LICENSE file for more information + +import json + +from django.core.urlresolvers import reverse +from nose.tools import istest +from nose.plugins.attrib import attr +from rest_framework import status +from rest_framework.test import APITestCase + +from ...models import Deposit +from ...config import DEPOSIT_STATUS_READY, PRIVATE_CHECK_DEPOSIT +from ...config import DEPOSIT_STATUS_READY_FOR_CHECKS, DEPOSIT_STATUS_REJECTED +from ..common import BasicTestCase, WithAuthTestCase, CommonCreationRoutine +from ..common import FileSystemCreationRoutine + + +@attr('fs') +class CheckDepositTest(APITestCase, WithAuthTestCase, + BasicTestCase, CommonCreationRoutine, + FileSystemCreationRoutine): + """Check deposit endpoints. + + """ + def setUp(self): + super().setUp() + + @istest + def deposit_ok(self): + """Proper deposit should succeed the checks (-> status ready) + + """ + deposit_id = self.create_simple_binary_deposit(status_partial=False) + + deposit = Deposit.objects.get(pk=deposit_id) + self.assertEquals(deposit.status, DEPOSIT_STATUS_READY_FOR_CHECKS) + + url = reverse(PRIVATE_CHECK_DEPOSIT, + args=[self.collection.name, deposit.id]) + + response = self.client.get(url) + + self.assertEqual(response.status_code, status.HTTP_200_OK) + data = json.loads(response.content.decode('utf-8')) + self.assertEqual(data['status'], DEPOSIT_STATUS_READY) + deposit = Deposit.objects.get(pk=deposit.id) + self.assertEquals(deposit.status, DEPOSIT_STATUS_READY) + + @istest + def deposit_ko(self): + """Invalid deposit should fail the checks (-> status rejected) + + """ + deposit_id = self.create_invalid_deposit() + + deposit = Deposit.objects.get(pk=deposit_id) + self.assertEquals(deposit.status, DEPOSIT_STATUS_READY_FOR_CHECKS) + + url = reverse(PRIVATE_CHECK_DEPOSIT, + args=[self.collection.name, deposit.id]) + + response = self.client.get(url) + + self.assertEqual(response.status_code, status.HTTP_200_OK) + data = json.loads(response.content.decode('utf-8')) + self.assertEqual(data['status'], DEPOSIT_STATUS_REJECTED) + deposit = Deposit.objects.get(pk=deposit.id) + self.assertEquals(deposit.status, DEPOSIT_STATUS_REJECTED) diff --git a/swh/deposit/tests/common.py b/swh/deposit/tests/common.py index 7663ce55..4e0ccccc 100644 --- a/swh/deposit/tests/common.py +++ b/swh/deposit/tests/common.py @@ -1,419 +1,426 @@ # Copyright (C) 2017 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information import base64 import hashlib import os import shutil import tempfile from django.core.urlresolvers import reverse from django.test import TestCase from io import BytesIO from nose.plugins.attrib import attr from rest_framework import status from swh.deposit.config import COL_IRI, EM_IRI, EDIT_SE_IRI from swh.deposit.config import DEPOSIT_STATUS_REJECTED from swh.deposit.models import DepositClient, DepositCollection, Deposit from swh.deposit.models import DepositRequest from swh.deposit.models import DepositRequestType from swh.deposit.parsers import parse_xml from swh.deposit.settings.testing import MEDIA_ROOT from swh.loader.tar import tarball def create_arborescence_zip(root_path, archive_name, filename, content, up_to_size=None): """Build an archive named archive_name in the root_path. This archive contains one file named filename with the content content. Returns: dict with the keys: - dir: the directory of that archive - path: full path to the archive - sha1sum: archive's sha1sum - length: archive's length """ os.makedirs(root_path, exist_ok=True) archive_path_dir = tempfile.mkdtemp(dir=root_path) dir_path = os.path.join(archive_path_dir, archive_name) os.mkdir(dir_path) filepath = os.path.join(dir_path, filename) l = len(content) count = 0 batch_size = 128 with open(filepath, 'wb') as f: f.write(content) if up_to_size: # fill with blank content up to a given size count += l while count < up_to_size: f.write(b'0'*batch_size) count += batch_size zip_path = dir_path + '.zip' zip_path = tarball.compress(zip_path, 'zip', dir_path) with open(zip_path, 'rb') as f: length = 0 sha1sum = hashlib.sha1() md5sum = hashlib.md5() data = b'' for chunk in f: sha1sum.update(chunk) md5sum.update(chunk) length += len(chunk) data += chunk return { 'dir': archive_path_dir, 'name': archive_name, 'data': data, 'path': zip_path, 'sha1sum': sha1sum.hexdigest(), 'md5sum': md5sum.hexdigest(), 'length': length, } @attr('fs') class FileSystemCreationRoutine(TestCase): """Mixin intended for tests needed to tamper with archives. """ def setUp(self): """Define the test client and other test variables.""" super().setUp() self.root_path = '/tmp/swh-deposit/test/build-zip/' os.makedirs(self.root_path, exist_ok=True) self.archive = create_arborescence_zip( self.root_path, 'archive1', 'file1', b'some content in file') def tearDown(self): super().tearDown() shutil.rmtree(self.root_path) def create_simple_binary_deposit(self, status_partial=True): response = self.client.post( reverse(COL_IRI, args=[self.collection.name]), content_type='application/zip', data=self.archive['data'], CONTENT_LENGTH=self.archive['length'], HTTP_MD5SUM=self.archive['md5sum'], HTTP_SLUG='external-id', HTTP_IN_PROGRESS=status_partial, HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( self.archive['name'], )) # then assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content[ '{http://www.w3.org/2005/Atom}deposit_id'] return deposit_id def create_complex_binary_deposit(self, status_partial=False): deposit_id = self.create_simple_binary_deposit( status_partial=True) # Add a second archive to the deposit # update its status to DEPOSIT_STATUS_READY response = self.client.post( reverse(EM_IRI, args=[self.collection.name, deposit_id]), content_type='application/zip', data=self.archive2['data'], CONTENT_LENGTH=self.archive2['length'], HTTP_MD5SUM=self.archive2['md5sum'], HTTP_SLUG='external-id', HTTP_IN_PROGRESS=status_partial, HTTP_CONTENT_DISPOSITION='attachment; filename=filename1.zip') # then assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content[ '{http://www.w3.org/2005/Atom}deposit_id'] return deposit_id @attr('fs') class BasicTestCase(TestCase): """Mixin intended for data setup purposes (user, collection, etc...) """ def setUp(self): """Define the test client and other test variables.""" super().setUp() # expanding diffs in tests self.maxDiff = None # basic minimum test data deposit_request_types = {} # Add deposit request types for deposit_request_type in ['archive', 'metadata']: drt = DepositRequestType(name=deposit_request_type) drt.save() deposit_request_types[deposit_request_type] = drt _name = 'hal' _url = 'https://hal.test.fr/' # set collection up _collection = DepositCollection(name=_name) _collection.save() # set user/client up _client = DepositClient.objects.create_user(username=_name, password=_name, url=_url) _client.collections = [_collection.id] _client.save() self.collection = _collection self.user = _client self.username = _name self.userpass = _name self.deposit_request_types = deposit_request_types def tearDown(self): super().tearDown() # Clean up uploaded files in temporary directory (tests have # their own media root folder) if os.path.exists(MEDIA_ROOT): for d in os.listdir(MEDIA_ROOT): shutil.rmtree(os.path.join(MEDIA_ROOT, d)) class WithAuthTestCase(TestCase): """Mixin intended for testing the api with basic authentication. """ def setUp(self): super().setUp() _token = '%s:%s' % (self.username, self.userpass) token = base64.b64encode(_token.encode('utf-8')) authorization = 'Basic %s' % token.decode('utf-8') self.client.credentials(HTTP_AUTHORIZATION=authorization) def tearDown(self): super().tearDown() self.client.credentials() class CommonCreationRoutine(TestCase): """Mixin class to share initialization routine. cf: `class`:test_deposit_update.DepositReplaceExistingDataTest `class`:test_deposit_update.DepositUpdateDepositWithNewDataTest `class`:test_deposit_update.DepositUpdateFailuresTest `class`:test_deposit_delete.DepositDeleteTest """ def setUp(self): super().setUp() self.atom_entry_data0 = b""" some-external-id """ self.atom_entry_data1 = b""" anotherthing """ self.atom_entry_data2 = b""" Awesome Compiler urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 1785io25c695 2017-10-07T15:17:08Z some awesome author """ self.codemeta_entry_data0 = b""" Awesome Compiler urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 1785io25c695 2017-10-07T15:17:08Z some awesome author description key-word 1 """ self.codemeta_entry_data1 = b""" Composing a Web of Audio Applications hal hal-01243065 hal-01243065 https://hal-test.archives-ouvertes.fr/hal-01243065 test DSP programming,Web 2017-05-03T16:08:47+02:00 this is the description 1 phpstorm stable php python C GNU General Public License v3.0 only CeCILL Free Software License Agreement v1.1 HAL hal@ccsd.cnrs.fr """ - def create_deposit_with_status_rejected(self): + def create_invalid_deposit(self): url = reverse(COL_IRI, args=[self.collection.name]) data = b'some data which is clearly not a zip file' md5sum = hashlib.md5(data).hexdigest() external_id = 'some-external-id-1' # when response = self.client.post( url, content_type='application/zip', # as zip data=data, # + headers CONTENT_LENGTH=len(data), # other headers needs HTTP_ prefix to be taken into account HTTP_SLUG=external_id, HTTP_CONTENT_MD5=md5sum, HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content[ '{http://www.w3.org/2005/Atom}deposit_id'] - # As we cannot create a rejected deposit in test context - # update in place the deposit with such status + return deposit_id + + def create_deposit_with_status_rejected(self): + deposit_id = self.create_invalid_deposit() + + # We cannot create rejected deposit in test context (we + # flipped off the checks in the configuration so all deposits + # have the status ready-for-checks). Update in place the + # deposit with such status deposit = Deposit.objects.get(pk=deposit_id) deposit.status = DEPOSIT_STATUS_REJECTED deposit.save() return deposit_id def create_simple_deposit_partial(self): """Create a simple deposit (1 request) in `partial` state and returns its new identifier. Returns: deposit id """ response = self.client.post( reverse(COL_IRI, args=[self.collection.name]), content_type='application/atom+xml;type=entry', data=self.atom_entry_data0, HTTP_SLUG='external-id', HTTP_IN_PROGRESS='true') assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content[ '{http://www.w3.org/2005/Atom}deposit_id'] return deposit_id def create_deposit_partial_with_data_in_args(self, data): """Create a simple deposit (1 request) in `partial` state with the data or metadata as an argument and returns its new identifier. Args: data: atom entry Returns: deposit id """ response = self.client.post( reverse(COL_IRI, args=[self.collection.name]), content_type='application/atom+xml;type=entry', data=data, HTTP_SLUG='external-id', HTTP_IN_PROGRESS='true') assert response.status_code == status.HTTP_201_CREATED response_content = parse_xml(BytesIO(response.content)) deposit_id = response_content[ '{http://www.w3.org/2005/Atom}deposit_id'] return deposit_id def _update_deposit_with_status(self, deposit_id, status_partial=False): """Add to a given deposit another archive and update its current status to `ready` (by default). Returns: deposit id """ # when response = self.client.post( reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]), content_type='application/atom+xml;type=entry', data=self.atom_entry_data1, HTTP_SLUG='external-id', HTTP_IN_PROGRESS=status_partial) # then assert response.status_code == status.HTTP_201_CREATED return deposit_id def create_deposit_ready(self): """Create a complex deposit (2 requests) in status `ready`. """ deposit_id = self.create_simple_deposit_partial() deposit_id = self._update_deposit_with_status(deposit_id) return deposit_id def create_deposit_partial(self): """Create a complex deposit (2 requests) in status `partial`. """ deposit_id = self.create_simple_deposit_partial() deposit_id = self._update_deposit_with_status( deposit_id, status_partial=True) return deposit_id def add_metadata_to_deposit(self, deposit_id, status_partial=False): """Add metadata to deposit. """ # when response = self.client.post( reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]), content_type='application/atom+xml;type=entry', data=self.codemeta_entry_data1, HTTP_SLUG='external-id', HTTP_IN_PROGRESS=status_partial) assert response.status_code == status.HTTP_201_CREATED # then deposit = Deposit.objects.get(pk=deposit_id) assert deposit is not None deposit_requests = DepositRequest.objects.filter(deposit=deposit) assert deposit_requests is not [] for dr in deposit_requests: if dr.type.name == 'metadata': assert deposit_requests[0].metadata is not {} return deposit_id