diff --git a/swh/deposit/api/private/deposit_check.py b/swh/deposit/api/private/deposit_check.py --- a/swh/deposit/api/private/deposit_check.py +++ b/swh/deposit/api/private/deposit_check.py @@ -5,11 +5,10 @@ import json import re - +import tarfile +import zipfile from rest_framework import status -from tarfile import TarFile, is_tarfile -from zipfile import ZipFile, is_zipfile from . import DepositReadMixin from ..common import SWHGetDepositAPI, SWHPrivateAPIView @@ -84,25 +83,23 @@ """ archive_path = archive_request.archive.path try: - if is_zipfile(archive_path): - with ZipFile(archive_path) as f: + if zipfile.is_zipfile(archive_path): + with zipfile.ZipFile(archive_path) as f: files = f.namelist() - elif is_tarfile(archive_path): - with TarFile(archive_path) as f: + elif tarfile.is_tarfile(archive_path): + with tarfile.open(archive_path) as f: files = f.getnames() else: return False, MANDATORY_ARCHIVE_UNSUPPORTED - - if len(files) > 1: - return True, None - - element = files[0] - pattern = re.compile( - r'.*\.(zip|tar|tar.gz|.xz|tar.xz|Z|.tar.Z|bz2|tar.bz2)$') - if pattern.match(element): # invalid archive in archive - return False, MANDATORY_ARCHIVE_INVALID except Exception: return False, MANDATORY_ARCHIVE_UNREADABLE + if len(files) > 1: + return True, None + element = files[0] + pattern = re.compile( + r'.*\.(zip|tar|tar.gz|.xz|tar.xz|Z|.tar.Z|bz2|tar.bz2)$') + if pattern.match(element): # invalid archive in archive + return False, MANDATORY_ARCHIVE_INVALID return True, None def _check_metadata(self, metadata): diff --git a/swh/deposit/tests/api/test_deposit_check.py b/swh/deposit/tests/api/test_deposit_check.py --- a/swh/deposit/tests/api/test_deposit_check.py +++ b/swh/deposit/tests/api/test_deposit_check.py @@ -17,7 +17,7 @@ DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_REJECTED ) from swh.deposit.api.private.deposit_check import ( - SWHChecksDeposit, + SWHChecksDeposit, MANDATORY_ARCHIVE_INVALID, MANDATORY_FIELDS_MISSING, INCOMPATIBLE_URL_FIELDS, MANDATORY_ARCHIVE_UNSUPPORTED, ALTERNATE_FIELDS_MISSING ) @@ -61,7 +61,45 @@ self.assertEquals(deposit.status, DEPOSIT_STATUS_VERIFIED) @istest - def deposit_ko(self): + def deposit_with_tarball_with_one_tarball_invalid(self): + """Deposit with tarball (of 1 tarball) should fail the checks: rejected + + """ + deposit_id = self.create_deposit_archive_with_archive() + + deposit = Deposit.objects.get(pk=deposit_id) + self.assertEquals(DEPOSIT_STATUS_DEPOSITED, deposit.status) + + url = reverse(PRIVATE_CHECK_DEPOSIT, + args=[self.collection.name, deposit.id]) + + response = self.client.get(url) + + self.assertEqual(response.status_code, status.HTTP_200_OK) + data = json.loads(response.content.decode('utf-8')) + self.assertEqual(data['status'], DEPOSIT_STATUS_REJECTED) + details = data['details'] + # archive checks failure + self.assertEqual(len(details['archive']), 1) + self.assertEqual(details['archive'][0]['summary'], + MANDATORY_ARCHIVE_INVALID) + # metadata check failure + self.assertEqual(len(details['metadata']), 2) + mandatory = details['metadata'][0] + self.assertEqual(mandatory['summary'], MANDATORY_FIELDS_MISSING) + self.assertEqual(set(mandatory['fields']), + set(['url', 'external_identifier', 'author'])) + alternate = details['metadata'][1] + self.assertEqual(alternate['summary'], ALTERNATE_FIELDS_MISSING) + self.assertEqual(alternate['fields'], ['name or title']) + # url check failure + self.assertEqual(details['url']['summary'], INCOMPATIBLE_URL_FIELDS) + + deposit = Deposit.objects.get(pk=deposit.id) + self.assertEquals(deposit.status, DEPOSIT_STATUS_REJECTED) + + @istest + def deposit_ko_not_a_valid_tarball(self): """Invalid deposit should fail the checks (-> status rejected) """ diff --git a/swh/deposit/tests/common.py b/swh/deposit/tests/common.py --- a/swh/deposit/tests/common.py +++ b/swh/deposit/tests/common.py @@ -7,6 +7,7 @@ import hashlib import os import shutil +import tarfile import tempfile from django.core.urlresolvers import reverse @@ -28,6 +29,32 @@ from swh.core import tarball +def compute_info(archive_path): + """Given a path, compute information on path. + + """ + with open(archive_path, 'rb') as f: + length = 0 + sha1sum = hashlib.sha1() + md5sum = hashlib.md5() + data = b'' + for chunk in f: + sha1sum.update(chunk) + md5sum.update(chunk) + length += len(chunk) + data += chunk + + return { + 'dir': os.path.dirname(archive_path), + 'name': os.path.basename(archive_path), + 'path': archive_path, + 'length': length, + 'sha1sum': sha1sum.hexdigest(), + 'md5sum': md5sum.hexdigest(), + 'data': data + } + + def create_arborescence_zip(root_path, archive_name, filename, content, up_to_size=None): """Build an archive named archive_name in the root_path. @@ -61,27 +88,17 @@ zip_path = dir_path + '.zip' zip_path = tarball.compress(zip_path, 'zip', dir_path) + return compute_info(zip_path) - with open(zip_path, 'rb') as f: - length = 0 - sha1sum = hashlib.sha1() - md5sum = hashlib.md5() - data = b'' - for chunk in f: - sha1sum.update(chunk) - md5sum.update(chunk) - length += len(chunk) - data += chunk - return { - 'dir': archive_path_dir, - 'name': archive_name, - 'data': data, - 'path': zip_path, - 'sha1sum': sha1sum.hexdigest(), - 'md5sum': md5sum.hexdigest(), - 'length': length, - } +def create_archive_with_archive(root_path, name, archive): + """Create an archive holding another. + + """ + invalid_archive_path = os.path.join(root_path, name) + with tarfile.open(invalid_archive_path, 'w:gz') as _archive: + _archive.add(archive['path'], arcname=archive['name']) + return compute_info(invalid_archive_path) @attr('fs') @@ -158,6 +175,29 @@ deposit_id = int(response_content['deposit_id']) return deposit_id + def create_deposit_archive_with_archive(self): + invalid_archive = create_archive_with_archive( + self.root_path, 'invalid.tar.gz', self.archive) + + response = self.client.post( + reverse(COL_IRI, args=[self.collection.name]), + content_type='application/x-tar', + data=invalid_archive['data'], + CONTENT_LENGTH=invalid_archive['length'], + HTTP_MD5SUM=invalid_archive['md5sum'], + HTTP_SLUG='external-id', + HTTP_IN_PROGRESS=False, + HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( + invalid_archive['name'], )) + + # then + self.assertEqual(response.status_code, status.HTTP_201_CREATED) + response_content = parse_xml(BytesIO(response.content)) + _status = response_content['deposit_status'] + self.assertEqual(_status, DEPOSIT_STATUS_DEPOSITED) + deposit_id = int(response_content['deposit_id']) + return deposit_id + def update_binary_deposit(self, deposit_id, status_partial=False): # update existing deposit with atom entry metadata response = self.client.post(