diff --git a/swh/deposit/loader/tasks.py b/swh/deposit/loader/tasks.py index d5e68a71..b075addb 100644 --- a/swh/deposit/loader/tasks.py +++ b/swh/deposit/loader/tasks.py @@ -1,36 +1,38 @@ # Copyright (C) 2015-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from celery import shared_task from swh.deposit.loader.loader import DepositLoader from swh.deposit.loader.checker import DepositChecker @shared_task(name=__name__ + '.LoadDepositArchiveTsk') -def load_deposit_archive(archive_url, deposit_meta_url, deposit_update_url): +def load_deposit(archive_url, deposit_meta_url, deposit_update_url): """Deposit archive loading task described by the following steps: 1. Retrieve tarball from deposit's private api and store locally in a temporary directory 2. Trigger the loading 3. clean up the temporary directory 4. Update the deposit's status according to result using the deposit's private update status api """ - return DepositLoader().load( + loader = DepositLoader() + return loader.load( archive_url=archive_url, deposit_meta_url=deposit_meta_url, deposit_update_url=deposit_update_url) @shared_task(name=__name__ + '.ChecksDepositTsk') def check_deposit(deposit_check_url): """Check a deposit's status Args: see :func:`DepositChecker.check`. """ - return DepositChecker().check(deposit_check_url) + checker = DepositChecker() + return checker.check(deposit_check_url) diff --git a/swh/deposit/tests/api/test_common.py b/swh/deposit/tests/api/test_common.py deleted file mode 100644 index 74479973..00000000 --- a/swh/deposit/tests/api/test_common.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (C) 2017-2019 The Software Heritage developers -# See the AUTHORS file at the top-level directory of this distribution -# License: GNU General Public License version 3, or any later version -# See top-level LICENSE file for more information - - -from django.urls import reverse -from rest_framework import status -from rest_framework.test import APITestCase - -from ..common import BasicTestCase, WithAuthTestCase - - -class IndexNoAuthCase(APITestCase, BasicTestCase): - """Access to main entry point is ok without authentication - - """ - def test_get_home_is_ok(self): - """Without authentication, endpoint refuses access with 401 response - - """ - url = reverse('home') - response = self.client.get(url) - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertIn(b'The Software Heritage Deposit', response.content) - - -class IndexWithAuthCase(WithAuthTestCase, APITestCase, BasicTestCase): - """Access to main entry point is ok with authentication as well - - """ - def test_get_home_is_ok_2(self): - """Without authentication, endpoint refuses access with 401 response - - """ - url = reverse('home') - response = self.client.get(url) - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertIn(b'The Software Heritage Deposit', response.content) diff --git a/swh/deposit/tests/common.py b/swh/deposit/tests/common.py index 4d41a1f2..e4341f59 100644 --- a/swh/deposit/tests/common.py +++ b/swh/deposit/tests/common.py @@ -1,586 +1,138 @@ # Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import base64 import hashlib import os import re -import shutil import tarfile import tempfile -from django.urls import reverse -from django.test import TestCase -from io import BytesIO -import pytest -from rest_framework import status - -from swh.deposit.config import (COL_IRI, EM_IRI, EDIT_SE_IRI, - DEPOSIT_STATUS_PARTIAL, - DEPOSIT_STATUS_VERIFIED, - DEPOSIT_STATUS_REJECTED, - DEPOSIT_STATUS_DEPOSITED) -from swh.deposit.models import DepositClient, DepositCollection, Deposit -from swh.deposit.models import DepositRequest -from swh.deposit.parsers import parse_xml -from swh.deposit.settings.testing import MEDIA_ROOT from swh.core import tarball def compute_info(archive_path): """Given a path, compute information on path. """ with open(archive_path, 'rb') as f: length = 0 sha1sum = hashlib.sha1() md5sum = hashlib.md5() data = b'' for chunk in f: sha1sum.update(chunk) md5sum.update(chunk) length += len(chunk) data += chunk return { 'dir': os.path.dirname(archive_path), 'name': os.path.basename(archive_path), 'path': archive_path, 'length': length, 'sha1sum': sha1sum.hexdigest(), 'md5sum': md5sum.hexdigest(), 'data': data } def _compress(path, extension, dir_path): """Compress path according to extension """ if extension == 'zip' or extension == 'tar': return tarball.compress(path, extension, dir_path) elif '.' in extension: split_ext = extension.split('.') if split_ext[0] != 'tar': raise ValueError( 'Development error, only zip or tar archive supported, ' '%s not supported' % extension) # deal with specific tar mode = split_ext[1] supported_mode = ['xz', 'gz', 'bz2'] if mode not in supported_mode: raise ValueError( 'Development error, only %s supported, %s not supported' % ( supported_mode, mode)) files = tarball._ls(dir_path) with tarfile.open(path, 'w:%s' % mode) as t: for fpath, fname in files: t.add(fpath, arcname=fname, recursive=False) return path def create_arborescence_archive(root_path, archive_name, filename, content, up_to_size=None, extension='zip'): """Build an archive named archive_name in the root_path. This archive contains one file named filename with the content content. Args: root_path (str): Location path of the archive to create archive_name (str): Archive's name (without extension) filename (str): Archive's content is only one filename content (bytes): Content of the filename up_to_size (int | None): Fill in the blanks size to oversize or complete an archive's size extension (str): Extension of the archive to write (default is zip) Returns: dict with the keys: - dir: the directory of that archive - path: full path to the archive - sha1sum: archive's sha1sum - length: archive's length """ os.makedirs(root_path, exist_ok=True) archive_path_dir = tempfile.mkdtemp(dir=root_path) dir_path = os.path.join(archive_path_dir, archive_name) os.mkdir(dir_path) filepath = os.path.join(dir_path, filename) _length = len(content) count = 0 batch_size = 128 with open(filepath, 'wb') as f: f.write(content) if up_to_size: # fill with blank content up to a given size count += _length while count < up_to_size: f.write(b'0'*batch_size) count += batch_size _path = '%s.%s' % (dir_path, extension) _path = _compress(_path, extension, dir_path) return compute_info(_path) def create_archive_with_archive(root_path, name, archive): """Create an archive holding another. """ invalid_archive_path = os.path.join(root_path, name) with tarfile.open(invalid_archive_path, 'w:gz') as _archive: _archive.add(archive['path'], arcname=archive['name']) return compute_info(invalid_archive_path) -@pytest.mark.fs -class FileSystemCreationRoutine(TestCase): - """Mixin intended for tests needed to tamper with archives. - - """ - def setUp(self): - """Define the test client and other test variables.""" - super().setUp() - self.root_path = '/tmp/swh-deposit/test/build-zip/' - os.makedirs(self.root_path, exist_ok=True) - - self.archive = create_arborescence_archive( - self.root_path, 'archive1', 'file1', b'some content in file') - - self.atom_entry = b""" - - Awesome Compiler - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 1785io25c695 - 2017-10-07T15:17:08Z - some awesome author - https://hal-test.archives-ouvertes.fr - """ - - def tearDown(self): - super().tearDown() - shutil.rmtree(self.root_path) - - def create_simple_binary_deposit(self, status_partial=True): - response = self.client.post( - reverse(COL_IRI, args=[self.collection.name]), - content_type='application/zip', - data=self.archive['data'], - CONTENT_LENGTH=self.archive['length'], - HTTP_MD5SUM=self.archive['md5sum'], - HTTP_SLUG='external-id', - HTTP_IN_PROGRESS=status_partial, - HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( - self.archive['name'], )) - - # then - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - response_content = parse_xml(BytesIO(response.content)) - _status = response_content['deposit_status'] - if status_partial: - expected_status = DEPOSIT_STATUS_PARTIAL - else: - expected_status = DEPOSIT_STATUS_VERIFIED - self.assertEqual(_status, expected_status) - deposit_id = int(response_content['deposit_id']) - return deposit_id - - def create_complex_binary_deposit(self, status_partial=False): - deposit_id = self.create_simple_binary_deposit( - status_partial=True) - - # Add a second archive to the deposit - # update its status to DEPOSIT_STATUS_VERIFIED - response = self.client.post( - reverse(EM_IRI, args=[self.collection.name, deposit_id]), - content_type='application/zip', - data=self.archive2['data'], - CONTENT_LENGTH=self.archive2['length'], - HTTP_MD5SUM=self.archive2['md5sum'], - HTTP_SLUG='external-id', - HTTP_IN_PROGRESS=status_partial, - HTTP_CONTENT_DISPOSITION='attachment; filename=filename1.zip') - - # then - assert response.status_code == status.HTTP_201_CREATED - response_content = parse_xml(BytesIO(response.content)) - deposit_id = int(response_content['deposit_id']) - return deposit_id - - def create_deposit_archive_with_archive(self, archive_extension): - # we create the holding archive to a given extension - archive = create_arborescence_archive( - self.root_path, 'archive1', 'file1', b'some content in file', - extension=archive_extension) - - # now we create an archive holding the first created archive - invalid_archive = create_archive_with_archive( - self.root_path, 'invalid.tar.gz', archive) - - # we deposit it - response = self.client.post( - reverse(COL_IRI, args=[self.collection.name]), - content_type='application/x-tar', - data=invalid_archive['data'], - CONTENT_LENGTH=invalid_archive['length'], - HTTP_MD5SUM=invalid_archive['md5sum'], - HTTP_SLUG='external-id', - HTTP_IN_PROGRESS=False, - HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % ( - invalid_archive['name'], )) - - # then - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - response_content = parse_xml(BytesIO(response.content)) - _status = response_content['deposit_status'] - self.assertEqual(_status, DEPOSIT_STATUS_DEPOSITED) - deposit_id = int(response_content['deposit_id']) - return deposit_id - - def update_binary_deposit(self, deposit_id, status_partial=False): - # update existing deposit with atom entry metadata - response = self.client.post( - reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]), - content_type='application/atom+xml;type=entry', - data=self.codemeta_entry_data1, - HTTP_SLUG='external-id', - HTTP_IN_PROGRESS=status_partial) - - # then - self.assertEqual(response.status_code, status.HTTP_201_CREATED) - response_content = parse_xml(BytesIO(response.content)) - _status = response_content['deposit_status'] - if status_partial: - expected_status = DEPOSIT_STATUS_PARTIAL - else: - expected_status = DEPOSIT_STATUS_DEPOSITED - self.assertEqual(_status, expected_status) - deposit_id = int(response_content['deposit_id']) - return deposit_id - - -@pytest.mark.fs -class BasicTestCase(TestCase): - """Mixin intended for data setup purposes (user, collection, etc...) - - """ - def setUp(self): - """Define the test client and other test variables.""" - super().setUp() - # expanding diffs in tests - self.maxDiff = None - - # basic minimum test data - - _name = 'hal' - _provider_url = 'https://hal-test.archives-ouvertes.fr/' - _domain = 'archives-ouvertes.fr/' - # set collection up - _collection = DepositCollection(name=_name) - _collection.save() - # set user/client up - _client = DepositClient.objects.create_user(username=_name, - password=_name, - provider_url=_provider_url, - domain=_domain) - _client.collections = [_collection.id] - _client.last_name = _name - _client.save() - - self.collection = _collection - self.user = _client - self.username = _name - self.userpass = _name - - def tearDown(self): - super().tearDown() - # Clean up uploaded files in temporary directory (tests have - # their own media root folder) - if os.path.exists(MEDIA_ROOT): - for d in os.listdir(MEDIA_ROOT): - shutil.rmtree(os.path.join(MEDIA_ROOT, d)) - - -class WithAuthTestCase(TestCase): - """Mixin intended for testing the api with basic authentication. - - """ - def setUp(self): - super().setUp() - _token = '%s:%s' % (self.username, self.userpass) - token = base64.b64encode(_token.encode('utf-8')) - authorization = 'Basic %s' % token.decode('utf-8') - self.client.credentials(HTTP_AUTHORIZATION=authorization) - - def tearDown(self): - super().tearDown() - self.client.credentials() - - -class CommonCreationRoutine(TestCase): - """Mixin class to share initialization routine. - - - cf: - `class`:test_deposit_update.DepositReplaceExistingDataTest - `class`:test_deposit_update.DepositUpdateDepositWithNewDataTest - `class`:test_deposit_update.DepositUpdateFailuresTest - `class`:test_deposit_delete.DepositDeleteTest - - """ - def setUp(self): - super().setUp() - - self.atom_entry_data0 = b""" - - some-external-id - https://hal-test.archives-ouvertes.fr/some-external-id - some awesome author - """ - - self.atom_entry_data1 = b""" - - another one - no one - 2017-10-07T15:17:08Z - """ - - self.atom_entry_data2 = b""" - - Awesome Compiler - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 1785io25c695 - 2017-10-07T15:17:08Z - some awesome author - https://hal-test.archives-ouvertes.fr/id - """ - - self.codemeta_entry_data0 = b""" - - Awesome Compiler - https://hal-test.archives-ouvertes.fr/1785io25c695 - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 1785io25c695 - 2017-10-07T15:17:08Z - some awesome author - description - key-word 1 - """ - - self.codemeta_entry_data1 = b""" - - Composing a Web of Audio Applications - hal - hal-01243065 - hal-01243065 - https://hal-test.archives-ouvertes.fr/hal-01243065 - test - DSP programming,Web - 2017-05-03T16:08:47+02:00 - this is the description - 1 - phpstorm - stable - php - python - C - - GNU General Public License v3.0 only - - - CeCILL Free Software License Agreement v1.1 - - - HAL - hal@ccsd.cnrs.fr - - - Morane Gruenpeter - -""" - - def create_deposit_with_invalid_archive(self, - external_id='some-external-id-1'): - url = reverse(COL_IRI, args=[self.collection.name]) - - data = b'some data which is clearly not a zip file' - md5sum = hashlib.md5(data).hexdigest() - - # when - response = self.client.post( - url, - content_type='application/zip', # as zip - data=data, - # + headers - CONTENT_LENGTH=len(data), - # other headers needs HTTP_ prefix to be taken into account - HTTP_SLUG=external_id, - HTTP_CONTENT_MD5=md5sum, - HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip', - HTTP_CONTENT_DISPOSITION='attachment; filename=filename0') - - response_content = parse_xml(BytesIO(response.content)) - deposit_id = int(response_content['deposit_id']) - return deposit_id - - def create_deposit_with_status( - self, status, - external_id='some-external-id-1', - swh_id=None, - swh_id_context=None, - swh_anchor_id=None, - swh_anchor_id_context=None, - status_detail=None): - # create an invalid deposit which we will update further down the line - deposit_id = self.create_deposit_with_invalid_archive(external_id) - - # We cannot create some form of deposit with a given status in - # test context ('rejected' for example). Update in place the - # deposit with such status to permit some further tests. - deposit = Deposit.objects.get(pk=deposit_id) - if status == DEPOSIT_STATUS_REJECTED: - deposit.status_detail = status_detail - deposit.status = status - if swh_id: - deposit.swh_id = swh_id - if swh_id_context: - deposit.swh_id_context = swh_id_context - if swh_anchor_id: - deposit.swh_anchor_id = swh_anchor_id - if swh_anchor_id_context: - deposit.swh_anchor_id_context = swh_anchor_id_context - deposit.save() - return deposit_id - - def create_simple_deposit_partial(self, external_id='some-external-id'): - """Create a simple deposit (1 request) in `partial` state and returns - its new identifier. - - Returns: - deposit id - - """ - response = self.client.post( - reverse(COL_IRI, args=[self.collection.name]), - content_type='application/atom+xml;type=entry', - data=self.atom_entry_data0, - HTTP_SLUG=external_id, - HTTP_IN_PROGRESS='true') - - assert response.status_code == status.HTTP_201_CREATED - response_content = parse_xml(BytesIO(response.content)) - deposit_id = int(response_content['deposit_id']) - return deposit_id - - def create_deposit_partial_with_data_in_args(self, data): - """Create a simple deposit (1 request) in `partial` state with the data - or metadata as an argument and returns its new identifier. - - Args: - data: atom entry - - Returns: - deposit id - - """ - if isinstance(data, str): - data = data.encode('utf-8') - - response = self.client.post( - reverse(COL_IRI, args=[self.collection.name]), - content_type='application/atom+xml;type=entry', - data=data, - HTTP_SLUG='external-id', - HTTP_IN_PROGRESS='true') - - assert response.status_code == status.HTTP_201_CREATED - response_content = parse_xml(BytesIO(response.content)) - deposit_id = int(response_content['deposit_id']) - return deposit_id - - def _update_deposit_with_status(self, deposit_id, status_partial=False): - """Add to a given deposit another archive and update its current - status to `deposited` (by default). - - Returns: - deposit id - - """ - # when - response = self.client.post( - reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]), - content_type='application/atom+xml;type=entry', - data=self.atom_entry_data1, - HTTP_SLUG='external-id', - HTTP_IN_PROGRESS=status_partial) - - # then - assert response.status_code == status.HTTP_201_CREATED - return deposit_id - - def create_deposit_ready(self, external_id='some-external-id'): - """Create a complex deposit (2 requests) in status `deposited`. - - """ - deposit_id = self.create_simple_deposit_partial( - external_id=external_id) - deposit_id = self._update_deposit_with_status(deposit_id) - return deposit_id - - def create_deposit_partial(self, external_id='some-external-id'): - """Create a complex deposit (2 requests) in status `partial`. - - """ - deposit_id = self.create_simple_deposit_partial( - external_id=external_id) - deposit_id = self._update_deposit_with_status( - deposit_id, status_partial=True) - return deposit_id - - def add_metadata_to_deposit(self, deposit_id, status_partial=False): - """Add metadata to deposit. - - """ - # when - response = self.client.post( - reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]), - content_type='application/atom+xml;type=entry', - data=self.codemeta_entry_data1, - HTTP_SLUG='external-id', - HTTP_IN_PROGRESS=status_partial) - assert response.status_code == status.HTTP_201_CREATED - # then - deposit = Deposit.objects.get(pk=deposit_id) - assert deposit is not None - - deposit_requests = DepositRequest.objects.filter(deposit=deposit) - assert deposit_requests is not [] - - for dr in deposit_requests: - if dr.type == 'metadata': - assert deposit_requests[0].metadata is not {} - return deposit_id - - def check_archive(archive_name: str, archive_name_to_check: str): """Helper function to ensure archive_name is present within the archive_name_to_check. Raises: AssertionError if archive_name is not present within archive_name_to_check """ if '.' in archive_name: filename, extension = archive_name.split('.') pattern = re.compile('.*/%s.*\\.%s' % (filename, extension)) else: pattern = re.compile('.*/%s' % archive_name) assert pattern.match(archive_name_to_check) is not None diff --git a/swh/deposit/tests/loader/test_tasks.py b/swh/deposit/tests/loader/test_tasks.py index 9d69d43a..c522d2fc 100644 --- a/swh/deposit/tests/loader/test_tasks.py +++ b/swh/deposit/tests/loader/test_tasks.py @@ -1,41 +1,39 @@ # Copyright (C) 2018 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information from unittest.mock import patch @patch('swh.deposit.loader.loader.DepositLoader.load') -def test_deposit_load(mock_loader, swh_app, celery_session_worker): - mock_loader.return_value = {'status': 'eventful'} +def deposit_load(loader, swh_config, swh_app, celery_session_worker): + loader.return_value = {'status': 'eventful'} res = swh_app.send_task( 'swh.deposit.loader.tasks.LoadDepositArchiveTsk', - kwargs=dict(archive_url='archive_url', - deposit_meta_url='deposit_meta_url', - deposit_update_url='deposit_update_url')) + args=('archive_url', 'deposit_meta_url', 'deposit_update_url')) assert res res.wait() assert res.successful() assert res.result == {'status': 'eventful'} - mock_loader.assert_called_once_with( + loader.assert_called_once_with( archive_url='archive_url', deposit_meta_url='deposit_meta_url', deposit_update_url='deposit_update_url') @patch('swh.deposit.loader.checker.DepositChecker.check') -def test_check_deposit(mock_checker, swh_app, celery_session_worker): - mock_checker.return_value = {'status': 'uneventful'} +def deposit_check(checker, swh_config, swh_app, celery_session_worker): + checker.return_value = {'status': 'uneventful'} res = swh_app.send_task( 'swh.deposit.loader.tasks.ChecksDepositTsk', - args=('check_deposit_url',)) + args=['check_deposit_url']) assert res res.wait() assert res.successful() assert res.result == {'status': 'uneventful'} - mock_checker.assert_called_once_with('check_deposit_url') + checker.assert_called_once_with('check_deposit_url')