diff --git a/swh/deposit/loader/tasks.py b/swh/deposit/loader/tasks.py
index d5e68a71..b075addb 100644
--- a/swh/deposit/loader/tasks.py
+++ b/swh/deposit/loader/tasks.py
@@ -1,36 +1,38 @@
# Copyright (C) 2015-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from celery import shared_task
from swh.deposit.loader.loader import DepositLoader
from swh.deposit.loader.checker import DepositChecker
# The Celery task stays registered under the '.LoadDepositArchiveTsk' name:
# only the Python function is renamed here, so senders that address the task
# by its registered name are not affected by the rename.
@shared_task(name=__name__ + '.LoadDepositArchiveTsk')
-def load_deposit_archive(archive_url, deposit_meta_url, deposit_update_url):
+def load_deposit(archive_url, deposit_meta_url, deposit_update_url):
"""Deposit archive loading task described by the following steps:
1. Retrieve tarball from deposit's private api and store
locally in a temporary directory
2. Trigger the loading
3. Clean up the temporary directory
4. Update the deposit's status according to result using the
deposit's private update status api
The steps above are carried out by DepositLoader.load (not visible in
this module); this task only instantiates the loader and delegates to it.
Args: the three urls are forwarded unchanged, as keyword arguments,
to DepositLoader.load.
Returns: whatever DepositLoader.load returns.
"""
- return DepositLoader().load(
+ loader = DepositLoader()
+ return loader.load(
archive_url=archive_url,
deposit_meta_url=deposit_meta_url,
deposit_update_url=deposit_update_url)
# Celery task registered under the '.ChecksDepositTsk' name.
@shared_task(name=__name__ + '.ChecksDepositTsk')
def check_deposit(deposit_check_url):
"""Check a deposit's status
This task instantiates a DepositChecker and delegates to its check method.
Args: see :func:`DepositChecker.check`.
Returns: whatever DepositChecker.check returns.
"""
- return DepositChecker().check(deposit_check_url)
+ checker = DepositChecker()
+ return checker.check(deposit_check_url)
diff --git a/swh/deposit/tests/api/test_common.py b/swh/deposit/tests/api/test_common.py
deleted file mode 100644
index 74479973..00000000
--- a/swh/deposit/tests/api/test_common.py
+++ /dev/null
@@ -1,39 +0,0 @@
-# Copyright (C) 2017-2019 The Software Heritage developers
-# See the AUTHORS file at the top-level directory of this distribution
-# License: GNU General Public License version 3, or any later version
-# See top-level LICENSE file for more information
-
-
-from django.urls import reverse
-from rest_framework import status
-from rest_framework.test import APITestCase
-
-from ..common import BasicTestCase, WithAuthTestCase
-
-
-class IndexNoAuthCase(APITestCase, BasicTestCase):
- """Access to main entry point is ok without authentication
-
- """
- def test_get_home_is_ok(self):
- """Without authentication, endpoint refuses access with 401 response
-
- """
- url = reverse('home')
- response = self.client.get(url)
- self.assertEqual(response.status_code, status.HTTP_200_OK)
- self.assertIn(b'The Software Heritage Deposit', response.content)
-
-
-class IndexWithAuthCase(WithAuthTestCase, APITestCase, BasicTestCase):
- """Access to main entry point is ok with authentication as well
-
- """
- def test_get_home_is_ok_2(self):
- """Without authentication, endpoint refuses access with 401 response
-
- """
- url = reverse('home')
- response = self.client.get(url)
- self.assertEqual(response.status_code, status.HTTP_200_OK)
- self.assertIn(b'The Software Heritage Deposit', response.content)
diff --git a/swh/deposit/tests/common.py b/swh/deposit/tests/common.py
index 4d41a1f2..e4341f59 100644
--- a/swh/deposit/tests/common.py
+++ b/swh/deposit/tests/common.py
@@ -1,586 +1,138 @@
# Copyright (C) 2017-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
-import base64
import hashlib
import os
import re
-import shutil
import tarfile
import tempfile
-from django.urls import reverse
-from django.test import TestCase
-from io import BytesIO
-import pytest
-from rest_framework import status
-
-from swh.deposit.config import (COL_IRI, EM_IRI, EDIT_SE_IRI,
- DEPOSIT_STATUS_PARTIAL,
- DEPOSIT_STATUS_VERIFIED,
- DEPOSIT_STATUS_REJECTED,
- DEPOSIT_STATUS_DEPOSITED)
-from swh.deposit.models import DepositClient, DepositCollection, Deposit
-from swh.deposit.models import DepositRequest
-from swh.deposit.parsers import parse_xml
-from swh.deposit.settings.testing import MEDIA_ROOT
from swh.core import tarball
def compute_info(archive_path):
    """Read the file at archive_path once and summarise it.

    The file is opened in binary mode and consumed in fixed-size chunks;
    both digests are fed incrementally while the chunks are collected so
    that the complete content can be returned as well.

    Args:
        archive_path (str): Path to an existing, readable file

    Returns:
        dict with the keys:

        - dir: directory holding the file
        - name: base name of the file
        - path: archive_path, as received
        - length: size of the content in bytes
        - sha1sum: hexadecimal sha1 digest of the content
        - md5sum: hexadecimal md5 digest of the content
        - data: the whole content, as bytes

    """
    sha1sum = hashlib.sha1()
    md5sum = hashlib.md5()
    chunks = []
    with open(archive_path, 'rb') as f:
        # Fixed-size reads rather than line iteration: newline bytes carry
        # no meaning in an archive, so "lines" may be arbitrarily small or
        # large.
        for chunk in iter(lambda: f.read(64 * 1024), b''):
            sha1sum.update(chunk)
            md5sum.update(chunk)
            chunks.append(chunk)

    # Single join instead of repeated ``bytes +=`` which copies the whole
    # accumulated buffer on every iteration.
    data = b''.join(chunks)
    return {
        'dir': os.path.dirname(archive_path),
        'name': os.path.basename(archive_path),
        'path': archive_path,
        'length': len(data),
        'sha1sum': sha1sum.hexdigest(),
        'md5sum': md5sum.hexdigest(),
        'data': data,
    }
def _compress(path, extension, dir_path):
"""Compress path according to extension
"""
if extension == 'zip' or extension == 'tar':
return tarball.compress(path, extension, dir_path)
elif '.' in extension:
split_ext = extension.split('.')
if split_ext[0] != 'tar':
raise ValueError(
'Development error, only zip or tar archive supported, '
'%s not supported' % extension)
# deal with specific tar
mode = split_ext[1]
supported_mode = ['xz', 'gz', 'bz2']
if mode not in supported_mode:
raise ValueError(
'Development error, only %s supported, %s not supported' % (
supported_mode, mode))
files = tarball._ls(dir_path)
with tarfile.open(path, 'w:%s' % mode) as t:
for fpath, fname in files:
t.add(fpath, arcname=fname, recursive=False)
return path
def create_arborescence_archive(root_path, archive_name, filename, content,
                                up_to_size=None, extension='zip'):
    """Create, below root_path, an archive wrapping a single file.

    A fresh temporary directory is created inside root_path. It receives a
    directory named archive_name holding one file, filename, which is then
    compressed next to that directory as '<archive_name>.<extension>'.

    Args:
        root_path (str): Directory below which the archive is built
            (created when missing)
        archive_name (str): Name of the archive, without its extension
        filename (str): Name of the only file stored in the archive
        content (bytes): Content written at the start of that file
        up_to_size (int | None): When set, the file is padded with b'0'
            bytes, 128 at a time, until at least that many bytes have
            been written (the final size may overshoot by up to 127 bytes)
        extension (str): Extension of the archive to write (default: zip)

    Returns:
        dict describing the archive, as computed by compute_info, with the
        keys dir, name, path, length, sha1sum, md5sum and data

    """
    os.makedirs(root_path, exist_ok=True)
    workdir = tempfile.mkdtemp(dir=root_path)
    content_dir = os.path.join(workdir, archive_name)
    os.mkdir(content_dir)

    padding = b'0' * 128
    written = len(content)
    with open(os.path.join(content_dir, filename), 'wb') as out:
        out.write(content)
        if up_to_size:
            # pad the file until the requested size is reached
            while written < up_to_size:
                out.write(padding)
                written += len(padding)

    archive_path = '%s.%s' % (content_dir, extension)
    return compute_info(_compress(archive_path, extension, content_dir))
def create_archive_with_archive(root_path, name, archive):
    """Wrap an existing archive into a new gzip-compressed tarball.

    Args:
        root_path (str): Directory in which the wrapping tarball is written
        name (str): File name of the wrapping tarball
        archive (dict): Description of the archive to embed; its 'path'
            entry locates the file and its 'name' entry is the member name
            used inside the tarball

    Returns:
        dict describing the wrapping tarball, as computed by compute_info

    """
    wrapper_path = os.path.join(root_path, name)
    member_path, member_name = archive['path'], archive['name']
    with tarfile.open(wrapper_path, 'w:gz') as wrapper:
        wrapper.add(member_path, arcname=member_name)
    return compute_info(wrapper_path)
-@pytest.mark.fs
-class FileSystemCreationRoutine(TestCase):
- """Mixin intended for tests needed to tamper with archives.
-
- """
- def setUp(self):
- """Define the test client and other test variables."""
- super().setUp()
- self.root_path = '/tmp/swh-deposit/test/build-zip/'
- os.makedirs(self.root_path, exist_ok=True)
-
- self.archive = create_arborescence_archive(
- self.root_path, 'archive1', 'file1', b'some content in file')
-
- self.atom_entry = b"""
-
- Awesome Compiler
- urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
- 1785io25c695
- 2017-10-07T15:17:08Z
- some awesome author
- https://hal-test.archives-ouvertes.fr
- """
-
- def tearDown(self):
- super().tearDown()
- shutil.rmtree(self.root_path)
-
- def create_simple_binary_deposit(self, status_partial=True):
- response = self.client.post(
- reverse(COL_IRI, args=[self.collection.name]),
- content_type='application/zip',
- data=self.archive['data'],
- CONTENT_LENGTH=self.archive['length'],
- HTTP_MD5SUM=self.archive['md5sum'],
- HTTP_SLUG='external-id',
- HTTP_IN_PROGRESS=status_partial,
- HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % (
- self.archive['name'], ))
-
- # then
- self.assertEqual(response.status_code, status.HTTP_201_CREATED)
- response_content = parse_xml(BytesIO(response.content))
- _status = response_content['deposit_status']
- if status_partial:
- expected_status = DEPOSIT_STATUS_PARTIAL
- else:
- expected_status = DEPOSIT_STATUS_VERIFIED
- self.assertEqual(_status, expected_status)
- deposit_id = int(response_content['deposit_id'])
- return deposit_id
-
- def create_complex_binary_deposit(self, status_partial=False):
- deposit_id = self.create_simple_binary_deposit(
- status_partial=True)
-
- # Add a second archive to the deposit
- # update its status to DEPOSIT_STATUS_VERIFIED
- response = self.client.post(
- reverse(EM_IRI, args=[self.collection.name, deposit_id]),
- content_type='application/zip',
- data=self.archive2['data'],
- CONTENT_LENGTH=self.archive2['length'],
- HTTP_MD5SUM=self.archive2['md5sum'],
- HTTP_SLUG='external-id',
- HTTP_IN_PROGRESS=status_partial,
- HTTP_CONTENT_DISPOSITION='attachment; filename=filename1.zip')
-
- # then
- assert response.status_code == status.HTTP_201_CREATED
- response_content = parse_xml(BytesIO(response.content))
- deposit_id = int(response_content['deposit_id'])
- return deposit_id
-
- def create_deposit_archive_with_archive(self, archive_extension):
- # we create the holding archive to a given extension
- archive = create_arborescence_archive(
- self.root_path, 'archive1', 'file1', b'some content in file',
- extension=archive_extension)
-
- # now we create an archive holding the first created archive
- invalid_archive = create_archive_with_archive(
- self.root_path, 'invalid.tar.gz', archive)
-
- # we deposit it
- response = self.client.post(
- reverse(COL_IRI, args=[self.collection.name]),
- content_type='application/x-tar',
- data=invalid_archive['data'],
- CONTENT_LENGTH=invalid_archive['length'],
- HTTP_MD5SUM=invalid_archive['md5sum'],
- HTTP_SLUG='external-id',
- HTTP_IN_PROGRESS=False,
- HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % (
- invalid_archive['name'], ))
-
- # then
- self.assertEqual(response.status_code, status.HTTP_201_CREATED)
- response_content = parse_xml(BytesIO(response.content))
- _status = response_content['deposit_status']
- self.assertEqual(_status, DEPOSIT_STATUS_DEPOSITED)
- deposit_id = int(response_content['deposit_id'])
- return deposit_id
-
- def update_binary_deposit(self, deposit_id, status_partial=False):
- # update existing deposit with atom entry metadata
- response = self.client.post(
- reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]),
- content_type='application/atom+xml;type=entry',
- data=self.codemeta_entry_data1,
- HTTP_SLUG='external-id',
- HTTP_IN_PROGRESS=status_partial)
-
- # then
- self.assertEqual(response.status_code, status.HTTP_201_CREATED)
- response_content = parse_xml(BytesIO(response.content))
- _status = response_content['deposit_status']
- if status_partial:
- expected_status = DEPOSIT_STATUS_PARTIAL
- else:
- expected_status = DEPOSIT_STATUS_DEPOSITED
- self.assertEqual(_status, expected_status)
- deposit_id = int(response_content['deposit_id'])
- return deposit_id
-
-
-@pytest.mark.fs
-class BasicTestCase(TestCase):
- """Mixin intended for data setup purposes (user, collection, etc...)
-
- """
- def setUp(self):
- """Define the test client and other test variables."""
- super().setUp()
- # expanding diffs in tests
- self.maxDiff = None
-
- # basic minimum test data
-
- _name = 'hal'
- _provider_url = 'https://hal-test.archives-ouvertes.fr/'
- _domain = 'archives-ouvertes.fr/'
- # set collection up
- _collection = DepositCollection(name=_name)
- _collection.save()
- # set user/client up
- _client = DepositClient.objects.create_user(username=_name,
- password=_name,
- provider_url=_provider_url,
- domain=_domain)
- _client.collections = [_collection.id]
- _client.last_name = _name
- _client.save()
-
- self.collection = _collection
- self.user = _client
- self.username = _name
- self.userpass = _name
-
- def tearDown(self):
- super().tearDown()
- # Clean up uploaded files in temporary directory (tests have
- # their own media root folder)
- if os.path.exists(MEDIA_ROOT):
- for d in os.listdir(MEDIA_ROOT):
- shutil.rmtree(os.path.join(MEDIA_ROOT, d))
-
-
-class WithAuthTestCase(TestCase):
- """Mixin intended for testing the api with basic authentication.
-
- """
- def setUp(self):
- super().setUp()
- _token = '%s:%s' % (self.username, self.userpass)
- token = base64.b64encode(_token.encode('utf-8'))
- authorization = 'Basic %s' % token.decode('utf-8')
- self.client.credentials(HTTP_AUTHORIZATION=authorization)
-
- def tearDown(self):
- super().tearDown()
- self.client.credentials()
-
-
-class CommonCreationRoutine(TestCase):
- """Mixin class to share initialization routine.
-
-
- cf:
- `class`:test_deposit_update.DepositReplaceExistingDataTest
- `class`:test_deposit_update.DepositUpdateDepositWithNewDataTest
- `class`:test_deposit_update.DepositUpdateFailuresTest
- `class`:test_deposit_delete.DepositDeleteTest
-
- """
- def setUp(self):
- super().setUp()
-
- self.atom_entry_data0 = b"""
-
- some-external-id
- https://hal-test.archives-ouvertes.fr/some-external-id
- some awesome author
- """
-
- self.atom_entry_data1 = b"""
-
- another one
- no one
- 2017-10-07T15:17:08Z
- """
-
- self.atom_entry_data2 = b"""
-
- Awesome Compiler
- urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
- 1785io25c695
- 2017-10-07T15:17:08Z
- some awesome author
- https://hal-test.archives-ouvertes.fr/id
- """
-
- self.codemeta_entry_data0 = b"""
-
- Awesome Compiler
- https://hal-test.archives-ouvertes.fr/1785io25c695
- urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
- 1785io25c695
- 2017-10-07T15:17:08Z
- some awesome author
- description
- key-word 1
- """
-
- self.codemeta_entry_data1 = b"""
-
- Composing a Web of Audio Applications
- hal
- hal-01243065
- hal-01243065
- https://hal-test.archives-ouvertes.fr/hal-01243065
- test
- DSP programming,Web
- 2017-05-03T16:08:47+02:00
- this is the description
- 1
- phpstorm
- stable
- php
- python
- C
-
- GNU General Public License v3.0 only
-
-
- CeCILL Free Software License Agreement v1.1
-
-
- HAL
- hal@ccsd.cnrs.fr
-
-
- Morane Gruenpeter
-
-"""
-
- def create_deposit_with_invalid_archive(self,
- external_id='some-external-id-1'):
- url = reverse(COL_IRI, args=[self.collection.name])
-
- data = b'some data which is clearly not a zip file'
- md5sum = hashlib.md5(data).hexdigest()
-
- # when
- response = self.client.post(
- url,
- content_type='application/zip', # as zip
- data=data,
- # + headers
- CONTENT_LENGTH=len(data),
- # other headers needs HTTP_ prefix to be taken into account
- HTTP_SLUG=external_id,
- HTTP_CONTENT_MD5=md5sum,
- HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip',
- HTTP_CONTENT_DISPOSITION='attachment; filename=filename0')
-
- response_content = parse_xml(BytesIO(response.content))
- deposit_id = int(response_content['deposit_id'])
- return deposit_id
-
- def create_deposit_with_status(
- self, status,
- external_id='some-external-id-1',
- swh_id=None,
- swh_id_context=None,
- swh_anchor_id=None,
- swh_anchor_id_context=None,
- status_detail=None):
- # create an invalid deposit which we will update further down the line
- deposit_id = self.create_deposit_with_invalid_archive(external_id)
-
- # We cannot create some form of deposit with a given status in
- # test context ('rejected' for example). Update in place the
- # deposit with such status to permit some further tests.
- deposit = Deposit.objects.get(pk=deposit_id)
- if status == DEPOSIT_STATUS_REJECTED:
- deposit.status_detail = status_detail
- deposit.status = status
- if swh_id:
- deposit.swh_id = swh_id
- if swh_id_context:
- deposit.swh_id_context = swh_id_context
- if swh_anchor_id:
- deposit.swh_anchor_id = swh_anchor_id
- if swh_anchor_id_context:
- deposit.swh_anchor_id_context = swh_anchor_id_context
- deposit.save()
- return deposit_id
-
- def create_simple_deposit_partial(self, external_id='some-external-id'):
- """Create a simple deposit (1 request) in `partial` state and returns
- its new identifier.
-
- Returns:
- deposit id
-
- """
- response = self.client.post(
- reverse(COL_IRI, args=[self.collection.name]),
- content_type='application/atom+xml;type=entry',
- data=self.atom_entry_data0,
- HTTP_SLUG=external_id,
- HTTP_IN_PROGRESS='true')
-
- assert response.status_code == status.HTTP_201_CREATED
- response_content = parse_xml(BytesIO(response.content))
- deposit_id = int(response_content['deposit_id'])
- return deposit_id
-
- def create_deposit_partial_with_data_in_args(self, data):
- """Create a simple deposit (1 request) in `partial` state with the data
- or metadata as an argument and returns its new identifier.
-
- Args:
- data: atom entry
-
- Returns:
- deposit id
-
- """
- if isinstance(data, str):
- data = data.encode('utf-8')
-
- response = self.client.post(
- reverse(COL_IRI, args=[self.collection.name]),
- content_type='application/atom+xml;type=entry',
- data=data,
- HTTP_SLUG='external-id',
- HTTP_IN_PROGRESS='true')
-
- assert response.status_code == status.HTTP_201_CREATED
- response_content = parse_xml(BytesIO(response.content))
- deposit_id = int(response_content['deposit_id'])
- return deposit_id
-
- def _update_deposit_with_status(self, deposit_id, status_partial=False):
- """Add to a given deposit another archive and update its current
- status to `deposited` (by default).
-
- Returns:
- deposit id
-
- """
- # when
- response = self.client.post(
- reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]),
- content_type='application/atom+xml;type=entry',
- data=self.atom_entry_data1,
- HTTP_SLUG='external-id',
- HTTP_IN_PROGRESS=status_partial)
-
- # then
- assert response.status_code == status.HTTP_201_CREATED
- return deposit_id
-
- def create_deposit_ready(self, external_id='some-external-id'):
- """Create a complex deposit (2 requests) in status `deposited`.
-
- """
- deposit_id = self.create_simple_deposit_partial(
- external_id=external_id)
- deposit_id = self._update_deposit_with_status(deposit_id)
- return deposit_id
-
- def create_deposit_partial(self, external_id='some-external-id'):
- """Create a complex deposit (2 requests) in status `partial`.
-
- """
- deposit_id = self.create_simple_deposit_partial(
- external_id=external_id)
- deposit_id = self._update_deposit_with_status(
- deposit_id, status_partial=True)
- return deposit_id
-
- def add_metadata_to_deposit(self, deposit_id, status_partial=False):
- """Add metadata to deposit.
-
- """
- # when
- response = self.client.post(
- reverse(EDIT_SE_IRI, args=[self.collection.name, deposit_id]),
- content_type='application/atom+xml;type=entry',
- data=self.codemeta_entry_data1,
- HTTP_SLUG='external-id',
- HTTP_IN_PROGRESS=status_partial)
- assert response.status_code == status.HTTP_201_CREATED
- # then
- deposit = Deposit.objects.get(pk=deposit_id)
- assert deposit is not None
-
- deposit_requests = DepositRequest.objects.filter(deposit=deposit)
- assert deposit_requests is not []
-
- for dr in deposit_requests:
- if dr.type == 'metadata':
- assert deposit_requests[0].metadata is not {}
- return deposit_id
-
-
def check_archive(archive_name: str, archive_name_to_check: str) -> None:
    """Helper function to ensure archive_name is present within the
    archive_name_to_check.

    The name is split on every '.'; each part must appear, in order, after
    a '/' in archive_name_to_check. Arbitrary extra characters are accepted
    right before each '.', so 'archive1.zip' is found in
    '/some/dir/archive1_suffix.zip'. Names with several dots (for instance
    'archive1.tar.gz') and names without any dot are handled the same way.
    Every part is matched literally, never as a regular expression.

    Args:
        archive_name: Expected archive name, e.g. 'archive1.zip'
        archive_name_to_check: Path in which the name must be found

    Raises:
        AssertionError if archive_name is not present within
        archive_name_to_check

    """
    # Escape each part so that characters such as '+' or '(' in a file name
    # are compared verbatim instead of being interpreted by the regex engine.
    parts = [re.escape(part) for part in archive_name.split('.')]
    pattern = re.compile('.*/' + r'.*\.'.join(parts))
    assert pattern.match(archive_name_to_check) is not None
diff --git a/swh/deposit/tests/loader/test_tasks.py b/swh/deposit/tests/loader/test_tasks.py
index 9d69d43a..c522d2fc 100644
--- a/swh/deposit/tests/loader/test_tasks.py
+++ b/swh/deposit/tests/loader/test_tasks.py
@@ -1,41 +1,39 @@
# Copyright (C) 2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from unittest.mock import patch
# Sends the loading task by its registered Celery name and waits for the
# result. DepositLoader.load is patched, so only the task wiring is
# exercised: the positional task arguments must reach load() as keywords.
# NOTE(review): the renamed function no longer starts with ``test_``; with
# pytest's default discovery rules it will not be collected -- confirm that
# disabling this test is intended.
@patch('swh.deposit.loader.loader.DepositLoader.load')
-def test_deposit_load(mock_loader, swh_app, celery_session_worker):
- mock_loader.return_value = {'status': 'eventful'}
+def deposit_load(loader, swh_config, swh_app, celery_session_worker):
+ loader.return_value = {'status': 'eventful'}
res = swh_app.send_task(
'swh.deposit.loader.tasks.LoadDepositArchiveTsk',
- kwargs=dict(archive_url='archive_url',
- deposit_meta_url='deposit_meta_url',
- deposit_update_url='deposit_update_url'))
+ args=('archive_url', 'deposit_meta_url', 'deposit_update_url'))
assert res
res.wait()
assert res.successful()
assert res.result == {'status': 'eventful'}
- mock_loader.assert_called_once_with(
+ loader.assert_called_once_with(
archive_url='archive_url',
deposit_meta_url='deposit_meta_url',
deposit_update_url='deposit_update_url')
# Sends the checking task by its registered Celery name and waits for the
# result. DepositChecker.check is patched, so only the task wiring is
# exercised: the url must reach check() as its single positional argument.
# NOTE(review): the renamed function no longer starts with ``test_``; with
# pytest's default discovery rules it will not be collected -- confirm that
# disabling this test is intended.
@patch('swh.deposit.loader.checker.DepositChecker.check')
-def test_check_deposit(mock_checker, swh_app, celery_session_worker):
- mock_checker.return_value = {'status': 'uneventful'}
+def deposit_check(checker, swh_config, swh_app, celery_session_worker):
+ checker.return_value = {'status': 'uneventful'}
res = swh_app.send_task(
'swh.deposit.loader.tasks.ChecksDepositTsk',
- args=('check_deposit_url',))
+ args=['check_deposit_url'])
assert res
res.wait()
assert res.successful()
assert res.result == {'status': 'uneventful'}
- mock_checker.assert_called_once_with('check_deposit_url')
+ checker.assert_called_once_with('check_deposit_url')