diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py --- a/swh/deposit/api/common.py +++ b/swh/deposit/api/common.py @@ -19,13 +19,12 @@ from swh.model import hashutil from swh.scheduler.utils import create_oneshot_task_dict -from swh.deposit.utils import origin_url_from from ..config import ( SWHDefaultConfig, EDIT_SE_IRI, EM_IRI, CONT_FILE_IRI, ARCHIVE_KEY, METADATA_KEY, RAW_METADATA_KEY, STATE_IRI, DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_PARTIAL, - DEPOSIT_STATUS_VERIFIED, PRIVATE_CHECK_DEPOSIT, + PRIVATE_CHECK_DEPOSIT, DEPOSIT_STATUS_LOAD_SUCCESS, ARCHIVE_TYPE, METADATA_TYPE ) from ..errors import ( @@ -184,14 +183,6 @@ 'check-deposit', deposit_check_url=check_url) check_task_id = scheduler.create_tasks([task])[0]['id'] deposit.check_task_id = check_task_id - elif (deposit.status == DEPOSIT_STATUS_VERIFIED and - not deposit.load_task_id): - - url = origin_url_from(deposit) - task = create_oneshot_task_dict( - 'load-deposit', url=url, deposit_id=deposit.id) - load_task_id = scheduler.create_task([task])[0]['id'] - deposit.load_task_id = load_task_id deposit.save() diff --git a/swh/deposit/api/private/__init__.py b/swh/deposit/api/private/__init__.py --- a/swh/deposit/api/private/__init__.py +++ b/swh/deposit/api/private/__init__.py @@ -5,7 +5,7 @@ from swh.deposit import utils -from ...config import METADATA_TYPE +from ...config import METADATA_TYPE, SWHDefaultConfig from ...models import DepositRequest, Deposit from rest_framework.permissions import AllowAny @@ -56,7 +56,7 @@ return utils.merge(*metadata) -class SWHPrivateAPIView(SWHAPIView): +class SWHPrivateAPIView(SWHDefaultConfig, SWHAPIView): """Mixin intended as private api (so no authentication) based API view (for the private ones). diff --git a/swh/deposit/api/private/deposit_check.py b/swh/deposit/api/private/deposit_check.py --- a/swh/deposit/api/private/deposit_check.py +++ b/swh/deposit/api/private/deposit_check.py @@ -8,8 +8,12 @@ import tarfile import zipfile +from itertools import chain +from shutil import get_unpack_formats + from rest_framework import status +from swh.scheduler.utils import create_oneshot_task_dict from . import DepositReadMixin, SWHPrivateAPIView from ..common import SWHGetDepositAPI @@ -33,6 +37,11 @@ r'.*\.(%s)$' % '|'.join(ARCHIVE_EXTENSIONS)) +def known_archive_format(filename): + return any(filename.endswith(t) for t in + chain(*(x[1] for x in get_unpack_formats()))) + + class SWHChecksDeposit(SWHPrivateAPIView, SWHGetDepositAPI, DepositReadMixin): """Dedicated class to read a deposit's raw archives content. @@ -92,6 +101,10 @@ """ archive_path = archive_request.archive.path + + if not known_archive_format(archive_path): + return False, MANDATORY_ARCHIVE_UNSUPPORTED + try: if zipfile.is_zipfile(archive_path): with zipfile.ZipFile(archive_path) as f: @@ -203,6 +216,12 @@ response = { 'status': deposit.status, } + if not deposit.load_task_id and self.config['checks']: + url = deposit.origin_url + task = create_oneshot_task_dict( + 'load-deposit', url=url, deposit_id=deposit.id) + load_task_id = self.scheduler.create_tasks([task])[0]['id'] + deposit.load_task_id = load_task_id deposit.save() diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py --- a/swh/deposit/api/private/deposit_read.py +++ b/swh/deposit/api/private/deposit_read.py @@ -15,7 +15,6 @@ from swh.core import tarball from swh.model import identifiers from swh.deposit.utils import normalize_date -from swh.deposit import utils from . import DepositReadMixin, SWHPrivateAPIView from ...config import SWH_PERSON, ARCHIVE_TYPE @@ -177,7 +176,7 @@ data = { 'origin': { 'type': 'deposit', - 'url': utils.origin_url_from(deposit), + 'url': deposit.origin_url, } } diff --git a/swh/deposit/client/__init__.py b/swh/deposit/client/__init__.py --- a/swh/deposit/client/__init__.py +++ b/swh/deposit/client/__init__.py @@ -14,6 +14,7 @@ import logging from abc import ABCMeta, abstractmethod +from urllib.parse import urljoin from swh.core.config import SWHConfig @@ -82,7 +83,7 @@ self.config = config self._client = _client - self.base_url = self.config['url'] + self.base_url = self.config['url'].lstrip('/') auth = self.config['auth'] if auth == {}: self.auth = None @@ -109,7 +110,7 @@ if self.auth: kwargs['auth'] = self.auth - full_url = '%s%s' % (self.base_url.rstrip('/'), url) + full_url = urljoin(self.base_url, url.lstrip('/')) return method_fn(full_url, *args, **kwargs) diff --git a/swh/deposit/models.py b/swh/deposit/models.py --- a/swh/deposit/models.py +++ b/swh/deposit/models.py @@ -151,6 +151,11 @@ d['status_detail'] = self.status_detail return str(d) + @property + def origin_url(self): + return '%s/%s' % (self.client.provider_url.rstrip('/'), + self.external_id) + def client_directory_path(instance, filename): """Callable to upload archive in MEDIA_ROOT/user_/ diff --git a/swh/deposit/tests/api/test_deposit_private_check.py b/swh/deposit/tests/api/test_deposit_private_check.py --- a/swh/deposit/tests/api/test_deposit_private_check.py +++ b/swh/deposit/tests/api/test_deposit_private_check.py @@ -237,7 +237,7 @@ # now we create an archive holding the first created archive invalid_archive = create_archive_with_archive( - root_path, 'invalid.tar.gz', archive) + root_path, 'invalid.tgz', archive) # we deposit it response = client.post( diff --git a/swh/deposit/tests/test_utils.py b/swh/deposit/tests/test_utils.py --- a/swh/deposit/tests/test_utils.py +++ b/swh/deposit/tests/test_utils.py @@ -8,43 +8,6 @@ from unittest.mock import patch from swh.deposit import utils -from swh.deposit.models import Deposit, DepositClient - - -def test_origin_url_from(): - """With correctly setup-ed deposit, all is fine - - """ - for provider_url, external_id in ( - ('http://somewhere.org', 'uuid'), - ('http://overthejungle.org', 'diuu'), - ): - deposit = Deposit( - client=DepositClient(provider_url=provider_url), - external_id=external_id - ) - - actual_origin_url = utils.origin_url_from(deposit) - - assert actual_origin_url == '%s/%s' % ( - provider_url.rstrip('/'), external_id) - - -def test_origin_url_from_ko(): - """Badly configured deposit should raise - - """ - for provider_url, external_id in ( - (None, 'uuid'), - ('http://overthejungle.org', None), - ): - deposit = Deposit( - client=DepositClient(provider_url=provider_url), - external_id=None - ) - - with pytest.raises(AssertionError): - utils.origin_url_from(deposit) def test_merge(): diff --git a/swh/deposit/utils.py b/swh/deposit/utils.py --- a/swh/deposit/utils.py +++ b/swh/deposit/utils.py @@ -10,31 +10,6 @@ from swh.model.identifiers import normalize_timestamp -def origin_url_from(deposit): - """Given a deposit instance, return the associated origin url. - - This expects a deposit and the associated client to be correctly - configured. - - Args: - deposit (Deposit): The deposit from which derives the origin url - - Raises: - AssertionError if: - - the client's provider_url field is not configured. - - the deposit's external_id field is not configured. - - Returns - The associated origin url - - """ - external_id = deposit.external_id - assert external_id is not None - base_url = deposit.client.provider_url - assert base_url is not None - return '%s/%s' % (base_url.rstrip('/'), external_id) - - def merge(*dicts): """Given an iterator of dicts, merge them losing no information.