diff --git a/bin/create_deposit.sh b/bin/create_deposit.sh
index c883306e..183f114a 100755
--- a/bin/create_deposit.sh
+++ b/bin/create_deposit.sh
@@ -1,21 +1,21 @@
#!/usr/bin/env bash
# Send one archive deposit to the deposit server with curl.
#
# Usage: create_deposit.sh [ARCHIVE] [IN_PROGRESS] [CONTENT_TYPE]
#   ARCHIVE       path of the archive to upload (default: ../../deposit.zip)
#   IN_PROGRESS   value of the In-Progress header (default: false)
#   CONTENT_TYPE  value of the Content-type header (default: application/zip)
#
# NOTE(review): CREDS, SERVER and COLLECTION are presumably defined by
# ./default-setup -- confirm.

. ./default-setup

ARCHIVE=${1-'../../deposit.zip'}
# Quote every expansion of the archive path so that paths containing spaces
# or glob characters are passed through untouched.
NAME=$(basename "${ARCHIVE}")
MD5=$(md5sum "${ARCHIVE}" | cut -f 1 -d' ')
PROGRESS=${2-'false'}
TYPE=${3-'application/zip'}

curl -i -u "$CREDS" \
     -X POST \
     --data-binary "@${ARCHIVE}" \
     -H "In-Progress: ${PROGRESS}" \
     -H "Content-MD5: ${MD5}" \
     -H "Content-Disposition: attachment; filename=${NAME}" \
     -H 'Slug: external-id' \
     -H "Content-type: ${TYPE}" \
     "${SERVER}/1/${COLLECTION}/"
diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py
index f5367763..53bbf32f 100644
--- a/swh/deposit/api/common.py
+++ b/swh/deposit/api/common.py
@@ -1,858 +1,863 @@
-# Copyright (C) 2017 The Software Heritage developers
+# Copyright (C) 2017-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
import hashlib
from abc import ABCMeta, abstractmethod
from django.core.urlresolvers import reverse
from django.http import HttpResponse
from django.shortcuts import render
from django.utils import timezone
from rest_framework import status
from rest_framework.authentication import BasicAuthentication
from rest_framework.permissions import IsAuthenticated, AllowAny
from rest_framework.views import APIView
from swh.model import hashutil
from ..config import SWHDefaultConfig, EDIT_SE_IRI, EM_IRI, CONT_FILE_IRI
from ..config import ARCHIVE_KEY, METADATA_KEY, STATE_IRI
from ..config import DEPOSIT_STATUS_READY_FOR_CHECKS, DEPOSIT_STATUS_PARTIAL
from ..config import DEPOSIT_STATUS_LOAD_SUCCESS
from ..errors import MAX_UPLOAD_SIZE_EXCEEDED, BAD_REQUEST, ERROR_CONTENT
from ..errors import CHECKSUM_MISMATCH, make_error_dict, MEDIATION_NOT_ALLOWED
from ..errors import make_error_response_from_dict, FORBIDDEN
from ..errors import NOT_FOUND, make_error_response, METHOD_NOT_ALLOWED
from ..models import Deposit, DepositRequest, DepositCollection
from ..models import DepositRequestType, DepositClient
from ..parsers import parse_xml
ACCEPT_PACKAGINGS = ['http://purl.org/net/sword/package/SimpleZip']
-ACCEPT_CONTENT_TYPES = ['application/zip']
+ACCEPT_ARCHIVE_CONTENT_TYPES = ['application/zip', 'application/x-tar']
class SWHAPIView(APIView):
    """Base API view which enforces HTTP basic authentication.

    Requests are authenticated with ``BasicAuthentication`` and only
    authenticated users are permitted.

    """
    authentication_classes = (BasicAuthentication,)
    permission_classes = (IsAuthenticated,)
class SWHPrivateAPIView(SWHAPIView):
    """Base API view for the private endpoints.

    No authentication class is configured and any caller is allowed.

    """
    authentication_classes = ()
    permission_classes = (AllowAny,)
class SWHBaseDeposit(SWHDefaultConfig, SWHAPIView, metaclass=ABCMeta):
"""Base deposit request class sharing multiple common behaviors.
"""
def __init__(self):
    """Initialise the view and index the deposit request types by name.

    All ``DepositRequestType`` rows are read once and stored in
    ``self.deposit_request_types`` as a ``{name: instance}`` dictionary.

    """
    super().__init__()
    self.deposit_request_types = {
        request_type.name: request_type
        for request_type in DepositRequestType.objects.all()
    }
def _read_headers(self, req):
    """Collect and normalise the headers used by the deposit endpoints.

    The values live in different places of the request (content type on
    the request itself, the others in the underlying ``META`` mapping)
    and some need a conversion before use.

    Args:
        req (Request): Input request

    Returns:
        Dictionary with the following keys (a value is None when the
        matching header is absent):
        - content-type
        - content-length (int when sent as a non-empty string)
        - in-progress (bool, False when the header is absent)
        - content-disposition
        - content-md5sum (bytes decoded from the hexadecimal header)
        - packaging
        - slug
        - on-behalf-of
        - metadata-relevant

    """
    meta = req._request.META
    content_type = req.content_type

    length = meta.get('CONTENT_LENGTH')
    if length and isinstance(length, str):
        length = int(length)

    # no In-Progress header means the deposit is final
    in_progress = meta.get('HTTP_IN_PROGRESS', False)
    if isinstance(in_progress, str):
        in_progress = in_progress.lower() == 'true'

    md5sum = meta.get('HTTP_CONTENT_MD5')
    if md5sum:
        md5sum = bytes.fromhex(md5sum)

    headers = {
        'content-type': content_type,
        'content-length': length,
        'in-progress': in_progress,
        'content-md5sum': md5sum,
    }
    # the remaining headers are passed through untouched
    passthrough = (
        ('content-disposition', 'HTTP_CONTENT_DISPOSITION'),
        ('packaging', 'HTTP_PACKAGING'),
        ('slug', 'HTTP_SLUG'),
        ('on-behalf-of', 'HTTP_ON_BEHALF_OF'),
        ('metadata-relevant', 'HTTP_METADATA_RELEVANT'),
    )
    for key, meta_key in passthrough:
        headers[key] = meta.get(meta_key)
    return headers
def _compute_md5(self, filehandler):
    """Compute the md5 checksum of an uploaded file.

    Args:
        filehandler (InMemoryUploadedFile): the file to hash; it is
            iterated over and each item is fed to the hash

    Returns:
        the raw md5 digest (bytes)

    """
    md5 = hashlib.md5()
    feed = md5.update
    for piece in filehandler:
        feed(piece)
    return md5.digest()
def _deposit_put(self, deposit_id=None, in_progress=False,
                 external_id=None):
    """Create a new deposit or update an existing one in db.

    Args:
        deposit_id (int): deposit identifier; when falsy a new deposit
            is created
        in_progress (bool): False marks the deposit as complete and
            ready for checks, anything else keeps it partial
        external_id (str): The external identifier to associate to
            the deposit

    Returns:
        The Deposit instance saved or updated.

    """
    if in_progress is False:
        complete_date = timezone.now()
        status_type = DEPOSIT_STATUS_READY_FOR_CHECKS
    else:
        complete_date, status_type = None, DEPOSIT_STATUS_PARTIAL

    if deposit_id:
        # existing deposit: only its completion date and status change
        deposit = Deposit.objects.get(pk=deposit_id)
        deposit.complete_date = complete_date
        deposit.status = status_type
    else:
        # the parent is the latest successfully loaded deposit sharing
        # the same external id, if any
        successful = Deposit.objects.filter(
            external_id=external_id,
            status=DEPOSIT_STATUS_LOAD_SUCCESS)
        try:
            deposit_parent = successful.order_by('-id')[0:1].get()
        except Deposit.DoesNotExist:
            deposit_parent = None

        deposit = Deposit(collection=self._collection,
                          external_id=external_id,
                          complete_date=complete_date,
                          status=status_type,
                          client=self._client,
                          parent=deposit_parent)

    deposit.save()
    return deposit
def _deposit_request_put(self, deposit, deposit_request_data,
                         replace_metadata=False, replace_archives=False):
    """Attach archive and/or metadata requests to a deposit.

    Args:
        deposit (Deposit): The deposit concerned by the request
        deposit_request_data (dict): holds at most 2 entries, keyed by
            ARCHIVE_KEY and METADATA_KEY; at least one of them must
            hold a truthy value
        replace_metadata (bool): when True, the deposit's existing
            metadata requests are deleted first
        replace_archives (bool): when True, the deposit's existing
            archive requests are deleted first

    Returns:
        None

    """
    request_types = self.deposit_request_types

    # drop what must be replaced (metadata first, then archives)
    to_replace = ((replace_metadata, METADATA_KEY),
                  (replace_archives, ARCHIVE_KEY))
    for must_replace, key in to_replace:
        if must_replace:
            DepositRequest.objects.filter(
                deposit=deposit, type=request_types[key]).delete()

    last_saved = None

    archive_file = deposit_request_data.get(ARCHIVE_KEY)
    if archive_file:
        last_saved = DepositRequest(type=request_types[ARCHIVE_KEY],
                                    deposit=deposit,
                                    archive=archive_file)
        last_saved.save()

    metadata = deposit_request_data.get(METADATA_KEY)
    if metadata:
        last_saved = DepositRequest(type=request_types[METADATA_KEY],
                                    deposit=deposit,
                                    metadata=metadata)
        last_saved.save()

    # at least one request must have been stored
    assert last_saved is not None
def _delete_archives(self, collection_name, deposit_id):
    """Delete the archive requests attached to a deposit.

    Args:
        collection_name (str): Client's name
        deposit_id (id): The deposit whose archives are deleted

    Returns
        Empty dict when ok.
        Dict with error key to describe the failure (unknown deposit,
        or deposit belonging to another collection).

    """
    try:
        deposit = Deposit.objects.get(pk=deposit_id)
    except Deposit.DoesNotExist:
        return make_error_dict(
            NOT_FOUND,
            'The deposit %s does not exist' % deposit_id)

    # Same ownership rule as _delete_deposit: archives can only be removed
    # through the collection the deposit belongs to.
    if deposit.collection.name != collection_name:
        summary = 'Cannot delete archives of a deposit from another ' \
                  'collection'
        description = "Deposit %s does not belong to the collection %s" % (
            deposit_id, collection_name)
        return make_error_dict(
            BAD_REQUEST,
            summary=summary,
            verbose_description=description)

    DepositRequest.objects.filter(
        deposit=deposit,
        type=self.deposit_request_types[ARCHIVE_KEY]).delete()

    return {}
def _delete_deposit(self, collection_name, deposit_id):
    """Delete a deposit along with all the requests attached to it.

    Args:
        collection_name (str): Client's name
        deposit_id (id): The deposit to delete

    Returns
        Empty dict when ok.
        Dict with error key to describe the failure (unknown deposit,
        or deposit belonging to another collection).

    """
    try:
        target = Deposit.objects.get(pk=deposit_id)
    except Deposit.DoesNotExist:
        return make_error_dict(
            NOT_FOUND, 'The deposit %s does not exist' % deposit_id)

    same_collection = target.collection.name == collection_name
    if not same_collection:
        return make_error_dict(
            BAD_REQUEST,
            summary='Cannot delete a deposit from another collection',
            verbose_description=(
                "Deposit %s does not belong to the collection %s" % (
                    deposit_id, collection_name)))

    # requests first, then the deposit itself
    DepositRequest.objects.filter(deposit=target).delete()
    target.delete()
    return {}
def _check_preconditions_on(self, filehandler, md5sum,
                            content_length=None):
    """Check the uploaded file against the size limit and, when provided,
    against the announced length and md5 checksum.

    Args:
        filehandler (InMemoryUploadedFile): The file to check
        md5sum (bytes): raw md5 digest expected from the file's content
            (no checksum verification when falsy)
        content_length (int): the expected length if provided.

    Returns:
        Either none if no error or a dictionary with a key error
        detailing the problem.

    """
    max_upload_size = self.config['max_upload_size']

    def upload_too_large():
        return make_error_dict(
            MAX_UPLOAD_SIZE_EXCEEDED,
            'Upload size limit exceeded (max %s bytes).' % max_upload_size,
            'Please consider sending the archive in multiple steps.')

    if content_length:
        if content_length > max_upload_size:
            return upload_too_large()

        if filehandler.size != content_length:
            return make_error_dict(status.HTTP_412_PRECONDITION_FAILED,
                                   'Wrong length')
    elif filehandler.size > max_upload_size:
        # No announced length (e.g. multipart deposit): the limit is
        # enforced on the actual size so it cannot be bypassed.
        return upload_too_large()

    if md5sum:
        actual_md5sum = self._compute_md5(filehandler)
        if actual_md5sum != md5sum:
            return make_error_dict(
                CHECKSUM_MISMATCH,
                'Wrong md5 hash',
                'The checksum sent %s and the actual checksum '
                '%s does not match.' % (
                    hashutil.hash_to_hex(md5sum),
                    hashutil.hash_to_hex(actual_md5sum)))

    return None
def _binary_upload(self, req, headers, collection_name, deposit_id=None,
                   replace_metadata=False, replace_archives=False):
    """Store the archive sent as the body of a binary upload request.

    Args:
        req (Request): the request holding information to parse
            and inject in db
        headers (dict): request headers formatted
        collection_name (str): the associated client
        deposit_id (id): deposit identifier if provided
        replace_metadata (bool): If False (default), existing metadata
            requests are kept. Otherwise, they are replaced.
        replace_archives (bool): If False (default), the archive is
            added to the existing ones. Otherwise, it replaces them.

    Returns:
        In the optimal case a dict with the following keys:
            - deposit_id (int): Deposit identifier
            - deposit_date (date): Deposit date
            - status: the deposit's status
            - archive: name of the uploaded file

        Otherwise, an error dictionary when:
            - the content-length or content-disposition header is
              missing (bad request)
            - the packaging header is sent but not supported
              (bad request)
            - the file breaks the size, length or md5 preconditions
              (see _check_preconditions_on)

    """
    # both headers are mandatory for an archive deposit
    mandatory = (('content-length', 'CONTENT_LENGTH'),
                 ('content-disposition', 'CONTENT_DISPOSITION'))
    for key, header_name in mandatory:
        if not headers[key]:
            return make_error_dict(
                BAD_REQUEST,
                '%s header is mandatory' % header_name,
                'For archive deposit, the %s header must be sent.' %
                header_name)

    packaging = headers['packaging']
    unsupported_packaging = (bool(packaging) and
                             packaging not in ACCEPT_PACKAGINGS)
    if unsupported_packaging:
        return make_error_dict(
            BAD_REQUEST,
            'Only packaging %s is supported' % ACCEPT_PACKAGINGS,
            'The packaging provided %s is not supported' % packaging)

    uploaded = req.FILES['file']

    failure = self._check_preconditions_on(
        uploaded, headers['content-md5sum'], headers['content-length'])
    if failure:
        return failure

    # actual storage of data
    deposit = self._deposit_put(deposit_id=deposit_id,
                                in_progress=headers['in-progress'],
                                external_id=headers['slug'])
    self._deposit_request_put(
        deposit, {ARCHIVE_KEY: uploaded},
        replace_metadata=replace_metadata,
        replace_archives=replace_archives)

    return {
        'deposit_id': deposit.id,
        'deposit_date': deposit.reception_date,
        'status': deposit.status,
        'archive': uploaded.name,
    }
def _multipart_upload(self, req, headers, collection_name,
                      deposit_id=None, replace_metadata=False,
                      replace_archives=False):
    """Multipart upload supported with exactly:
    - 1 archive (zip or tar)
    - 1 atom entry

    Any other combination of parts is refused with an error.

    Args:
        req (Request): the request holding information to parse
            and inject in db
        headers (dict): request headers formatted
        collection_name (str): the associated client
        deposit_id (id): deposit identifier if provided
        replace_metadata (bool): 'Update or add' request to existing
            deposit. If False (default), this adds new metadata request
            to existing ones. Otherwise, this will replace existing
            metadata.
        replace_archives (bool): 'Update or add' request to existing
            deposit. If False (default), this adds new archive request
            to existing ones. Otherwise, this will replace existing
            archives.

    Returns:
        In the optimal case a dict with the following keys:
            - deposit_id (int): Deposit identifier
            - deposit_date (date): Deposit date
            - archive: name of the uploaded archive
            - status: the deposit's status

        Otherwise, an error dictionary when:
            - the parts are not exactly 1 supported archive and
              1 atom entry
            - the archive breaks the size or md5 preconditions
              (see _check_preconditions_on)

    """
    atom_content_type = 'application/atom+xml'
    external_id = headers['slug']

    content_types_present = set()
    archive_file = None
    atom_file = None

    for fh in req.FILES.values():
        content_type = fh.content_type
        is_archive = content_type in ACCEPT_ARCHIVE_CONTENT_TYPES
        # A zip sent together with a tar is also "more than 1 archive",
        # even though the two content types differ.
        if (content_type in content_types_present or
                (is_archive and archive_file is not None)):
            return make_error_dict(
                ERROR_CONTENT,
                'Only 1 application/zip (or application/x-tar) archive '
                'and 1 atom+xml entry is supported (as per sword2.0 '
                'specification)',
                'You provided more than 1 application/(zip|x-tar) '
                'or more than 1 application/atom+xml content-disposition '
                'header in the multipart deposit')

        content_types_present.add(content_type)
        if is_archive:
            archive_file = fh
        elif content_type == atom_content_type:
            atom_file = fh

    # Counting the content types is not enough: both expected parts must
    # actually be there, otherwise a part of an unexpected type would be
    # mistaken for the missing one and None would be processed below.
    if (len(content_types_present) != 2 or
            archive_file is None or atom_file is None):
        return make_error_dict(
            ERROR_CONTENT,
            'You must provide both 1 application/zip (or '
            'application/x-tar) and 1 atom+xml entry for multipart '
            'deposit',
            'You need to provide only 1 application/(zip|x-tar) '
            'and 1 application/atom+xml content-disposition header '
            'in the multipart deposit')

    precondition_status_response = self._check_preconditions_on(
        archive_file,
        headers['content-md5sum'])

    if precondition_status_response:
        return precondition_status_response

    # actual storage of data
    atom_metadata = parse_xml(atom_file)
    deposit = self._deposit_put(deposit_id=deposit_id,
                                in_progress=headers['in-progress'],
                                external_id=external_id)
    deposit_request_data = {
        ARCHIVE_KEY: archive_file,
        METADATA_KEY: atom_metadata,
    }
    self._deposit_request_put(
        deposit, deposit_request_data, replace_metadata, replace_archives)

    return {
        'deposit_id': deposit.id,
        'deposit_date': deposit.reception_date,
        'archive': archive_file.name,
        'status': deposit.status,
    }
def _atom_entry(self, req, headers, collection_name,
                deposit_id=None,
                replace_metadata=False,
                replace_archives=False):
    """Store the metadata sent as an atom entry deposit.

    Args:
        req (Request): the request holding information to parse
            and inject in db
        headers (dict): request headers formatted
        collection_name (str): the associated client
        deposit_id (id): deposit identifier if provided
        replace_metadata (bool): If False (default), the metadata is
            added to the existing ones. Otherwise, it replaces them.
        replace_archives (bool): If False (default), existing archive
            requests are kept. Otherwise, they are replaced.

    Returns:
        In the optimal case a dict with the following keys:
            - deposit_id: deposit id associated to the deposit
            - deposit_date: date of the deposit
            - archive: None (no archive is provided here)
            - status: the deposit's status

        Otherwise, an error dictionary (bad request) when the request's
        body is empty.

    """
    metadata = req.data
    if not metadata:
        return make_error_dict(
            BAD_REQUEST,
            'Empty body request is not supported',
            'Atom entry deposit is supposed to send for metadata. '
            'If the body is empty, there is no metadata.')

    # the identifier found in the entry wins over the slug header
    atom_id_key = '{http://www.w3.org/2005/Atom}external_identifier'
    external_id = metadata.get(atom_id_key, headers['slug'])

    deposit = self._deposit_put(deposit_id=deposit_id,
                                in_progress=headers['in-progress'],
                                external_id=external_id)
    self._deposit_request_put(
        deposit, {METADATA_KEY: metadata},
        replace_metadata, replace_archives)

    receipt = {
        'deposit_id': deposit.id,
        'deposit_date': deposit.reception_date,
        'archive': None,
        'status': deposit.status,
    }
    return receipt
def _empty_post(self, req, headers, collection_name, deposit_id):
    """Finalize an existing deposit through a post without content.

    The deposit is marked as complete now and ready for checks.

    Args:
        req (Request): the request holding information to parse
            and inject in db
        headers (dict): request headers formatted
        collection_name (str): the associated client
        deposit_id (id): deposit identifier

    Returns:
        Dictionary of result with the deposit's id, the date
        it was completed, its status and no archive.

    """
    finalized = Deposit.objects.get(pk=deposit_id)
    finalized.complete_date = timezone.now()
    finalized.status = DEPOSIT_STATUS_READY_FOR_CHECKS
    finalized.save()

    result = {'deposit_id': deposit_id, 'archive': None}
    result['deposit_date'] = finalized.complete_date
    result['status'] = finalized.status
    return result
def _make_iris(self, req, collection_name, deposit_id):
    """Build the absolute urls of the deposit's IRI endpoints.

    Args:
        req (Request): The initial request
        collection_name (str): client/collection's name
        deposit_id (id): Deposit identifier

    Returns:
        Dictionary mapping each of EM_IRI, EDIT_SE_IRI, CONT_FILE_IRI and
        STATE_IRI to its absolute url.

    """
    iris = {}
    for iri in (EM_IRI, EDIT_SE_IRI, CONT_FILE_IRI, STATE_IRI):
        relative_url = reverse(iri, args=[collection_name, deposit_id])
        iris[iri] = req.build_absolute_uri(relative_url)
    return iris
def additional_checks(self, req, headers, collection_name,
                      deposit_id=None):
    """Hook letting a child class add its own checks on the request.

    The default implementation checks nothing.

    Returns:
        empty dict when everything is fine, otherwise a dict with
        'error' detailing the problem.

    """
    no_error = {}
    return no_error
def checks(self, req, collection_name, deposit_id=None):
    """Run the checks shared by every deposit endpoint.

    In order: the collection exists, the client (if any) exists and can
    access the collection, the deposit (if any) exists and can be acted
    upon, no mediation is requested, and the child class' additional
    checks pass. The collection and client found are kept on the
    instance (``_collection``, ``_client``).

    Args:
        req (Request): the request to check
        collection_name (str): the collection's name
        deposit_id (id): deposit identifier if provided

    Returns:
        dict with the key 'headers' (the formatted request headers) when
        every check passes, otherwise a dict with the key 'error'.

    """
    try:
        self._collection = DepositCollection.objects.get(
            name=collection_name)
    except DepositCollection.DoesNotExist:
        return make_error_dict(
            NOT_FOUND, 'Unknown collection name %s' % collection_name)

    username = req.user.username
    if username:  # unauthenticated request can have the username empty
        try:
            self._client = DepositClient.objects.get(username=username)
        except DepositClient.DoesNotExist:
            return make_error_dict(
                NOT_FOUND, 'Unknown client name %s' % username)

        allowed_collections = self._client.collections
        if self._collection.id not in allowed_collections:
            return make_error_dict(
                FORBIDDEN,
                'Client %s cannot access collection %s' % (
                    username, collection_name))

    if deposit_id:
        try:
            deposit = Deposit.objects.get(pk=deposit_id)
        except Deposit.DoesNotExist:
            return make_error_dict(
                NOT_FOUND,
                'Deposit with id %s does not exist' % deposit_id)

        access_error = self.restrict_access(req, deposit)
        if access_error:
            return access_error

    headers = self._read_headers(req)
    if headers['on-behalf-of']:
        return make_error_dict(MEDIATION_NOT_ALLOWED,
                               'Mediation is not supported.')

    extra = self.additional_checks(req, headers,
                                   collection_name, deposit_id)
    if 'error' in extra:
        return extra

    return {'headers': headers}
def restrict_access(self, req, deposit=None):
    """Refuse any non-GET request on a deposit which is not partial.

    Args:
        req (Request): the request to check
        deposit (Deposit): the deposit targeted by the request, if any

    Returns:
        None when the access is allowed, otherwise an error dictionary
        (bad request).

    """
    if not deposit:
        return None

    # reading is always allowed, writing only while the deposit is partial
    if req.method == 'GET' or deposit.status == DEPOSIT_STATUS_PARTIAL:
        return None

    return make_error_dict(
        BAD_REQUEST,
        summary="You can only act on deposit with status '%s'" % (
            DEPOSIT_STATUS_PARTIAL, ),
        verbose_description="This deposit has status '%s'" % (
            deposit.status, ))
def _basic_not_allowed_method(self, req, method):
    """Build the 'method not allowed' error response for `method`."""
    message = '%s method is not supported on this endpoint' % method
    return make_error_response(req, METHOD_NOT_ALLOWED, message)

# By default every HTTP verb is refused; the mixins supporting a verb
# override the matching method.

def get(self, req, *args, **kwargs):
    """Refuse GET requests."""
    return self._basic_not_allowed_method(req, 'GET')

def post(self, req, *args, **kwargs):
    """Refuse POST requests."""
    return self._basic_not_allowed_method(req, 'POST')

def put(self, req, *args, **kwargs):
    """Refuse PUT requests."""
    return self._basic_not_allowed_method(req, 'PUT')

def delete(self, req, *args, **kwargs):
    """Refuse DELETE requests."""
    return self._basic_not_allowed_method(req, 'DELETE')
class SWHGetDepositAPI(SWHBaseDeposit, metaclass=ABCMeta):
    """Mixin for class to support GET method.

    """
    def get(self, req, collection_name, deposit_id, format=None):
        """Endpoint to read a deposit's resource.

        Returns:
            The response built from process_get's result when the common
            checks pass, otherwise the error response matching the failed
            check.

        """
        checks = self.checks(req, collection_name, deposit_id)
        if 'error' in checks:
            return make_error_response_from_dict(req, checks['error'])

        result = self.process_get(req, collection_name, deposit_id)
        if not isinstance(result, tuple):
            # anything but a tuple is returned untouched
            return result

        status_code, content, content_type = result
        return HttpResponse(content,
                            status=status_code,
                            content_type=content_type)

    @abstractmethod
    def process_get(self, req, collection_name, deposit_id):
        """Routine to deal with the deposit's get processing.

        Returns:
            Tuple status, stream of content, content-type

        """
class SWHPostDepositAPI(SWHBaseDeposit, metaclass=ABCMeta):
    """Mixin for class to support POST method.

    """
    def post(self, req, collection_name, deposit_id=None, format=None):
        """Endpoint to create/add resources to deposit.

        Returns:
            A deposit receipt response, with the status code returned by
            process_post and a Location header, when no error occurred.
            Otherwise the error response matching the failed check or
            the error reported by process_post.

        """
        checks = self.checks(req, collection_name, deposit_id)
        if 'error' in checks:
            return make_error_response_from_dict(req, checks['error'])

        headers = checks['headers']
        _status, _iri_key, data = self.process_post(
            req, headers, collection_name, deposit_id)

        error = data.get('error')
        if error:
            return make_error_response_from_dict(req, error)

        # the receipt template context: processing result + packagings
        # + the deposit's iris
        data['packagings'] = ACCEPT_PACKAGINGS
        iris = self._make_iris(req, collection_name, data['deposit_id'])
        data.update(iris)
        response = render(req, 'deposit/deposit_receipt.xml',
                          context=data,
                          content_type='application/xml',
                          status=_status)
        # set the header through the response's public mapping interface
        # rather than its private `_headers` attribute
        response['Location'] = data[_iri_key]
        return response

    @abstractmethod
    def process_post(self, req, headers, collection_name, deposit_id=None):
        """Routine to deal with the deposit's processing.

        Returns
            Tuple of:
            - response status code (200, 201, etc...)
            - key iri (EM_IRI, EDIT_SE_IRI, etc...)
            - dictionary of the processing result

        """
class SWHPutDepositAPI(SWHBaseDeposit, metaclass=ABCMeta):
    """Mixin for class to support PUT method.

    """
    def put(self, req, collection_name, deposit_id, format=None):
        """Endpoint to update deposit resources.

        Returns:
            204 response when no error during routine occurred.
            Otherwise the error response matching the failed check or
            the error reported by process_put.

        """
        checks = self.checks(req, collection_name, deposit_id)
        if 'error' in checks:
            return make_error_response_from_dict(req, checks['error'])

        outcome = self.process_put(
            req, checks['headers'], collection_name, deposit_id)

        failure = outcome.get('error')
        if failure:
            return make_error_response_from_dict(req, failure)

        return HttpResponse(status=status.HTTP_204_NO_CONTENT)

    @abstractmethod
    def process_put(self, req, headers, collection_name, deposit_id):
        """Routine to deal with updating a deposit in some way.

        Returns
            dictionary of the processing result

        """
class SWHDeleteDepositAPI(SWHBaseDeposit, metaclass=ABCMeta):
    """Mixin for class to support DELETE method.

    """
    def delete(self, req, collection_name, deposit_id):
        """Endpoint to delete some deposit's resources (archives, deposit).

        Returns:
            204 response when no error during routine occurred.
            Otherwise the error response matching the failed check or
            the error reported by process_delete.

        """
        checks = self.checks(req, collection_name, deposit_id)
        if 'error' in checks:
            return make_error_response_from_dict(req, checks['error'])

        outcome = self.process_delete(req, collection_name, deposit_id)

        failure = outcome.get('error')
        if failure:
            return make_error_response_from_dict(req, failure)

        return HttpResponse(status=status.HTTP_204_NO_CONTENT)

    @abstractmethod
    def process_delete(self, req, collection_name, deposit_id):
        """Routine to delete a resource.

        This is mostly not allowed except for the
        EM_IRI (cf. .api.deposit_update.SWHUpdateArchiveDeposit)

        """
diff --git a/swh/deposit/api/deposit.py b/swh/deposit/api/deposit.py
index 77f73b9c..fb7ec49a 100644
--- a/swh/deposit/api/deposit.py
+++ b/swh/deposit/api/deposit.py
@@ -1,91 +1,93 @@
-# Copyright (C) 2017 The Software Heritage developers
+# Copyright (C) 2017-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from rest_framework import status
-from .common import SWHPostDepositAPI
+from .common import SWHPostDepositAPI, ACCEPT_ARCHIVE_CONTENT_TYPES
from ..config import EDIT_SE_IRI
from ..errors import make_error_dict, BAD_REQUEST
-from ..parsers import SWHFileUploadParser, SWHAtomEntryParser
+from ..parsers import SWHFileUploadZipParser, SWHFileUploadTarParser
+from ..parsers import SWHAtomEntryParser
from ..parsers import SWHMultiPartParser
class SWHDeposit(SWHPostDepositAPI):
    """Deposit request class defining api endpoints for sword deposit.

    What's known as 'Col IRI' in the sword specification.

    HTTP verbs supported: POST

    """
    parser_classes = (
        SWHMultiPartParser,
        SWHFileUploadZipParser,
        SWHFileUploadTarParser,
        SWHAtomEntryParser,
    )

    def additional_checks(self, req, headers, collection_name,
                          deposit_id=None):
        """Require the SLUG header on every deposit creation.

        Returns:
            empty dict when the header is there, otherwise an error
            dictionary (bad request).

        """
        if headers['slug']:
            return {}

        return make_error_dict(
            BAD_REQUEST,
            'Missing SLUG header in request',
            'Provide in the SLUG header one identifier, for example the '
            'url pointing to the resource you are depositing.')

    def process_post(self, req, headers, collection_name, deposit_id=None):
        """Create a first deposit as:
        - archive deposit (1 zip or 1 tar)
        - multipart (1 zip or 1 tar + 1 atom entry)
        - atom entry

        The request's content type selects the kind of deposit: one of
        the accepted archive content types for an archive deposit,
        'multipart/...' for a multipart one, anything else is handled as
        an atom entry.

        Args:
            req (Request): the request holding the information to parse
                and inject in db
            headers (dict): request headers formatted
            collection_name (str): the associated client
            deposit_id: must be None, the deposit is created here

        Returns:
            Tuple of the 201 (created) status code, EDIT_SE_IRI and the
            dictionary resulting from the deposit; that dictionary holds
            the key 'error' when the deposit was refused (see
            _binary_upload, _multipart_upload and _atom_entry).

        """
        assert deposit_id is None

        content_type = req.content_type
        if content_type in ACCEPT_ARCHIVE_CONTENT_TYPES:
            deposit_routine = self._binary_upload
        elif content_type.startswith('multipart/'):
            deposit_routine = self._multipart_upload
        else:
            deposit_routine = self._atom_entry

        data = deposit_routine(req, headers, collection_name)
        return status.HTTP_201_CREATED, EDIT_SE_IRI, data
diff --git a/swh/deposit/api/deposit_update.py b/swh/deposit/api/deposit_update.py
index d99563b2..7c2a210b 100644
--- a/swh/deposit/api/deposit_update.py
+++ b/swh/deposit/api/deposit_update.py
@@ -1,155 +1,158 @@
-# Copyright (C) 2017 The Software Heritage developers
+# Copyright (C) 2017-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from rest_framework import status
from .common import SWHPostDepositAPI, SWHPutDepositAPI, SWHDeleteDepositAPI
+from .common import ACCEPT_ARCHIVE_CONTENT_TYPES
from ..config import CONT_FILE_IRI, EDIT_SE_IRI, EM_IRI
from ..errors import make_error_response, BAD_REQUEST
-from ..parsers import SWHFileUploadParser, SWHAtomEntryParser
+from ..parsers import SWHFileUploadZipParser, SWHFileUploadTarParser
+from ..parsers import SWHAtomEntryParser
from ..parsers import SWHMultiPartParser
class SWHUpdateArchiveDeposit(SWHPostDepositAPI, SWHPutDepositAPI,
                              SWHDeleteDepositAPI):
    """Deposit request class defining api endpoints for sword deposit.

    What's known as 'EM IRI' in the sword specification.

    HTTP verbs supported: PUT, POST, DELETE

    """
    parser_classes = (SWHFileUploadZipParser, SWHFileUploadTarParser, )

    @staticmethod
    def _unsupported_content_type_error():
        """Build the error dictionary for a non-archive content type."""
        # Imported here because the module only imports make_error_response
        # from ..errors. The put/post mixins read the error with
        # `data.get('error')`, so an error dictionary is needed, not an
        # already built http response.
        from ..errors import make_error_dict

        msg = 'Packaging format supported is restricted to %s' % (
            ', '.join(ACCEPT_ARCHIVE_CONTENT_TYPES))
        return make_error_dict(BAD_REQUEST, msg)

    def process_put(self, req, headers, collection_name, deposit_id):
        """Replace existing content for the existing deposit.

           source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_editingcontent_binary  # noqa

        Returns:
            The dictionary resulting from the archive upload, or an error
            dictionary (bad request) when the content type is not one of
            the accepted archive content types.

        """
        if req.content_type not in ACCEPT_ARCHIVE_CONTENT_TYPES:
            return self._unsupported_content_type_error()

        return self._binary_upload(req, headers, collection_name,
                                   deposit_id=deposit_id,
                                   replace_archives=True)

    def process_post(self, req, headers, collection_name, deposit_id):
        """Add new content to the existing deposit.

           source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_addingcontent_mediaresource  # noqa

        Returns:
            Tuple of the response status code, CONT_FILE_IRI and the
            dictionary resulting from the archive upload (201 Created),
            or an error dictionary (bad request) when the content type is
            not one of the accepted archive content types.

        """
        # same accepted content types as process_put
        if req.content_type not in ACCEPT_ARCHIVE_CONTENT_TYPES:
            return (status.HTTP_400_BAD_REQUEST, CONT_FILE_IRI,
                    self._unsupported_content_type_error())

        return (status.HTTP_201_CREATED, CONT_FILE_IRI,
                self._binary_upload(req, headers, collection_name,
                                    deposit_id))

    def process_delete(self, req, collection_name, deposit_id):
        """Delete content (archives) from existing deposit.

           source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html#protocoloperations_deletingcontent  # noqa

        Returns:
            Empty dict when the archives were deleted, otherwise an error
            dictionary.

        """
        return self._delete_archives(collection_name, deposit_id)
class SWHUpdateMetadataDeposit(SWHPostDepositAPI, SWHPutDepositAPI,
SWHDeleteDepositAPI):
"""Deposit request class defining api endpoints for sword deposit.
What's known as 'Edit IRI' (and SE IRI) in the sword specification.
HTTP verbs supported: POST (SE IRI), PUT (Edit IRI), DELETE
"""
parser_classes = (SWHMultiPartParser, SWHAtomEntryParser)
def process_put(self, req, headers, collection_name, deposit_id):
"""Replace existing deposit's metadata/archive with new ones.
source:
- http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html
#protocoloperations_editingcontent_metadata
- http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html
#protocoloperations_editingcontent_multipart
Returns:
204 No content
"""
if req.content_type.startswith('multipart/'):
return self._multipart_upload(req, headers, collection_name,
deposit_id=deposit_id,
replace_archives=True,
replace_metadata=True)
return self._atom_entry(req, headers, collection_name,
deposit_id=deposit_id, replace_metadata=True)
def process_post(self, req, headers, collection_name, deposit_id):
"""Add new metadata/archive to existing deposit.
source:
- http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html
#protocoloperations_addingcontent_metadata
- http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html
#protocoloperations_addingcontent_multipart
This also deals with an empty post corner case to finalize a
deposit.
Returns:
In optimal case for a multipart and atom-entry update, a
201 Created response. The body response will hold a
deposit. And the response headers will contain an entry
'Location' with the EM-IRI.
For the empty post case, this returns a 200.
"""
if req.content_type.startswith('multipart/'):
return (status.HTTP_201_CREATED, EM_IRI,
self._multipart_upload(req, headers, collection_name,
deposit_id=deposit_id))
# check for final empty post
# source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html
# #continueddeposit_complete
if headers['content-length'] == 0 and headers['in-progress'] is False:
data = self._empty_post(req, headers, collection_name, deposit_id)
return (status.HTTP_200_OK, EDIT_SE_IRI, data)
return (status.HTTP_201_CREATED, EM_IRI,
self._atom_entry(req, headers, collection_name,
deposit_id=deposit_id))
def process_delete(self, req, collection_name, deposit_id):
"""Delete the container (deposit).
Source: http://swordapp.github.io/SWORDv2-Profile/SWORDProfile.html
#protocoloperations_deleteconteiner
"""
return self._delete_deposit(collection_name, deposit_id)
diff --git a/swh/deposit/api/service_document.py b/swh/deposit/api/service_document.py
index 35905247..9ecac083 100644
--- a/swh/deposit/api/service_document.py
+++ b/swh/deposit/api/service_document.py
@@ -1,28 +1,29 @@
-# Copyright (C) 2017 The Software Heritage developers
+# Copyright (C) 2017-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from django.shortcuts import render
-from .common import SWHBaseDeposit, ACCEPT_PACKAGINGS, ACCEPT_CONTENT_TYPES
+from .common import SWHBaseDeposit, ACCEPT_PACKAGINGS
+from .common import ACCEPT_ARCHIVE_CONTENT_TYPES
from ..models import DepositClient, DepositCollection
class SWHServiceDocument(SWHBaseDeposit):
def get(self, req, *args, **kwargs):
client = DepositClient.objects.get(username=req.user)
collections = []
for col_id in client.collections:
col = DepositCollection.objects.get(pk=col_id)
collections.append(col)
context = {
'max_upload_size': self.config['max_upload_size'],
'accept_packagings': ACCEPT_PACKAGINGS,
- 'accept_content_types': ACCEPT_CONTENT_TYPES,
+ 'accept_content_types': ACCEPT_ARCHIVE_CONTENT_TYPES,
'collections': collections,
}
return render(req, 'deposit/service_document.xml',
context, content_type='application/xml')
diff --git a/swh/deposit/parsers.py b/swh/deposit/parsers.py
index f0a4c6e7..5c4198c3 100644
--- a/swh/deposit/parsers.py
+++ b/swh/deposit/parsers.py
@@ -1,61 +1,68 @@
-# Copyright (C) 2017 The Software Heritage developers
+# Copyright (C) 2017-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
"""Module in charge of defining parsers with SWORD 2.0 supported mediatypes.
"""
from decimal import Decimal
from rest_framework.parsers import FileUploadParser
from rest_framework.parsers import MultiPartParser
from rest_framework_xml.parsers import XMLParser
-class SWHFileUploadParser(FileUploadParser):
+class SWHFileUploadZipParser(FileUploadParser):
"""File upload parser limited to zip archive.
"""
media_type = 'application/zip'
+class SWHFileUploadTarParser(FileUploadParser):
+ """File upload parser limited to tar archive.
+
+ """
+ media_type = 'application/x-tar'
+
+
class SWHXMLParser(XMLParser):
def _type_convert(self, value):
"""Override the default type converter to avoid having decimal in the
resulting output.
"""
value = super()._type_convert(value)
if isinstance(value, Decimal):
value = str(value)
return value
class SWHAtomEntryParser(SWHXMLParser):
"""Atom entry parser limited to specific mediatype
"""
media_type = 'application/atom+xml;type=entry'
class SWHMultiPartParser(MultiPartParser):
"""Multipart parser limited to a subset of mediatypes.
"""
media_type = 'multipart/*; *'
def parse_xml(raw_content):
"""Parse xml body.
Args:
raw_content (bytes): The content to parse
Returns:
content parsed as dict.
"""
return SWHXMLParser().parse(raw_content)
diff --git a/swh/deposit/templates/deposit/service_document.xml b/swh/deposit/templates/deposit/service_document.xml
index f66dae40..aec0d822 100644
--- a/swh/deposit/templates/deposit/service_document.xml
+++ b/swh/deposit/templates/deposit/service_document.xml
@@ -1,24 +1,24 @@
2.0
{{ max_upload_size }}
The Software Heritage (SWH) Archive
{% for collection in collections %}
{{ collection.name }} Software Collection
- {% for accept_content_type in accept_content_types %}{{ accept_content_type }}{% endfor %}
- Collection Policy
+ {% for accept_content_type in accept_content_types %}{{ accept_content_type }}
+ {% endfor %}Collection Policy
Software Heritage Archive
Collect, Preserve, Share
false
false
- {% for accept_packaging in accept_packagings %}{{ accept_packaging }}{% endfor %}
- https://deposit.softwareheritage.org/1/{{ collection.name }}/
+ {% for accept_packaging in accept_packagings %}{{ accept_packaging }}
+ {% endfor %}https://deposit.softwareheritage.org/1/{{ collection.name }}/
{% endfor %}
diff --git a/swh/deposit/tests/api/test_deposit_binary.py b/swh/deposit/tests/api/test_deposit_binary.py
index f4d7a146..94b242b1 100644
--- a/swh/deposit/tests/api/test_deposit_binary.py
+++ b/swh/deposit/tests/api/test_deposit_binary.py
@@ -1,660 +1,660 @@
-# Copyright (C) 2017 The Software Heritage developers
+# Copyright (C) 2017-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from django.core.files.uploadedfile import InMemoryUploadedFile
from django.core.urlresolvers import reverse
from io import BytesIO
from nose.tools import istest
from rest_framework import status
from rest_framework.test import APITestCase
from swh.deposit.tests import TEST_CONFIG
from swh.deposit.config import COL_IRI, EM_IRI
from swh.deposit.config import DEPOSIT_STATUS_READY_FOR_CHECKS
from swh.deposit.models import Deposit, DepositRequest
from swh.deposit.parsers import parse_xml
from ..common import BasicTestCase, WithAuthTestCase, create_arborescence_zip
from ..common import FileSystemCreationRoutine
class DepositTestCase(APITestCase, WithAuthTestCase, BasicTestCase,
FileSystemCreationRoutine):
"""Try and upload one single deposit
"""
def setUp(self):
super().setUp()
self.atom_entry_data0 = b"""
Awesome Compiler
hal
urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
%s
2017-10-07T15:17:08Z
some awesome author
something
awesome-compiler
This is an awesome compiler destined to
awesomely compile stuff
and other stuff
compiler,programming,language
2005-10-07T17:17:08Z
2005-10-07T17:17:08Z
release note
related link
Awesome
https://hoster.org/awesome-compiler
GNU/Linux
0.0.1
running
all
"""
self.atom_entry_data1 = b"""
hal
urn:uuid:2225c695-cfb8-4ebb-aaaa-80da344efa6a
2017-10-07T15:17:08Z
some awesome author
something
awesome-compiler
This is an awesome compiler destined to
awesomely compile stuff
and other stuff
compiler,programming,language
2005-10-07T17:17:08Z
2005-10-07T17:17:08Z
release note
related link
Awesome
https://hoster.org/awesome-compiler
GNU/Linux
0.0.1
running
all
"""
self.atom_entry_data2 = b"""
%s
"""
self.atom_entry_data_empty_body = b"""
"""
self.atom_entry_data3 = b"""
something
"""
self.data_atom_entry_ok = b"""
Title
urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
2005-10-07T17:17:08Z
Contributor
The abstract
The abstract
Access Rights
Alternative Title
Date Available
Bibliographic Citation # noqa
Contributor
Description
Has Part
Has Version
Identifier
Is Part Of
Publisher
References
Rights Holder
Source
Title
Type
"""
@istest
def post_deposit_binary_without_slug_header_is_bad_request(self):
"""Posting a binary deposit without slug header should return 400
"""
url = reverse(COL_IRI, args=[self.collection.name])
# when
response = self.client.post(
url,
content_type='application/zip', # as zip
data=self.archive['data'],
# + headers
CONTENT_LENGTH=self.archive['length'],
HTTP_CONTENT_MD5=self.archive['md5sum'],
HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip',
HTTP_IN_PROGRESS='false',
HTTP_CONTENT_DISPOSITION='attachment; filename=filename0')
self.assertIn(b'Missing SLUG header', response.content)
self.assertEqual(response.status_code,
status.HTTP_400_BAD_REQUEST)
@istest
def post_deposit_binary_upload_final_and_status_check(self):
"""Binary upload with correct headers should return 201 with receipt
"""
# given
url = reverse(COL_IRI, args=[self.collection.name])
external_id = 'some-external-id-1'
# when
response = self.client.post(
url,
content_type='application/zip', # as zip
data=self.archive['data'],
# + headers
CONTENT_LENGTH=self.archive['length'],
# other headers needs HTTP_ prefix to be taken into account
HTTP_SLUG=external_id,
HTTP_CONTENT_MD5=self.archive['md5sum'],
HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip',
HTTP_IN_PROGRESS='false',
HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % (
self.archive['name'], ))
# then
response_content = parse_xml(BytesIO(response.content))
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
deposit_id = response_content[
'{http://www.w3.org/2005/Atom}deposit_id']
deposit = Deposit.objects.get(pk=deposit_id)
self.assertEqual(deposit.status, DEPOSIT_STATUS_READY_FOR_CHECKS)
self.assertEqual(deposit.external_id, external_id)
self.assertEqual(deposit.collection, self.collection)
self.assertEqual(deposit.client, self.user)
self.assertIsNone(deposit.swh_id)
deposit_request = DepositRequest.objects.get(deposit=deposit)
self.assertEquals(deposit_request.deposit, deposit)
self.assertRegex(deposit_request.archive.name, self.archive['name'])
response_content = parse_xml(BytesIO(response.content))
self.assertEqual(
response_content['{http://www.w3.org/2005/Atom}deposit_archive'],
self.archive['name'])
self.assertEqual(
response_content['{http://www.w3.org/2005/Atom}deposit_id'],
deposit.id)
self.assertEqual(
response_content['{http://www.w3.org/2005/Atom}deposit_status'],
deposit.status)
edit_se_iri = reverse('edit_se_iri',
args=[self.collection.name, deposit.id])
self.assertEqual(response._headers['location'],
('Location', 'http://testserver' + edit_se_iri))
@istest
- def post_deposit_binary_upload_only_supports_zip(self):
- """Binary upload without content_type application/zip should return 415
+ def post_deposit_binary_upload_supports_zip_or_tar(self):
+ """Binary upload with content-type not in [zip,x-tar] should return 415
"""
# given
url = reverse(COL_IRI, args=[self.collection.name])
external_id = 'some-external-id-1'
# when
response = self.client.post(
url,
content_type='application/octet-stream',
data=self.archive['data'],
# + headers
CONTENT_LENGTH=self.archive['length'],
HTTP_SLUG=external_id,
HTTP_CONTENT_MD5=self.archive['md5sum'],
HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip',
HTTP_IN_PROGRESS='false',
HTTP_CONTENT_DISPOSITION='attachment; filename=filename0')
# then
self.assertEqual(response.status_code,
status.HTTP_415_UNSUPPORTED_MEDIA_TYPE)
with self.assertRaises(Deposit.DoesNotExist):
Deposit.objects.get(external_id=external_id)
@istest
def post_deposit_binary_fails_if_unsupported_packaging_header(
self):
- """Bin deposit without supported content_disposition header returns 400
+ """Binary deposit with an unsupported packaging header returns 400
"""
# given
url = reverse(COL_IRI, args=[self.collection.name])
external_id = 'some-external-id'
# when
response = self.client.post(
url,
content_type='application/zip',
data=self.archive['data'],
# + headers
CONTENT_LENGTH=self.archive['length'],
HTTP_SLUG=external_id,
HTTP_CONTENT_MD5=self.archive['md5sum'],
HTTP_PACKAGING='something-unsupported',
HTTP_CONTENT_DISPOSITION='attachment; filename=filename0')
# then
self.assertEqual(response.status_code,
status.HTTP_400_BAD_REQUEST)
with self.assertRaises(Deposit.DoesNotExist):
Deposit.objects.get(external_id=external_id)
@istest
def post_deposit_binary_upload_fail_if_no_content_disposition_header(
self):
"""Binary upload without content_disposition header should return 400
"""
# given
url = reverse(COL_IRI, args=[self.collection.name])
external_id = 'some-external-id'
# when
response = self.client.post(
url,
content_type='application/zip',
data=self.archive['data'],
# + headers
CONTENT_LENGTH=self.archive['length'],
HTTP_SLUG=external_id,
HTTP_CONTENT_MD5=self.archive['md5sum'],
HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip',
HTTP_IN_PROGRESS='false')
# then
self.assertEqual(response.status_code,
status.HTTP_400_BAD_REQUEST)
with self.assertRaises(Deposit.DoesNotExist):
Deposit.objects.get(external_id=external_id)
@istest
def post_deposit_mediation_not_supported(self):
"""Binary upload with mediation should return a 412 response
"""
# given
url = reverse(COL_IRI, args=[self.collection.name])
external_id = 'some-external-id-1'
# when
response = self.client.post(
url,
content_type='application/zip',
data=self.archive['data'],
# + headers
CONTENT_LENGTH=self.archive['length'],
HTTP_SLUG=external_id,
HTTP_CONTENT_MD5=self.archive['md5sum'],
HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip',
HTTP_IN_PROGRESS='false',
HTTP_ON_BEHALF_OF='someone',
HTTP_CONTENT_DISPOSITION='attachment; filename=filename0')
# then
self.assertEqual(response.status_code,
status.HTTP_412_PRECONDITION_FAILED)
with self.assertRaises(Deposit.DoesNotExist):
Deposit.objects.get(external_id=external_id)
@istest
def post_deposit_binary_upload_fail_if_upload_size_limit_exceeded(
self):
"""Binary upload must not exceed the limit set up...
"""
# given
url = reverse(COL_IRI, args=[self.collection.name])
archive = create_arborescence_zip(
self.root_path, 'archive2', 'file2', b'some content in file',
up_to_size=TEST_CONFIG['max_upload_size'])
external_id = 'some-external-id'
# when
response = self.client.post(
url,
content_type='application/zip',
data=archive['data'],
# + headers
CONTENT_LENGTH=archive['length'],
HTTP_SLUG=external_id,
HTTP_CONTENT_MD5=archive['md5sum'],
HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip',
HTTP_IN_PROGRESS='false',
HTTP_CONTENT_DISPOSITION='attachment; filename=filename0')
# then
self.assertEqual(response.status_code,
status.HTTP_413_REQUEST_ENTITY_TOO_LARGE)
self.assertRegex(response.content, b'Upload size limit exceeded')
with self.assertRaises(Deposit.DoesNotExist):
Deposit.objects.get(external_id=external_id)
@istest
def post_deposit_2_post_2_different_deposits(self):
"""2 posting deposits should return 2 different 201 with receipt
"""
url = reverse(COL_IRI, args=[self.collection.name])
# when
response = self.client.post(
url,
content_type='application/zip', # as zip
data=self.archive['data'],
# + headers
CONTENT_LENGTH=self.archive['length'],
HTTP_SLUG='some-external-id-1',
HTTP_CONTENT_MD5=self.archive['md5sum'],
HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip',
HTTP_IN_PROGRESS='false',
HTTP_CONTENT_DISPOSITION='attachment; filename=filename0')
# then
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
response_content = parse_xml(BytesIO(response.content))
deposit_id = response_content[
'{http://www.w3.org/2005/Atom}deposit_id']
deposit = Deposit.objects.get(pk=deposit_id)
deposits = Deposit.objects.all()
self.assertEqual(len(deposits), 1)
self.assertEqual(deposits[0], deposit)
# second post
response = self.client.post(
url,
- content_type='application/zip', # as zip
+ content_type='application/x-tar', # as tar
data=self.archive['data'],
# + headers
CONTENT_LENGTH=self.archive['length'],
HTTP_SLUG='another-external-id',
HTTP_CONTENT_MD5=self.archive['md5sum'],
HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip',
HTTP_IN_PROGRESS='false',
HTTP_CONTENT_DISPOSITION='attachment; filename=filename1')
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
response_content = parse_xml(BytesIO(response.content))
deposit_id2 = response_content[
'{http://www.w3.org/2005/Atom}deposit_id']
deposit2 = Deposit.objects.get(pk=deposit_id2)
self.assertNotEqual(deposit, deposit2)
deposits = Deposit.objects.all().order_by('id')
self.assertEqual(len(deposits), 2)
self.assertEqual(list(deposits), [deposit, deposit2])
@istest
def post_deposit_binary_and_post_to_add_another_archive(self):
"""Updating a deposit should return a 201 with receipt
"""
# given
url = reverse(COL_IRI, args=[self.collection.name])
external_id = 'some-external-id-1'
# when
response = self.client.post(
url,
content_type='application/zip', # as zip
data=self.archive['data'],
# + headers
CONTENT_LENGTH=self.archive['length'],
HTTP_SLUG=external_id,
HTTP_CONTENT_MD5=self.archive['md5sum'],
HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip',
HTTP_IN_PROGRESS='true',
HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % (
self.archive['name'], ))
# then
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
response_content = parse_xml(BytesIO(response.content))
deposit_id = response_content[
'{http://www.w3.org/2005/Atom}deposit_id']
deposit = Deposit.objects.get(pk=deposit_id)
self.assertEqual(deposit.status, 'partial')
self.assertEqual(deposit.external_id, external_id)
self.assertEqual(deposit.collection, self.collection)
self.assertEqual(deposit.client, self.user)
self.assertIsNone(deposit.swh_id)
deposit_request = DepositRequest.objects.get(deposit=deposit)
self.assertEquals(deposit_request.deposit, deposit)
self.assertEquals(deposit_request.type.name, 'archive')
self.assertRegex(deposit_request.archive.name, self.archive['name'])
# 2nd archive to upload
archive2 = create_arborescence_zip(
self.root_path, 'archive2', 'file2', b'some other content in file')
# uri to update the content
update_uri = reverse(EM_IRI, args=[self.collection.name, deposit_id])
# adding another archive for the deposit and finalizing it
response = self.client.post(
update_uri,
content_type='application/zip', # as zip
data=archive2['data'],
# + headers
CONTENT_LENGTH=archive2['length'],
HTTP_SLUG=external_id,
HTTP_CONTENT_MD5=archive2['md5sum'],
HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip',
HTTP_CONTENT_DISPOSITION='attachment; filename=%s' % (
archive2['name']))
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
response_content = parse_xml(BytesIO(response.content))
deposit = Deposit.objects.get(pk=deposit_id)
self.assertEqual(deposit.status, DEPOSIT_STATUS_READY_FOR_CHECKS)
self.assertEqual(deposit.external_id, external_id)
self.assertEqual(deposit.collection, self.collection)
self.assertEqual(deposit.client, self.user)
self.assertIsNone(deposit.swh_id)
deposit_requests = list(DepositRequest.objects.filter(deposit=deposit).
order_by('id'))
# 2 deposit requests for the same deposit
self.assertEquals(len(deposit_requests), 2)
self.assertEquals(deposit_requests[0].deposit, deposit)
self.assertEquals(deposit_requests[0].type.name, 'archive')
self.assertRegex(deposit_requests[0].archive.name,
self.archive['name'])
self.assertEquals(deposit_requests[1].deposit, deposit)
self.assertEquals(deposit_requests[1].type.name, 'archive')
self.assertRegex(deposit_requests[1].archive.name,
archive2['name'])
# only 1 deposit in db
deposits = Deposit.objects.all()
self.assertEqual(len(deposits), 1)
@istest
def post_deposit_then_post_or_put_is_refused_when_status_ready(self):
"""Updating a deposit with status 'ready' should return a 400
"""
url = reverse(COL_IRI, args=[self.collection.name])
external_id = 'some-external-id-1'
# when
response = self.client.post(
url,
content_type='application/zip', # as zip
data=self.archive['data'],
# + headers
CONTENT_LENGTH=self.archive['length'],
HTTP_SLUG=external_id,
HTTP_CONTENT_MD5=self.archive['md5sum'],
HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip',
HTTP_IN_PROGRESS='false',
HTTP_CONTENT_DISPOSITION='attachment; filename=filename0')
# then
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
response_content = parse_xml(BytesIO(response.content))
deposit_id = response_content[
'{http://www.w3.org/2005/Atom}deposit_id']
deposit = Deposit.objects.get(pk=deposit_id)
self.assertEqual(deposit.status, DEPOSIT_STATUS_READY_FOR_CHECKS)
self.assertEqual(deposit.external_id, external_id)
self.assertEqual(deposit.collection, self.collection)
self.assertEqual(deposit.client, self.user)
self.assertIsNone(deposit.swh_id)
deposit_request = DepositRequest.objects.get(deposit=deposit)
self.assertEquals(deposit_request.deposit, deposit)
self.assertRegex(deposit_request.archive.name, 'filename0')
# updating/adding is forbidden
# uri to update the content
edit_se_iri = reverse(
'edit_se_iri', args=[self.collection.name, deposit_id])
em_iri = reverse(
'em_iri', args=[self.collection.name, deposit_id])
# Testing all update/add endpoint should fail
# since the status is ready
archive2 = create_arborescence_zip(
self.root_path, 'archive2', 'file2', b'some content in file 2')
# replacing file is no longer possible since the deposit's
# status is ready
r = self.client.put(
em_iri,
content_type='application/zip',
data=archive2['data'],
CONTENT_LENGTH=archive2['length'],
HTTP_SLUG=external_id,
HTTP_CONTENT_MD5=archive2['md5sum'],
HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip',
HTTP_IN_PROGRESS='false',
HTTP_CONTENT_DISPOSITION='attachment; filename=filename0')
self.assertEquals(r.status_code, status.HTTP_400_BAD_REQUEST)
# adding file is no longer possible since the deposit's status
# is ready
r = self.client.post(
em_iri,
content_type='application/zip',
data=archive2['data'],
CONTENT_LENGTH=archive2['length'],
HTTP_SLUG=external_id,
HTTP_CONTENT_MD5=archive2['md5sum'],
HTTP_PACKAGING='http://purl.org/net/sword/package/SimpleZip',
HTTP_IN_PROGRESS='false',
HTTP_CONTENT_DISPOSITION='attachment; filename=filename0')
self.assertEquals(r.status_code, status.HTTP_400_BAD_REQUEST)
# replacing metadata is no longer possible since the deposit's
# status is ready
r = self.client.put(
edit_se_iri,
content_type='application/atom+xml;type=entry',
data=self.data_atom_entry_ok,
CONTENT_LENGTH=len(self.data_atom_entry_ok),
HTTP_SLUG=external_id)
self.assertEquals(r.status_code, status.HTTP_400_BAD_REQUEST)
# adding new metadata is no longer possible since the
# deposit's status is ready
r = self.client.post(
edit_se_iri,
content_type='application/atom+xml;type=entry',
data=self.data_atom_entry_ok,
CONTENT_LENGTH=len(self.data_atom_entry_ok),
HTTP_SLUG=external_id)
self.assertEquals(r.status_code, status.HTTP_400_BAD_REQUEST)
archive_content = b'some content representing archive'
archive = InMemoryUploadedFile(
BytesIO(archive_content),
field_name='archive0',
name='archive0',
content_type='application/zip',
size=len(archive_content),
charset=None)
atom_entry = InMemoryUploadedFile(
BytesIO(self.data_atom_entry_ok),
field_name='atom0',
name='atom0',
content_type='application/atom+xml; charset="utf-8"',
size=len(self.data_atom_entry_ok),
charset='utf-8')
# replacing multipart metadata is no longer possible since the
# deposit's status is ready
r = self.client.put(
edit_se_iri,
format='multipart',
data={
'archive': archive,
'atom_entry': atom_entry,
})
self.assertEquals(r.status_code, status.HTTP_400_BAD_REQUEST)
# adding new metadata is no longer possible since the
# deposit's status is ready
r = self.client.post(
edit_se_iri,
format='multipart',
data={
'archive': archive,
'atom_entry': atom_entry,
})
self.assertEquals(r.status_code, status.HTTP_400_BAD_REQUEST)
diff --git a/swh/deposit/tests/api/test_deposit_multipart.py b/swh/deposit/tests/api/test_deposit_multipart.py
index 1b0f5bb1..fa01b4f5 100644
--- a/swh/deposit/tests/api/test_deposit_multipart.py
+++ b/swh/deposit/tests/api/test_deposit_multipart.py
@@ -1,327 +1,393 @@
-# Copyright (C) 2017 The Software Heritage developers
+# Copyright (C) 2017-2018 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from django.core.files.uploadedfile import InMemoryUploadedFile
from django.core.urlresolvers import reverse
from io import BytesIO
from nose.tools import istest
from rest_framework import status
from rest_framework.test import APITestCase
from swh.deposit.config import COL_IRI
from swh.deposit.config import DEPOSIT_STATUS_READY_FOR_CHECKS
from swh.deposit.models import Deposit, DepositRequest
from swh.deposit.parsers import parse_xml
from ..common import BasicTestCase, WithAuthTestCase
from ..common import FileSystemCreationRoutine
class DepositMultipartTestCase(APITestCase, WithAuthTestCase, BasicTestCase,
FileSystemCreationRoutine):
"""Post multipart deposit scenario
"""
def setUp(self):
super().setUp()
self.data_atom_entry_ok = b"""
Title
urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
2005-10-07T17:17:08Z
Contributor
The abstract
The abstract
Access Rights
Alternative Title
Date Available
Bibliographic Citation # noqa
Contributor
Description
Has Part
Has Version
Identifier
Is Part Of
Publisher
References
Rights Holder
Source
Title
Type
"""
self.data_atom_entry_update_in_place = """
urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa7b
Title
Type
"""
@istest
def post_deposit_multipart_without_slug_header_is_bad_request(self):
# given
url = reverse(COL_IRI, args=[self.collection.name])
data_atom_entry = self.data_atom_entry_ok
archive_content = b'some content representing archive'
archive = InMemoryUploadedFile(
BytesIO(archive_content),
field_name='archive0',
name='archive0',
content_type='application/zip',
size=len(archive_content),
charset=None)
atom_entry = InMemoryUploadedFile(
BytesIO(data_atom_entry),
field_name='atom0',
name='atom0',
content_type='application/atom+xml; charset="utf-8"',
size=len(data_atom_entry),
charset='utf-8')
# when
response = self.client.post(
url,
format='multipart',
data={
'archive': archive,
'atom_entry': atom_entry,
},
# + headers
HTTP_IN_PROGRESS='false')
self.assertIn(b'Missing SLUG header', response.content)
self.assertEqual(response.status_code,
status.HTTP_400_BAD_REQUEST)
@istest
- def post_deposit_multipart(self):
- """one multipart deposit should be accepted
+ def post_deposit_multipart_zip(self):
+ """one multipart deposit (zip+xml) should be accepted
"""
# given
url = reverse(COL_IRI, args=[self.collection.name])
# from django.core.files import uploadedfile
data_atom_entry = self.data_atom_entry_ok
archive = InMemoryUploadedFile(
BytesIO(self.archive['data']),
field_name=self.archive['name'],
name=self.archive['name'],
content_type='application/zip',
size=self.archive['length'],
charset=None)
atom_entry = InMemoryUploadedFile(
BytesIO(data_atom_entry),
field_name='atom0',
name='atom0',
content_type='application/atom+xml; charset="utf-8"',
size=len(data_atom_entry),
charset='utf-8')
external_id = 'external-id'
# when
response = self.client.post(
url,
format='multipart',
data={
'archive': archive,
'atom_entry': atom_entry,
},
# + headers
HTTP_IN_PROGRESS='false',
HTTP_SLUG=external_id)
# then
self.assertEqual(response.status_code, status.HTTP_201_CREATED)
response_content = parse_xml(BytesIO(response.content))
deposit_id = response_content[
'{http://www.w3.org/2005/Atom}deposit_id']
deposit = Deposit.objects.get(pk=deposit_id)
self.assertEqual(deposit.status, DEPOSIT_STATUS_READY_FOR_CHECKS)
self.assertEqual(deposit.external_id, external_id)
self.assertEqual(deposit.collection, self.collection)
self.assertEqual(deposit.client, self.user)
self.assertIsNone(deposit.swh_id)
deposit_requests = DepositRequest.objects.filter(deposit=deposit)
self.assertEquals(len(deposit_requests), 2)
for deposit_request in deposit_requests:
self.assertEquals(deposit_request.deposit, deposit)
if deposit_request.type.name == 'archive':
self.assertRegex(deposit_request.archive.name,
self.archive['name'])
else:
self.assertEquals(
deposit_request.metadata[
'{http://www.w3.org/2005/Atom}id'],
'urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a')
+ @istest
+ def post_deposit_multipart_tar(self):
+ """one multipart deposit (tar+xml) should be accepted
+
+ """
+ # given
+ url = reverse(COL_IRI, args=[self.collection.name])
+
+ # from django.core.files import uploadedfile
+ data_atom_entry = self.data_atom_entry_ok
+
+ archive = InMemoryUploadedFile(
+ BytesIO(self.archive['data']),
+ field_name=self.archive['name'],
+ name=self.archive['name'],
+ content_type='application/x-tar',
+ size=self.archive['length'],
+ charset=None)
+
+ atom_entry = InMemoryUploadedFile(
+ BytesIO(data_atom_entry),
+ field_name='atom0',
+ name='atom0',
+ content_type='application/atom+xml; charset="utf-8"',
+ size=len(data_atom_entry),
+ charset='utf-8')
+
+ external_id = 'external-id'
+
+ # when
+ response = self.client.post(
+ url,
+ format='multipart',
+ data={
+ 'archive': archive,
+ 'atom_entry': atom_entry,
+ },
+ # + headers
+ HTTP_IN_PROGRESS='false',
+ HTTP_SLUG=external_id)
+
+ # then
+ self.assertEqual(response.status_code, status.HTTP_201_CREATED)
+
+ response_content = parse_xml(BytesIO(response.content))
+ deposit_id = response_content[
+ '{http://www.w3.org/2005/Atom}deposit_id']
+
+ deposit = Deposit.objects.get(pk=deposit_id)
+ self.assertEqual(deposit.status, DEPOSIT_STATUS_READY_FOR_CHECKS)
+ self.assertEqual(deposit.external_id, external_id)
+ self.assertEqual(deposit.collection, self.collection)
+ self.assertEqual(deposit.client, self.user)
+ self.assertIsNone(deposit.swh_id)
+
+ deposit_requests = DepositRequest.objects.filter(deposit=deposit)
+ self.assertEquals(len(deposit_requests), 2)
+ for deposit_request in deposit_requests:
+ self.assertEquals(deposit_request.deposit, deposit)
+ if deposit_request.type.name == 'archive':
+ self.assertRegex(deposit_request.archive.name,
+ self.archive['name'])
+ else:
+ self.assertEquals(
+ deposit_request.metadata[
+ '{http://www.w3.org/2005/Atom}id'],
+ 'urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a')
+
@istest
def post_deposit_multipart_put_to_replace_metadata(self):
    """One multipart deposit followed by a metadata update should be
       accepted

    The deposit is first created as partial (In-Progress: true) from an
    archive and an atom entry.  A PUT on the IRI returned in the
    Location header then sends new metadata with In-Progress: false.
    After it, the deposit is expected to be ready for checks, to still
    hold two requests, and to expose the updated atom id.

    """
    external_id = 'external-id'
    atom_id_key = '{http://www.w3.org/2005/Atom}id'

    def check_deposit(deposit_id, expected_status, expected_atom_id):
        # Reload the deposit on each call so the assertions run against
        # what is stored at that point of the scenario.
        deposit = Deposit.objects.get(pk=deposit_id)
        self.assertEqual(deposit.status, expected_status)
        self.assertEqual(deposit.external_id, external_id)
        self.assertEqual(deposit.collection, self.collection)
        self.assertEqual(deposit.client, self.user)
        self.assertIsNone(deposit.swh_id)

        deposit_requests = DepositRequest.objects.filter(deposit=deposit)
        self.assertEqual(len(deposit_requests), 2)
        for deposit_request in deposit_requests:
            self.assertEqual(deposit_request.deposit, deposit)
            if deposit_request.type.name == 'archive':
                self.assertRegex(deposit_request.archive.name,
                                 self.archive['name'])
            else:
                self.assertEqual(deposit_request.metadata[atom_id_key],
                                 expected_atom_id)

    # given
    url = reverse(COL_IRI, args=[self.collection.name])

    data_atom_entry = self.data_atom_entry_ok

    archive = InMemoryUploadedFile(
        BytesIO(self.archive['data']),
        field_name=self.archive['name'],
        name=self.archive['name'],
        content_type='application/zip',
        size=self.archive['length'],
        charset=None)

    atom_entry = InMemoryUploadedFile(
        BytesIO(data_atom_entry),
        field_name='atom0',
        name='atom0',
        content_type='application/atom+xml; charset="utf-8"',
        size=len(data_atom_entry),
        charset='utf-8')

    # when
    response = self.client.post(
        url,
        format='multipart',
        data={
            'archive': archive,
            'atom_entry': atom_entry,
        },
        # + headers
        HTTP_IN_PROGRESS='true',
        HTTP_SLUG=external_id)

    # then
    self.assertEqual(response.status_code, status.HTTP_201_CREATED)

    response_content = parse_xml(BytesIO(response.content))
    deposit_id = response_content[
        '{http://www.w3.org/2005/Atom}deposit_id']

    check_deposit(deposit_id,
                  'partial',
                  'urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a')

    # Read the header through the public response mapping rather than
    # the private ``_headers`` dict.
    replace_metadata_uri = response['Location']

    response = self.client.put(
        replace_metadata_uri,
        content_type='application/atom+xml;type=entry',
        data=self.data_atom_entry_update_in_place,
        HTTP_IN_PROGRESS='false')

    self.assertEqual(response.status_code, status.HTTP_204_NO_CONTENT)

    # deposit_id did not change
    check_deposit(deposit_id,
                  DEPOSIT_STATUS_READY_FOR_CHECKS,
                  'urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa7b')
# FAILURE scenarios
@istest
def post_deposit_multipart_only_archive_and_atom_entry(self):
"""Multipart deposit only accepts one archive and one atom+xml

Two requests are sent and both are expected to be rejected with
415 Unsupported Media Type: one whose two parts are both declared
as archives, and one carrying the archive part alone.

"""
# given
url = reverse(COL_IRI, args=[self.collection.name])
- # from django.core.files import uploadedfile
-
archive_content = b'some content representing archive'
archive = InMemoryUploadedFile(BytesIO(archive_content),
field_name='archive0',
name='archive0',
- content_type='application/zip',
+ content_type='application/x-tar',
size=len(archive_content),
charset=None)
# second part is sent under the 'atom_entry' key below but is declared
# with an archive content type, not atom+xml
other_archive_content = b"some-other-content"
other_archive = InMemoryUploadedFile(BytesIO(other_archive_content),
field_name='atom0',
name='atom0',
- content_type='application/zip',
+ content_type='application/x-tar',
size=len(other_archive_content),
charset='utf-8')
# when
response = self.client.post(
url,
format='multipart',
data={
'archive': archive,
'atom_entry': other_archive,
},
# + headers
HTTP_IN_PROGRESS='false',
HTTP_SLUG='external-id')
# then
self.assertEqual(response.status_code,
status.HTTP_415_UNSUPPORTED_MEDIA_TYPE)
# when
# rewind the upload: the same file object is posted a second time
archive.seek(0)
response = self.client.post(
url,
format='multipart',
data={
'archive': archive,
},
# + headers
HTTP_IN_PROGRESS='false',
HTTP_SLUG='external-id')
# then
self.assertEqual(response.status_code,
status.HTTP_415_UNSUPPORTED_MEDIA_TYPE)
diff --git a/swh/deposit/tests/api/test_service_document.py b/swh/deposit/tests/api/test_service_document.py
index 6db695b4..453d16a1 100644
--- a/swh/deposit/tests/api/test_service_document.py
+++ b/swh/deposit/tests/api/test_service_document.py
@@ -1,101 +1,102 @@
# Copyright (C) 2017 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
from django.core.urlresolvers import reverse
from nose.tools import istest
from rest_framework import status
from rest_framework.test import APITestCase
from swh.deposit.tests import TEST_CONFIG
from swh.deposit.config import SD_IRI
from ..common import BasicTestCase, WithAuthTestCase
class ServiceDocumentNoAuthCase(APITestCase, BasicTestCase):
    """Anonymous access to the service document endpoint.

    The endpoint is protected with basic authentication, so the requests
    issued here without credentials are expected to be answered with 401.

    """
    @istest
    def service_document_no_authentication_fails(self):
        """A GET without credentials on the service document returns 401

        """
        anonymous_response = self.client.get(reverse(SD_IRI))

        self.assertEqual(anonymous_response.status_code,
                         status.HTTP_401_UNAUTHORIZED)

    @istest
    def service_document_with_http_accept_should_not_break(self):
        """A browser-like GET without credentials still returns 401

        """
        # NOTE(review): 'q=9' and ',q=8' are not well-formed quality
        # values for an Accept header; possibly deliberate to check the
        # endpoint does not break on it -- confirm before "fixing".
        accept_header = 'text/html,application/xml;q=9,*/*,q=8'
        sd_url = reverse(SD_IRI)

        # when
        browser_response = self.client.get(sd_url, HTTP_ACCEPT=accept_header)

        # then
        self.assertEqual(browser_response.status_code,
                         status.HTTP_401_UNAUTHORIZED)
class ServiceDocumentCase(APITestCase, WithAuthTestCase, BasicTestCase):
"""Service document endpoint checks for requests expected to succeed.

Credentials are presumably set up by WithAuthTestCase -- TODO confirm.

"""
def assertResponseOk(self, response):
"""Assert response is a 200 carrying the expected service document.

The decoded body is compared for strict equality with a template
filled with TEST_CONFIG['max_upload_size'] and self.username.

"""
self.assertEqual(response.status_code, status.HTTP_200_OK)
# NOTE(review): assertEquals is a deprecated alias of assertEqual.
# NOTE(review): the expected document below appears to have lost its
# markup in this view (fewer placeholders than arguments); check it
# against the real test file before editing it.
self.assertEquals(response.content.decode('utf-8'),
'''
2.0
%s
The Software Heritage (SWH) Archive
%s Software Collection
application/zip
+ application/x-tar
Collection Policy
Software Heritage Archive
Collect, Preserve, Share
false
false
http://purl.org/net/sword/package/SimpleZip
https://deposit.softwareheritage.org/1/%s/
''' % (TEST_CONFIG['max_upload_size'], self.username, self.username, self.username)) # noqa
@istest
def service_document(self):
"""With authentication, service document list user's collection
"""
url = reverse(SD_IRI)
# when
response = self.client.get(url)
# then
self.assertResponseOk(response)
@istest
def service_document_with_http_accept_header(self):
"""With authentication, with browser, sd list user's collection
"""
url = reverse(SD_IRI)
# when
# same browser-like Accept header value as the unauthenticated case
response = self.client.get(
url,
HTTP_ACCEPT='text/html,application/xml;q=9,*/*,q=8')
self.assertResponseOk(response)