diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py
--- a/swh/deposit/api/common.py
+++ b/swh/deposit/api/common.py
@@ -27,7 +27,8 @@
MAX_UPLOAD_SIZE_EXCEEDED, BAD_REQUEST, ERROR_CONTENT,
CHECKSUM_MISMATCH, make_error_dict, MEDIATION_NOT_ALLOWED,
make_error_response_from_dict, FORBIDDEN,
- NOT_FOUND, make_error_response, METHOD_NOT_ALLOWED
+ NOT_FOUND, make_error_response, METHOD_NOT_ALLOWED,
+ ParserError, PARSING_ERROR
)
from ..models import (
Deposit, DepositRequest, DepositCollection,
@@ -502,8 +503,15 @@
if precondition_status_response:
return precondition_status_response
- raw_metadata, metadata = self._read_metadata(
- data['application/atom+xml'])
+ try:
+ raw_metadata, metadata = self._read_metadata(
+ data['application/atom+xml'])
+ except ParserError:
+ return make_error_dict(
+ PARSING_ERROR,
+ 'Malformed xml metadata',
+ "The xml received is malformed. "
+ "Please ensure your metadata file is correctly formatted.")
# actual storage of data
deposit = self._deposit_put(deposit_id=deposit_id,
@@ -560,7 +568,15 @@
- 415 (unsupported media type) if a wrong media type is provided
"""
- raw_metadata, metadata = self._read_metadata(req.data)
+ try:
+ raw_metadata, metadata = self._read_metadata(req.data)
+ except ParserError:
+ return make_error_dict(
+ BAD_REQUEST,
+ 'Malformed xml metadata',
+ "The xml received is malformed. "
+ "Please ensure your metadata file is correctly formatted.")
+
if not metadata:
return make_error_dict(
BAD_REQUEST,
diff --git a/swh/deposit/errors.py b/swh/deposit/errors.py
--- a/swh/deposit/errors.py
+++ b/swh/deposit/errors.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017 The Software Heritage developers
+# Copyright (C) 2017-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -20,6 +20,14 @@
MEDIATION_NOT_ALLOWED = 'mediation-not-allowed'
METHOD_NOT_ALLOWED = 'method-not-allowed'
MAX_UPLOAD_SIZE_EXCEEDED = 'max_upload_size_exceeded'
+PARSING_ERROR = 'parsing-error'
+
+
+class ParserError(ValueError):
+ """Specific parsing error detected when parsing the xml metadata input
+
+ """
+ pass
ERRORS = {
@@ -53,6 +61,11 @@
'iri': 'http://purl.org/net/sword/error/ErrorBadRequest',
'tag': 'sword:ErrorBadRequest',
},
+ PARSING_ERROR: {
+ 'status': status.HTTP_400_BAD_REQUEST,
+ 'iri': 'http://purl.org/net/sword/error/ErrorBadRequest',
+ 'tag': 'sword:ErrorBadRequest',
+ },
MEDIATION_NOT_ALLOWED: {
'status': status.HTTP_412_PRECONDITION_FAILED,
'iri': 'http://purl.org/net/sword/error/MediationNotAllowed',
diff --git a/swh/deposit/parsers.py b/swh/deposit/parsers.py
--- a/swh/deposit/parsers.py
+++ b/swh/deposit/parsers.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2018 The Software Heritage developers
+# Copyright (C) 2017-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -14,6 +14,9 @@
from rest_framework.parsers import BaseParser
from rest_framework.parsers import FileUploadParser
from rest_framework.parsers import MultiPartParser
+from xml.parsers.expat import ExpatError
+
+from swh.deposit.errors import ParserError
class SWHFileUploadZipParser(FileUploadParser):
@@ -76,8 +79,14 @@
Args:
raw_content (bytes): The content to parse
+ Raises:
+ ParserError in case of a malformed xml
+
Returns:
content parsed as dict.
"""
- return SWHXMLParser().parse(raw_content)
+ try:
+ return SWHXMLParser().parse(raw_content)
+ except ExpatError as e:
+ raise ParserError(str(e))
diff --git a/swh/deposit/tests/api/test_deposit_atom.py b/swh/deposit/tests/api/test_deposit_atom.py
--- a/swh/deposit/tests/api/test_deposit_atom.py
+++ b/swh/deposit/tests/api/test_deposit_atom.py
@@ -297,6 +297,21 @@
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
def test_post_deposit_atom_without_slug_header_is_bad_request(self):
+ def test_post_deposit_atom_400_with_parsing_error(self):
+ """Posting parsing error prone atom should return 400
+
+ """
+ atom_entry_data_parsing_error_prone = b"""
+
+ Composing a Web of Audio Applications
+
+
+"""
+ response = self.client.post(
+ reverse(COL_IRI, args=[self.collection.name]),
+ content_type='application/atom+xml;type=entry',
+ data=atom_entry_data_parsing_error_prone)
+ self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
"""Posting an atom entry without a slug header should return a 400
"""
diff --git a/swh/deposit/tests/api/test_deposit_multipart.py b/swh/deposit/tests/api/test_deposit_multipart.py
--- a/swh/deposit/tests/api/test_deposit_multipart.py
+++ b/swh/deposit/tests/api/test_deposit_multipart.py
@@ -400,3 +400,49 @@
'application/x-tar) and 1 atom+xml entry for '
'multipart deposit' in response.content.decode('utf-8')
)
+
+ def test_post_deposit_multipart_400_when_badly_formatted_xml(self):
+ # given
+ url = reverse(COL_IRI, args=[self.collection.name])
+
+ data_atom_entry_ko = b"""
+
+
+ urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
+
+"""
+
+ archive_content = b'some content representing archive'
+ archive = InMemoryUploadedFile(
+ BytesIO(archive_content),
+ field_name='archive0',
+ name='archive0',
+ content_type='application/zip',
+ size=len(archive_content),
+ charset=None)
+
+ atom_entry = InMemoryUploadedFile(
+ BytesIO(data_atom_entry_ko),
+ field_name='atom0',
+ name='atom0',
+ content_type='application/atom+xml; charset="utf-8"',
+ size=len(data_atom_entry_ko),
+ charset='utf-8')
+
+ # when
+ response = self.client.post(
+ url,
+ format='multipart',
+ data={
+ 'archive': archive,
+ 'atom_entry': atom_entry,
+ },
+ # + headers
+ HTTP_IN_PROGRESS='false',
+ HTTP_SLUG='external-id',
+ )
+
+ self.assertIn(b'Malformed xml metadata', response.content)
+ self.assertEqual(response.status_code,
+ status.HTTP_400_BAD_REQUEST)