diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py --- a/swh/deposit/api/common.py +++ b/swh/deposit/api/common.py @@ -27,7 +27,8 @@ MAX_UPLOAD_SIZE_EXCEEDED, BAD_REQUEST, ERROR_CONTENT, CHECKSUM_MISMATCH, make_error_dict, MEDIATION_NOT_ALLOWED, make_error_response_from_dict, FORBIDDEN, - NOT_FOUND, make_error_response, METHOD_NOT_ALLOWED + NOT_FOUND, make_error_response, METHOD_NOT_ALLOWED, + ParserError, PARSING_ERROR ) from ..models import ( Deposit, DepositRequest, DepositCollection, @@ -502,8 +503,15 @@ if precondition_status_response: return precondition_status_response - raw_metadata, metadata = self._read_metadata( - data['application/atom+xml']) + try: + raw_metadata, metadata = self._read_metadata( + data['application/atom+xml']) + except ParserError: + return make_error_dict( + PARSING_ERROR, + 'Malformed xml metadata', + "The xml received is malformed. " + "Please ensure your metadata file is correctly formatted.") # actual storage of data deposit = self._deposit_put(deposit_id=deposit_id, @@ -560,7 +568,15 @@ - 415 (unsupported media type) if a wrong media type is provided """ - raw_metadata, metadata = self._read_metadata(req.data) + try: + raw_metadata, metadata = self._read_metadata(req.data) + except ParserError: + return make_error_dict( + BAD_REQUEST, + 'Malformed xml metadata', + "The xml received is malformed. " + "Please ensure your metadata file is correctly formatted.") + if not metadata: return make_error_dict( BAD_REQUEST, diff --git a/swh/deposit/errors.py b/swh/deposit/errors.py --- a/swh/deposit/errors.py +++ b/swh/deposit/errors.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -20,6 +20,14 @@ MEDIATION_NOT_ALLOWED = 'mediation-not-allowed' METHOD_NOT_ALLOWED = 'method-not-allowed' MAX_UPLOAD_SIZE_EXCEEDED = 'max_upload_size_exceeded' +PARSING_ERROR = 'parsing-error' + + +class ParserError(ValueError): + """Specific parsing error detected when parsing the xml metadata input + + """ + pass ERRORS = { @@ -53,6 +61,11 @@ 'iri': 'http://purl.org/net/sword/error/ErrorBadRequest', 'tag': 'sword:ErrorBadRequest', }, + PARSING_ERROR: { + 'status': status.HTTP_400_BAD_REQUEST, + 'iri': 'http://purl.org/net/sword/error/ErrorBadRequest', + 'tag': 'sword:ErrorBadRequest', + }, MEDIATION_NOT_ALLOWED: { 'status': status.HTTP_412_PRECONDITION_FAILED, 'iri': 'http://purl.org/net/sword/error/MediationNotAllowed', diff --git a/swh/deposit/parsers.py b/swh/deposit/parsers.py --- a/swh/deposit/parsers.py +++ b/swh/deposit/parsers.py @@ -1,4 +1,4 @@ -# Copyright (C) 2017-2018 The Software Heritage developers +# Copyright (C) 2017-2019 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -14,6 +14,9 @@ from rest_framework.parsers import BaseParser from rest_framework.parsers import FileUploadParser from rest_framework.parsers import MultiPartParser +from xml.parsers.expat import ExpatError + +from swh.deposit.errors import ParserError class SWHFileUploadZipParser(FileUploadParser): @@ -76,8 +79,14 @@ Args: raw_content (bytes): The content to parse + Raises: + ParserError in case of a malformed xml + Returns: content parsed as dict. """ - return SWHXMLParser().parse(raw_content) + try: + return SWHXMLParser().parse(raw_content) + except ExpatError as e: + raise ParserError(str(e)) diff --git a/swh/deposit/tests/api/test_deposit_atom.py b/swh/deposit/tests/api/test_deposit_atom.py --- a/swh/deposit/tests/api/test_deposit_atom.py +++ b/swh/deposit/tests/api/test_deposit_atom.py @@ -297,6 +297,21 @@ self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) def test_post_deposit_atom_without_slug_header_is_bad_request(self): + def test_post_deposit_atom_400_with_parsing_error(self): + """Posting parsing error prone atom should return 400 + + """ + atom_entry_data_parsing_error_prone = b""" + + Composing a Web of Audio Applications + + +""" + response = self.client.post( + reverse(COL_IRI, args=[self.collection.name]), + content_type='application/atom+xml;type=entry', + data=atom_entry_data_parsing_error_prone) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) """Posting an atom entry without a slug header should return a 400 """ diff --git a/swh/deposit/tests/api/test_deposit_multipart.py b/swh/deposit/tests/api/test_deposit_multipart.py --- a/swh/deposit/tests/api/test_deposit_multipart.py +++ b/swh/deposit/tests/api/test_deposit_multipart.py @@ -400,3 +400,49 @@ 'application/x-tar) and 1 atom+xml entry for ' 'multipart deposit' in response.content.decode('utf-8') ) + + def test_post_deposit_multipart_400_when_badly_formatted_xml(self): + # given + url = reverse(COL_IRI, args=[self.collection.name]) + + data_atom_entry_ko = b""" + + + urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a + +""" + + archive_content = b'some content representing archive' + archive = InMemoryUploadedFile( + BytesIO(archive_content), + field_name='archive0', + name='archive0', + content_type='application/zip', + size=len(archive_content), + charset=None) + + atom_entry = InMemoryUploadedFile( + BytesIO(data_atom_entry_ko), + field_name='atom0', + name='atom0', + content_type='application/atom+xml; charset="utf-8"', + size=len(data_atom_entry_ko), + charset='utf-8') + + # when + response = self.client.post( + url, + format='multipart', + data={ + 'archive': archive, + 'atom_entry': atom_entry, + }, + # + headers + HTTP_IN_PROGRESS='false', + HTTP_SLUG='external-id', + ) + + self.assertIn(b'Malformed xml metadata', response.content) + self.assertEqual(response.status_code, + status.HTTP_400_BAD_REQUEST)