diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py
--- a/swh/deposit/api/common.py
+++ b/swh/deposit/api/common.py
@@ -27,7 +27,8 @@
MAX_UPLOAD_SIZE_EXCEEDED, BAD_REQUEST, ERROR_CONTENT,
CHECKSUM_MISMATCH, make_error_dict, MEDIATION_NOT_ALLOWED,
make_error_response_from_dict, FORBIDDEN,
- NOT_FOUND, make_error_response, METHOD_NOT_ALLOWED
+ NOT_FOUND, make_error_response, METHOD_NOT_ALLOWED,
+ ParserError, PARSING_ERROR
)
from ..models import (
Deposit, DepositRequest, DepositCollection,
@@ -502,8 +503,15 @@
if precondition_status_response:
return precondition_status_response
- raw_metadata, metadata = self._read_metadata(
- data['application/atom+xml'])
+ try:
+ raw_metadata, metadata = self._read_metadata(
+ data['application/atom+xml'])
+ except ParserError:
+ return make_error_dict(
+ PARSING_ERROR,  # maps to 400 sword:ErrorBadRequest
+ 'Malformed xml metadata',
+ "The xml received is malformed. "
+ "Please ensure your metadata file is correctly formatted.")
# actual storage of data
deposit = self._deposit_put(deposit_id=deposit_id,
@@ -560,7 +568,15 @@
- 415 (unsupported media type) if a wrong media type is provided
"""
- raw_metadata, metadata = self._read_metadata(req.data)
+ try:
+ raw_metadata, metadata = self._read_metadata(req.data)
+ except ParserError:
+ return make_error_dict(
+ PARSING_ERROR,  # as in the other malformed-xml handler
+ 'Malformed xml metadata',
+ "The xml received is malformed. "
+ "Please ensure your metadata file is correctly formatted.")
+
if not metadata:
return make_error_dict(
BAD_REQUEST,
diff --git a/swh/deposit/errors.py b/swh/deposit/errors.py
--- a/swh/deposit/errors.py
+++ b/swh/deposit/errors.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017 The Software Heritage developers
+# Copyright (C) 2017-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -20,6 +20,14 @@
MEDIATION_NOT_ALLOWED = 'mediation-not-allowed'
METHOD_NOT_ALLOWED = 'method-not-allowed'
MAX_UPLOAD_SIZE_EXCEEDED = 'max_upload_size_exceeded'
+PARSING_ERROR = 'parsing-error'
+
+
+class ParserError(ValueError):
+ """Error raised when the xml metadata input cannot be parsed.
+
+ Being a ValueError subclass, ``except ValueError`` handlers also catch it.
+ """
ERRORS = {
@@ -53,6 +61,11 @@
'iri': 'http://purl.org/net/sword/error/ErrorBadRequest',
'tag': 'sword:ErrorBadRequest',
},
+ PARSING_ERROR: {
+ 'status': status.HTTP_400_BAD_REQUEST,
+ 'iri': 'http://purl.org/net/sword/error/ErrorBadRequest',
+ 'tag': 'sword:ErrorBadRequest',
+ },
MEDIATION_NOT_ALLOWED: {
'status': status.HTTP_412_PRECONDITION_FAILED,
'iri': 'http://purl.org/net/sword/error/MediationNotAllowed',
diff --git a/swh/deposit/parsers.py b/swh/deposit/parsers.py
--- a/swh/deposit/parsers.py
+++ b/swh/deposit/parsers.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2017-2018 The Software Heritage developers
+# Copyright (C) 2017-2019 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -14,6 +14,9 @@
from rest_framework.parsers import BaseParser
from rest_framework.parsers import FileUploadParser
from rest_framework.parsers import MultiPartParser
+from xml.parsers.expat import ExpatError
+
+from swh.deposit.errors import ParserError
class SWHFileUploadZipParser(FileUploadParser):
@@ -76,8 +79,14 @@
Args:
raw_content (bytes): The content to parse
+ Raises:
+ ParserError in case of a malformed xml
+
Returns:
content parsed as dict.
"""
- return SWHXMLParser().parse(raw_content)
+ try:
+ return SWHXMLParser().parse(raw_content)
+ except ExpatError as e:
+ raise ParserError(str(e)) from e  # keep expat error as __cause__
diff --git a/swh/deposit/tests/api/test_deposit_atom.py b/swh/deposit/tests/api/test_deposit_atom.py
--- a/swh/deposit/tests/api/test_deposit_atom.py
+++ b/swh/deposit/tests/api/test_deposit_atom.py
@@ -74,139 +74,6 @@
all
"""
- self.atom_entry_data2 = b"""
-
- %s
-"""
-
- self.atom_entry_data_empty_body = b"""
-"""
-
- self.atom_entry_data3 = b"""
-
- something
-"""
-
- self.atom_entry_data_atom_only = b"""
-
- Awesome Compiler
- urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
- 1785io25c695
- 2017-10-07T15:17:08Z
- some awesome author
- """
-
- self.atom_entry_data_codemeta = b"""
-
- Awesome Compiler
- urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
- 1785io25c695
- 1785io25c695
- origin url
- other identifier, DOI, ARK
- Domain
-
- description
- key-word 1
- key-word 2
- creation date
- publication date
- comment
-
- article name
- article id
-
-
- Collaboration/Projet
- project name
- id
-
- see also
- Sponsor A
- Sponsor B
- Platform/OS
- dependencies
- Version
- active
-
- license
- url spdx
-
- .Net Framework 3.0
- Python2.3
-
- author1
- Inria
- UPMC
-
-
- author2
- Inria
- UPMC
-
- http://code.com
- language 1
- language 2
- http://issuetracker.com
- """ # noqa
-
- self.atom_entry_data_dc_codemeta = b"""
-
-
-
- %s
- hal-01587361
- https://hal.inria.fr/hal-01587361
- https://hal.inria.fr/hal-01587361/document
- https://hal.inria.fr/hal-01587361/file/AffectationRO-v1.0.0.zip
- doi:10.5281/zenodo.438684
- The assignment problem
- AffectationRO
- Gruenpeter, Morane
- [INFO] Computer Science [cs]
- [INFO.INFO-RO] Computer Science [cs]/Operations Research [cs.RO]
- SOFTWARE
- Project in OR: The assignment problemA java implementation for the assignment problem first release
- description fr
- 2015-06-01
- 2017-10-19
- en
-
-
- url stable
- Version sur hal
- Version entre par lutilisateur
- Mots-cls
- Commentaire
- Rfrence interne
-
- Collaboration/Projet
- nom du projet
- id
-
- Voir aussi
- Financement
- Projet ANR
- Projet Europen
- Platform/OS
- Dpendances
- Etat du dveloppement
-
- license
- url spdx
-
- Outils de dveloppement- outil no1
- Outils de dveloppement- outil no2
- http://code.com
- language 1
- language 2
- """ # noqa
-
- self.atom_entry_tei = b"""HAL TEI export of hal-01587083CCSDDistributed under a Creative Commons Attribution 4.0 International LicenseHAL API platform
questionnaire software metadataMoraneGruenpeter7de56c632362954fa84172cad80afe4einria.fr1556733MoraneGruenpeterf85a43a5fb4a2e0778a77e017f28c8fdgmail.com2017-09-29 11:21:322017-10-03 17:20:132017-10-03 17:20:132017-09-292017-09-29contributorMoraneGruenpeterf85a43a5fb4a2e0778a77e017f28c8fdgmail.comCCSDhal-01587083https://hal.inria.fr/hal-01587083gruenpeter:hal-0158708320172017questionnaire software metadataMoraneGruenpeter7de56c632362954fa84172cad80afe4einria.fr1556733EnglishComputer Science [cs]SoftwareIRILLInitiative pour la Recherche et l'Innovation sur le Logiciel Libre[https://www.irill.org/]Universite Pierre et Marie Curie - Paris 6UPMC4 place Jussieu - 75005 Paris[http://www.upmc.fr/]Institut National de Recherche en Informatique et en AutomatiqueInriaDomaine de VoluceauRocquencourt - BP 10578153 Le Chesnay Cedex[http://www.inria.fr/en/]Universite Paris Diderot - Paris 7UPD75 rue Thomas-Mann - 75205 Paris cedex 13[http://www.univ-paris-diderot.fr]""" # noqa
-
self.atom_entry_data_badly_formatted = b"""
"""
@@ -250,7 +117,7 @@
""" # noqa
- def test_post_deposit_atom_entry_serialization_error(self):
+ def test_post_deposit_atom_201_even_with_decimal(self):
"""Posting an initial atom entry should return 201 with deposit receipt
"""
@@ -276,17 +143,20 @@
sw_version = dr.metadata.get('codemeta:softwareVersion')
self.assertEqual(sw_version, '10.4')
- def test_post_deposit_atom_empty_body_request(self):
+ def test_post_deposit_atom_400_with_empty_body(self):
"""Posting empty body request should return a 400 response
"""
+ atom_entry_data_empty_body = b"""
+"""
+
response = self.client.post(
reverse(COL_IRI, args=[self.collection.name]),
content_type='application/atom+xml;type=entry',
- data=self.atom_entry_data_empty_body)
+ data=atom_entry_data_empty_body)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
- def test_post_deposit_atom_badly_formatted_is_a_bad_request(self):
+ def test_post_deposit_atom_400_badly_formatted_atom(self):
"""Posting a badly formatted atom should return a 400 response
"""
@@ -296,7 +166,23 @@
data=self.atom_entry_data_badly_formatted)
self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
- def test_post_deposit_atom_without_slug_header_is_bad_request(self):
+ def test_post_deposit_atom_400_with_parsing_error(self):
+ """Posting an atom entry that cannot be parsed should return a 400
+
+ """
+ atom_entry_data_parsing_error_prone = b"""
+
+ Composing a Web of Audio Applications
+
+
+"""
+ response = self.client.post(
+ reverse(COL_IRI, args=[self.collection.name]),
+ content_type='application/atom+xml;type=entry',
+ data=atom_entry_data_parsing_error_prone)
+ self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST)
+
+ def test_post_deposit_atom_400_without_slug_header(self):
"""Posting an atom entry without a slug header should return a 400
"""
@@ -314,14 +200,19 @@
self.assertEqual(response.status_code,
status.HTTP_400_BAD_REQUEST)
- def test_post_deposit_atom_unknown_collection(self):
+ def test_post_deposit_atom_404_unknown_collection(self):
"""Posting an atom entry to an unknown collection should return a 404
"""
+ atom_entry_data3 = b"""
+
+ something
+"""
+
response = self.client.post(
reverse(COL_IRI, args=['unknown-one']),
content_type='application/atom+xml;type=entry',
- data=self.atom_entry_data3,
+ data=atom_entry_data3,
HTTP_SLUG='something')
self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
@@ -374,8 +265,59 @@
with self.assertRaises(Deposit.DoesNotExist):
Deposit.objects.get(external_id=external_id)
- atom_entry_data = self.atom_entry_data_dc_codemeta % (
- external_id.encode('utf-8'), )
+ atom_entry_data = b"""
+
+
+
+ %s
+ hal-01587361
+ https://hal.inria.fr/hal-01587361
+ https://hal.inria.fr/hal-01587361/document
+ https://hal.inria.fr/hal-01587361/file/AffectationRO-v1.0.0.zip
+ doi:10.5281/zenodo.438684
+ The assignment problem
+ AffectationRO
+ Gruenpeter, Morane
+ [INFO] Computer Science [cs]
+ [INFO.INFO-RO] Computer Science [cs]/Operations Research [cs.RO]
+ SOFTWARE
+ Project in OR: The assignment problemA java implementation for the assignment problem first release
+ description fr
+ 2015-06-01
+ 2017-10-19
+ en
+
+
+ url stable
+ Version sur hal
+ Version entre par lutilisateur
+ Mots-cls
+ Commentaire
+ Rfrence interne
+
+ Collaboration/Projet
+ nom du projet
+ id
+
+ Voir aussi
+ Financement
+ Projet ANR
+ Projet Europen
+ Platform/OS
+ Dpendances
+ Etat du dveloppement
+
+ license
+ url spdx
+
+ Outils de dveloppement- outil no1
+ Outils de dveloppement- outil no2
+ http://code.com
+ language 1
+ language 2
+ """ % external_id.encode('utf-8') # noqa
# when
response = self.client.post(
@@ -415,7 +357,7 @@
with self.assertRaises(Deposit.DoesNotExist):
Deposit.objects.get(external_id=external_id)
- atom_entry_data = self.atom_entry_tei
+ atom_entry_data = b"""HAL TEI export of hal-01587083CCSDDistributed under a Creative Commons Attribution 4.0 International LicenseHAL API platform
questionnaire software metadataMoraneGruenpeter7de56c632362954fa84172cad80afe4einria.fr1556733MoraneGruenpeterf85a43a5fb4a2e0778a77e017f28c8fdgmail.com2017-09-29 11:21:322017-10-03 17:20:132017-10-03 17:20:132017-09-292017-09-29contributorMoraneGruenpeterf85a43a5fb4a2e0778a77e017f28c8fdgmail.comCCSDhal-01587083https://hal.inria.fr/hal-01587083gruenpeter:hal-0158708320172017questionnaire software metadataMoraneGruenpeter7de56c632362954fa84172cad80afe4einria.fr1556733EnglishComputer Science [cs]SoftwareIRILLInitiative pour la Recherche et l'Innovation sur le Logiciel Libre[https://www.irill.org/]Universite Pierre et Marie Curie - Paris 6UPMC4 place Jussieu - 75005 Paris[http://www.upmc.fr/]Institut National de Recherche en Informatique et en AutomatiqueInriaDomaine de VoluceauRocquencourt - BP 10578153 Le Chesnay Cedex[http://www.inria.fr/en/]Universite Paris Diderot - Paris 7UPD75 rue Thomas-Mann - 75205 Paris cedex 13[http://www.univ-paris-diderot.fr]""" # noqa
# when
response = self.client.post(
@@ -478,7 +420,10 @@
deposit_requests = DepositRequest.objects.filter(deposit=deposit)
self.assertEqual(len(deposit_requests), 1)
- atom_entry_data = self.atom_entry_data2 % external_id.encode('utf-8')
+ atom_entry_data = b"""
+
+ %s
+""" % external_id.encode('utf-8')
update_uri = response._headers['location'][1]
diff --git a/swh/deposit/tests/api/test_deposit_multipart.py b/swh/deposit/tests/api/test_deposit_multipart.py
--- a/swh/deposit/tests/api/test_deposit_multipart.py
+++ b/swh/deposit/tests/api/test_deposit_multipart.py
@@ -400,3 +400,49 @@
'application/x-tar) and 1 atom+xml entry for '
'multipart deposit' in response.content.decode('utf-8')
)
+
+ def test_post_deposit_multipart_400_when_badly_formatted_xml(self):
+ # given: an archive plus an atom entry expected to fail xml parsing
+ url = reverse(COL_IRI, args=[self.collection.name])
+
+ data_atom_entry_ko = b"""
+
+
+ urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a
+
+"""
+
+ archive_content = b'some content representing archive'
+ archive = InMemoryUploadedFile(
+ BytesIO(archive_content),
+ field_name='archive0',
+ name='archive0',
+ content_type='application/zip',
+ size=len(archive_content),
+ charset=None)
+
+ atom_entry = InMemoryUploadedFile(
+ BytesIO(data_atom_entry_ko),
+ field_name='atom0',
+ name='atom0',
+ content_type='application/atom+xml; charset="utf-8"',
+ size=len(data_atom_entry_ko),
+ charset='utf-8')
+
+ # when: both parts are posted as one multipart deposit
+ response = self.client.post(
+ url,
+ format='multipart',
+ data={
+ 'archive': archive,
+ 'atom_entry': atom_entry,
+ },
+ # + headers
+ HTTP_IN_PROGRESS='false',
+ HTTP_SLUG='external-id',
+ )
+
+ self.assertIn(b'Malformed xml metadata', response.content)
+ self.assertEqual(response.status_code,
+ status.HTTP_400_BAD_REQUEST)