diff --git a/swh/deposit/api/common.py b/swh/deposit/api/common.py --- a/swh/deposit/api/common.py +++ b/swh/deposit/api/common.py @@ -17,16 +17,22 @@ from swh.model import hashutil -from ..config import SWHDefaultConfig, EDIT_SE_IRI, EM_IRI, CONT_FILE_IRI -from ..config import ARCHIVE_KEY, METADATA_KEY, STATE_IRI -from ..config import DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_PARTIAL -from ..config import DEPOSIT_STATUS_LOAD_SUCCESS -from ..errors import MAX_UPLOAD_SIZE_EXCEEDED, BAD_REQUEST, ERROR_CONTENT -from ..errors import CHECKSUM_MISMATCH, make_error_dict, MEDIATION_NOT_ALLOWED -from ..errors import make_error_response_from_dict, FORBIDDEN -from ..errors import NOT_FOUND, make_error_response, METHOD_NOT_ALLOWED -from ..models import Deposit, DepositRequest, DepositCollection -from ..models import DepositRequestType, DepositClient +from ..config import ( + SWHDefaultConfig, EDIT_SE_IRI, EM_IRI, CONT_FILE_IRI, + ARCHIVE_KEY, METADATA_KEY, RAW_METADATA_KEY, STATE_IRI, + DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_PARTIAL, + DEPOSIT_STATUS_LOAD_SUCCESS +) +from ..errors import ( + MAX_UPLOAD_SIZE_EXCEEDED, BAD_REQUEST, ERROR_CONTENT, + CHECKSUM_MISMATCH, make_error_dict, MEDIATION_NOT_ALLOWED, + make_error_response_from_dict, FORBIDDEN, + NOT_FOUND, make_error_response, METHOD_NOT_ALLOWED +) +from ..models import ( + Deposit, DepositRequest, DepositCollection, DepositRequestType, + DepositClient +) from ..parsers import parse_xml @@ -218,10 +224,12 @@ metadata = deposit_request_data.get(METADATA_KEY) if metadata: + raw_metadata = deposit_request_data.get(RAW_METADATA_KEY) deposit_request = DepositRequest( type=self.deposit_request_types[METADATA_KEY], deposit=deposit, - metadata=metadata) + metadata=metadata, + raw_metadata=raw_metadata) deposit_request.save() assert deposit_request is not None @@ -405,6 +413,15 @@ 'archive': filehandler.name, } + def _read_metadata(self, metadata_stream): + """Given a metadata stream, reads the metadata and returns both the + parsed and the raw metadata. + + """ + raw_metadata = metadata_stream.read() + metadata = parse_xml(raw_metadata) + return raw_metadata, metadata + def _multipart_upload(self, req, headers, collection_name, deposit_id=None, replace_metadata=False, replace_archives=False): @@ -491,14 +508,17 @@ if precondition_status_response: return precondition_status_response + raw_metadata, metadata = self._read_metadata( + data['application/atom+xml']) + # actual storage of data - atom_metadata = parse_xml(data['application/atom+xml']) deposit = self._deposit_put(deposit_id=deposit_id, in_progress=headers['in-progress'], external_id=external_id) deposit_request_data = { ARCHIVE_KEY: filehandler, - METADATA_KEY: atom_metadata, + METADATA_KEY: metadata, + RAW_METADATA_KEY: raw_metadata, } self._deposit_request_put( deposit, deposit_request_data, replace_metadata, replace_archives) @@ -546,20 +566,22 @@ - 415 (unsupported media type) if a wrong media type is provided """ - if not req.data: + raw_metadata, metadata = self._read_metadata(req.data) + if not metadata: return make_error_dict( BAD_REQUEST, 'Empty body request is not supported', 'Atom entry deposit is supposed to send for metadata. ' 'If the body is empty, there is no metadata.') - external_id = req.data.get('external_identifier', headers['slug']) + external_id = metadata.get('external_identifier', headers['slug']) deposit = self._deposit_put(deposit_id=deposit_id, in_progress=headers['in-progress'], external_id=external_id) + self._deposit_request_put( - deposit, {METADATA_KEY: req.data}, + deposit, {METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata}, replace_metadata, replace_archives) return { diff --git a/swh/deposit/config.py b/swh/deposit/config.py --- a/swh/deposit/config.py +++ b/swh/deposit/config.py @@ -23,6 +23,7 @@ ARCHIVE_KEY = 'archive' METADATA_KEY = 'metadata' +RAW_METADATA_KEY = 'raw-metadata' ARCHIVE_TYPE = 'archive' METADATA_TYPE = 'metadata' diff --git a/swh/deposit/migrations/0013_depositrequest_raw_metadata.py b/swh/deposit/migrations/0013_depositrequest_raw_metadata.py new file mode 100644 --- /dev/null +++ b/swh/deposit/migrations/0013_depositrequest_raw_metadata.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11.14 on 2018-07-19 13:07 +from __future__ import unicode_literals + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('deposit', '0012_deposit_status_detail'), + ] + + operations = [ + migrations.AddField( + model_name='depositrequest', + name='raw_metadata', + field=models.TextField(null=True), + ), + ] diff --git a/swh/deposit/models.py b/swh/deposit/models.py --- a/swh/deposit/models.py +++ b/swh/deposit/models.py @@ -178,6 +178,7 @@ # Deposit request information on the data to inject # this can be null when type is 'archive' metadata = JSONField(null=True) + raw_metadata = models.TextField(null=True) # this can be null when type is 'metadata' archive = models.FileField(null=True, upload_to=client_directory_path) diff --git a/swh/deposit/parsers.py b/swh/deposit/parsers.py --- a/swh/deposit/parsers.py +++ b/swh/deposit/parsers.py @@ -55,6 +55,13 @@ """ media_type = 'application/atom+xml;type=entry' + def parse(self, stream, media_type=None, parser_context=None): + # We do not actually want to parse the stream yet + # because we want to keep the raw data as well + # this is done later in the atom entry call + # (cf. swh.deposit.api.common.SWHBaseDeposit._atom_entry) + return stream + class SWHMultiPartParser(MultiPartParser): """Multipart parser limited to a subset of mediatypes. diff --git a/swh/deposit/tests/api/test_deposit_atom.py b/swh/deposit/tests/api/test_deposit_atom.py --- a/swh/deposit/tests/api/test_deposit_atom.py +++ b/swh/deposit/tests/api/test_deposit_atom.py @@ -367,6 +367,8 @@ # one associated request to a deposit deposit_request = DepositRequest.objects.get(deposit=deposit) self.assertIsNotNone(deposit_request.metadata) + self.assertEquals( + deposit_request.raw_metadata, atom_entry_data.decode('utf-8')) self.assertFalse(bool(deposit_request.archive)) @istest @@ -407,6 +409,8 @@ # one associated request to a deposit deposit_request = DepositRequest.objects.get(deposit=deposit) self.assertIsNotNone(deposit_request.metadata) + self.assertEquals( + deposit_request.raw_metadata, atom_entry_data.decode('utf-8')) self.assertFalse(bool(deposit_request.archive)) @@ -445,6 +449,8 @@ # one associated request to a deposit deposit_request = DepositRequest.objects.get(deposit=deposit) self.assertIsNotNone(deposit_request.metadata) + self.assertEquals( + deposit_request.raw_metadata, atom_entry_data.decode('utf-8')) self.assertFalse(bool(deposit_request.archive)) @istest @@ -508,10 +514,25 @@ self.assertEqual(len(Deposit.objects.all()), 1) # now 2 associated requests to a same deposit - deposit_requests = DepositRequest.objects.filter(deposit=deposit) + deposit_requests = DepositRequest.objects.filter( + deposit=deposit).order_by('id') self.assertEqual(len(deposit_requests), 2) - for deposit_request in deposit_requests: + expected_meta = [ + { + 'metadata': parse_xml(self.atom_entry_data1), + 'raw_metadata': self.atom_entry_data1.decode('utf-8'), + }, + { + 'metadata': parse_xml(atom_entry_data), + 'raw_metadata': atom_entry_data.decode('utf-8'), + } + ] + + for i, deposit_request in enumerate(deposit_requests): actual_metadata = deposit_request.metadata - self.assertIsNotNone(actual_metadata) + self.assertEquals(actual_metadata, + expected_meta[i]['metadata']) + self.assertEquals(deposit_request.raw_metadata, + expected_meta[i]['raw_metadata']) self.assertFalse(bool(deposit_request.archive)) diff --git a/swh/deposit/tests/api/test_deposit_binary.py b/swh/deposit/tests/api/test_deposit_binary.py --- a/swh/deposit/tests/api/test_deposit_binary.py +++ b/swh/deposit/tests/api/test_deposit_binary.py @@ -16,8 +16,10 @@ from swh.deposit.config import DEPOSIT_STATUS_DEPOSITED from swh.deposit.models import Deposit, DepositRequest from swh.deposit.parsers import parse_xml -from ..common import BasicTestCase, WithAuthTestCase, create_arborescence_archive -from ..common import FileSystemCreationRoutine +from ..common import ( + BasicTestCase, WithAuthTestCase, create_arborescence_archive, + FileSystemCreationRoutine +) class DepositTestCase(APITestCase, WithAuthTestCase, BasicTestCase, @@ -186,6 +188,8 @@ deposit_request = DepositRequest.objects.get(deposit=deposit) self.assertEquals(deposit_request.deposit, deposit) self.assertRegex(deposit_request.archive.name, self.archive['name']) + self.assertIsNone(deposit_request.metadata) + self.assertIsNone(deposit_request.raw_metadata) response_content = parse_xml(BytesIO(response.content)) self.assertEqual(response_content['deposit_archive'], diff --git a/swh/deposit/tests/api/test_deposit_multipart.py b/swh/deposit/tests/api/test_deposit_multipart.py --- a/swh/deposit/tests/api/test_deposit_multipart.py +++ b/swh/deposit/tests/api/test_deposit_multipart.py @@ -163,10 +163,14 @@ if deposit_request.type.name == 'archive': self.assertRegex(deposit_request.archive.name, self.archive['name']) + self.assertIsNone(deposit_request.metadata) + self.assertIsNone(deposit_request.raw_metadata) else: self.assertEquals( deposit_request.metadata['id'], 'urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a') + self.assertEquals(deposit_request.raw_metadata, + data_atom_entry.decode('utf-8')) @istest def post_deposit_multipart_tar(self): @@ -229,10 +233,14 @@ if deposit_request.type.name == 'archive': self.assertRegex(deposit_request.archive.name, self.archive['name']) + self.assertIsNone(deposit_request.metadata) + self.assertIsNone(deposit_request.raw_metadata) else: self.assertEquals( deposit_request.metadata['id'], 'urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a') + self.assertEquals(deposit_request.raw_metadata, + data_atom_entry.decode('utf-8')) @istest def post_deposit_multipart_put_to_replace_metadata(self): @@ -300,6 +308,8 @@ self.assertEquals( deposit_request.metadata['id'], 'urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a') + self.assertEquals(deposit_request.raw_metadata, + data_atom_entry.decode('utf-8')) replace_metadata_uri = response._headers['location'][1] response = self.client.put( @@ -329,6 +339,9 @@ self.assertEquals( deposit_request.metadata['id'], 'urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa7b') + self.assertEquals( + deposit_request.raw_metadata, + self.data_atom_entry_update_in_place) # FAILURE scenarios