diff --git a/swh/deposit/api/private/__init__.py b/swh/deposit/api/private/__init__.py
--- a/swh/deposit/api/private/__init__.py
+++ b/swh/deposit/api/private/__init__.py
@@ -3,6 +3,8 @@
 # License: GNU General Public License version 3, or any later version
 # See top-level LICENSE file for more information
 
+from typing import Any, Dict, List, Tuple
+
 from rest_framework.permissions import AllowAny
 
 from swh.deposit import utils
@@ -39,22 +41,27 @@
         for deposit_request in deposit_requests:
             yield deposit_request
 
-    def _metadata_get(self, deposit):
-        """Given a deposit, aggregate all metadata requests.
+    def _metadata_get(self, deposit: Deposit) -> Tuple[Dict[str, Any], List[str]]:
+        """Given a deposit, merge the metadata of all its metadata requests into one
+           dict and return both that aggregated dict and the list of raw metadata.
 
         Args:
-            deposit (Deposit): The deposit instance to extract
-            metadata from.
+            deposit: The deposit instance to extract metadata from
 
         Returns:
-            metadata dict from the deposit.
+            Tuple (aggregated metadata dict, list of raw metadata, one per request)
 
         """
-        metadata = (
-            m.metadata
-            for m in self._deposit_requests(deposit, request_type=METADATA_TYPE)
-        )
-        return utils.merge(*metadata)
+        metadata: List[Dict[str, Any]] = []
+        raw_metadata: List[str] = []
+        for deposit_request in self._deposit_requests(
+            deposit, request_type=METADATA_TYPE
+        ):
+            metadata.append(deposit_request.metadata)
+            raw_metadata.append(deposit_request.raw_metadata)
+
+        aggregated_metadata = utils.merge(*metadata)
+        return (aggregated_metadata, raw_metadata)
 
 
 class APIPrivateView(APIConfig, AuthenticatedAPIView):
diff --git a/swh/deposit/api/private/deposit_check.py b/swh/deposit/api/private/deposit_check.py
--- a/swh/deposit/api/private/deposit_check.py
+++ b/swh/deposit/api/private/deposit_check.py
@@ -11,6 +11,7 @@
 import zipfile
 
 from rest_framework import status
+from rest_framework.request import Request
 
 from swh.scheduler.utils import create_oneshot_task_dict
 
@@ -130,22 +131,22 @@
         return True, None
 
     def process_get(
-        self, req, collection_name: str, deposit_id: int
+        self, req: Request, collection_name: str, deposit_id: int
     ) -> Tuple[int, Dict, str]:
         """Build a unique tarball from the multiple received and stream that
            content to the client.
 
         Args:
-            req (Request):
-            collection_name (str): Collection owning the deposit
-            deposit_id (id): Deposit concerned by the reading
+            req: Client request
+            collection_name: Collection owning the deposit
+            deposit_id: Deposit concerned by the reading
 
         Returns:
             Tuple status, stream of content, content-type
 
         """
         deposit = Deposit.objects.get(pk=deposit_id)
-        metadata = self._metadata_get(deposit)
+        metadata, _ = self._metadata_get(deposit)
         problems: Dict = {}
         # will check each deposit's associated request (both of type
         # archive and metadata) for errors
diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py
--- a/swh/deposit/api/private/deposit_read.py
+++ b/swh/deposit/api/private/deposit_read.py
@@ -131,26 +131,44 @@
             commit_date = deposit.complete_date
         return (normalize_date(author_date), normalize_date(commit_date))
 
-    def metadata_read(self, deposit):
-        """Read and aggregate multiple data on deposit into one unified data
-           dictionary.
+    def metadata_read(self, deposit: Deposit) -> Dict[str, Any]:
+        """Read and aggregate multiple deposit information into one unified dictionary.
 
         Args:
-            deposit (Deposit): Deposit concerned by the data aggregation.
+            deposit: Deposit concerned by the data aggregation.
 
         Returns:
-            Dictionary of data representing the deposit to inject in swh.
+            Dictionary of deposit information read by the deposit loader, with the
+            following keys:
+
+                **origin** (Dict): Information about the origin
+
+                **origin_metadata** (Dict): Metadata about the origin to load
+
+                    **metadata_raw** (List[str]): List of raw metadata received for the
+                      deposit
+
+                    **metadata_dict** (Dict): Deposit aggregated metadata into one dict
+
+                    **provider** (Dict): the metadata provider information about the
+                      deposit client
+
+                    **tool** (Dict): the deposit information
+
+                **deposit** (Dict): deposit information relevant to build the revision
+                  (author_date, committer_date, etc...)
 
         """
-        metadata = self._metadata_get(deposit)
+        metadata, raw_metadata = self._metadata_get(deposit)
         # Read information metadata
         data = {"origin": {"type": "deposit", "url": deposit.origin_url,}}
 
         author_date, commit_date = self._normalize_dates(deposit, metadata)
 
         if deposit.parent:
-            swh_persistent_id = deposit.parent.swhid
-            swhid = identifiers.parse_swhid(swh_persistent_id)
+            parent_swhid = deposit.parent.swhid
+            assert parent_swhid is not None
+            swhid = identifiers.parse_swhid(parent_swhid)
             parent_revision = swhid.object_id
             parents = [parent_revision]
         else:
@@ -165,7 +183,8 @@
                 "metadata": {},
             },
             "tool": self.tool,
-            "metadata": metadata,
+            "metadata_raw": raw_metadata,
+            "metadata_dict": metadata,
         }
         data["deposit"] = {
             "id": deposit.id,
diff --git a/swh/deposit/tests/api/test_deposit_private_read_metadata.py b/swh/deposit/tests/api/test_deposit_private_read_metadata.py
--- a/swh/deposit/tests/api/test_deposit_private_read_metadata.py
+++ b/swh/deposit/tests/api/test_deposit_private_read_metadata.py
@@ -6,9 +6,10 @@
 from django.urls import reverse
 from rest_framework import status
 
-from swh.deposit import __version__
+from swh.deposit import __version__, utils
 from swh.deposit.config import EDIT_SE_IRI, PRIVATE_GET_DEPOSIT_METADATA, SWH_PERSON
 from swh.deposit.models import Deposit
+from swh.deposit.parsers import parse_xml
 
 PRIVATE_GET_DEPOSIT_METADATA_NC = PRIVATE_GET_DEPOSIT_METADATA + "-nc"
 
@@ -22,14 +23,6 @@
     ]
 
 
-def update_deposit(authenticated_client, collection, deposit, atom_dataset):
-    for atom_data in ["entry-data2", "entry-data3"]:
-        update_deposit_with_metadata(
-            authenticated_client, collection, deposit, atom_dataset[atom_data]
-        )
-    return deposit
-
-
 def update_deposit_with_metadata(authenticated_client, collection, deposit, metadata):
     # update deposit's metadata
     response = authenticated_client.post(
@@ -52,28 +45,29 @@
     deposit = partial_deposit
     deposit.external_id = "some-external-id"
     deposit.save()
-    deposit = update_deposit(
-        authenticated_client, deposit_collection, deposit, atom_dataset
-    )
+
+    metadata_xml_atoms = [
+        atom_dataset[atom_key] for atom_key in ["entry-data2", "entry-data3"]
+    ]
+    metadata_xml_raws = [parse_xml(xml) for xml in metadata_xml_atoms]
+    for atom_xml in metadata_xml_atoms:
+        deposit = update_deposit_with_metadata(
+            authenticated_client, deposit_collection, deposit, atom_xml,
+        )
 
     for url in private_get_raw_url_endpoints(deposit_collection, deposit):
         response = authenticated_client.get(url)
         assert response.status_code == status.HTTP_200_OK
         assert response._headers["content-type"][1] == "application/json"
         data = response.json()
-
-        expected_meta = {
+        assert data == {
             "origin": {
                 "type": "deposit",
                 "url": "https://hal-test.archives-ouvertes.fr/some-external-id",
             },
             "origin_metadata": {
-                "metadata": {
-                    "author": ["some awesome author", "another one", "no one"],
-                    "codemeta:dateCreated": "2017-10-07T15:17:08Z",
-                    "external_identifier": "some-external-id",
-                    "url": "https://hal-test.archives-ouvertes.fr/some-external-id",  # noqa
-                },
+                "metadata_raw": metadata_xml_atoms,
+                "metadata_dict": utils.merge(*metadata_xml_raws),
                 "provider": {
                     "metadata": {},
                     "provider_name": "",
@@ -106,8 +100,6 @@
             },
         }
 
-        assert data == expected_meta
-
 
 def test_read_metadata_revision_with_parent(
     authenticated_client, deposit_collection, partial_deposit, atom_dataset
@@ -118,9 +110,15 @@
     deposit = partial_deposit
     deposit.external_id = "some-external-id"
     deposit.save()
-    deposit = update_deposit(
-        authenticated_client, deposit_collection, deposit, atom_dataset
-    )
+    metadata_xml_atoms = [
+        atom_dataset[atom_key] for atom_key in ["entry-data2", "entry-data3"]
+    ]
+    metadata_xml_raws = [parse_xml(xml) for xml in metadata_xml_atoms]
+    for atom_xml in metadata_xml_atoms:
+        deposit = update_deposit_with_metadata(
+            authenticated_client, deposit_collection, deposit, atom_xml,
+        )
+
     rev_id = "da78a9d4cf1d5d29873693fd496142e3a18c20fa"
     swhid = "swh:1:rev:%s" % rev_id
     fake_parent = Deposit(
@@ -136,19 +134,14 @@
         assert response.status_code == status.HTTP_200_OK
         assert response._headers["content-type"][1] == "application/json"
         data = response.json()
-
-        expected_meta = {
+        assert data == {
             "origin": {
                 "type": "deposit",
                 "url": "https://hal-test.archives-ouvertes.fr/some-external-id",
             },
             "origin_metadata": {
-                "metadata": {
-                    "author": ["some awesome author", "another one", "no one"],
-                    "codemeta:dateCreated": "2017-10-07T15:17:08Z",
-                    "external_identifier": "some-external-id",
-                    "url": "https://hal-test.archives-ouvertes.fr/some-external-id",  # noqa
-                },
+                "metadata_raw": metadata_xml_atoms,
+                "metadata_dict": utils.merge(*metadata_xml_raws),
                 "provider": {
                     "metadata": {},
                     "provider_name": "",
@@ -181,8 +174,6 @@
             },
         }
 
-        assert data == expected_meta
-
 
 def test_read_metadata_3(
     authenticated_client, deposit_collection, partial_deposit, atom_dataset
@@ -193,9 +184,7 @@
     deposit = partial_deposit
     deposit.external_id = "hal-01243065"
     deposit.save()
-    deposit = update_deposit(
-        authenticated_client, deposit_collection, deposit, atom_dataset
-    )
+
     # add metadata to the deposit with datePublished and dateCreated
     codemeta_entry_data = (
         atom_dataset["metadata"]
@@ -204,9 +193,16 @@
   <codemeta:datePublished>2017-05-03T16:08:47+02:00</codemeta:datePublished>
 """
     )
-    update_deposit_with_metadata(
-        authenticated_client, deposit_collection, deposit, codemeta_entry_data
-    )
+    metadata_xml_atoms = [
+        atom_dataset["entry-data2"],
+        atom_dataset["entry-data3"],
+        codemeta_entry_data,
+    ]
+    metadata_xml_raws = [parse_xml(xml) for xml in metadata_xml_atoms]
+    for atom_xml in metadata_xml_atoms:
+        update_deposit_with_metadata(
+            authenticated_client, deposit_collection, deposit, atom_xml,
+        )
 
     for url in private_get_raw_url_endpoints(deposit_collection, deposit):
         response = authenticated_client.get(url)
@@ -214,52 +210,14 @@
         assert response.status_code == status.HTTP_200_OK
         assert response._headers["content-type"][1] == "application/json"
         data = response.json()
-
-        metadata = {
-            "author": [
-                "some awesome author",
-                "another one",
-                "no one",
-                {"email": "hal@ccsd.cnrs.fr", "name": "HAL"},
-            ],
-            "client": "hal",
-            "codemeta:applicationCategory": "test",
-            "codemeta:author": {"codemeta:name": "Morane Gruenpeter"},
-            "codemeta:dateCreated": [
-                "2017-10-07T15:17:08Z",
-                "2015-04-06T17:08:47+02:00",
-            ],
-            "codemeta:datePublished": "2017-05-03T16:08:47+02:00",
-            "codemeta:description": "this is the description",
-            "codemeta:developmentStatus": "stable",
-            "codemeta:keywords": "DSP programming",
-            "codemeta:license": [
-                {"codemeta:name": "GNU General Public License v3.0 only"},
-                {
-                    "codemeta:name": "CeCILL "
-                    "Free "
-                    "Software "
-                    "License "
-                    "Agreement "
-                    "v1.1"
-                },
-            ],
-            "codemeta:programmingLanguage": ["php", "python", "C"],
-            "codemeta:runtimePlatform": "phpstorm",
-            "codemeta:url": "https://hal-test.archives-ouvertes.fr/hal-01243065",  # noqa
-            "codemeta:version": "1",
-            "external_identifier": ["some-external-id", "hal-01243065"],
-            "id": "hal-01243065",
-            "title": "Composing a Web of Audio Applications",
-            "url": "https://hal-test.archives-ouvertes.fr/some-external-id",
-        }
-        expected_meta = {
+        assert data == {
             "origin": {
                 "type": "deposit",
                 "url": "https://hal-test.archives-ouvertes.fr/hal-01243065",
             },
             "origin_metadata": {
-                "metadata": metadata,
+                "metadata_raw": metadata_xml_atoms,
+                "metadata_dict": utils.merge(*metadata_xml_raws),
                 "provider": {
                     "metadata": {},
                     "provider_name": "",
@@ -291,7 +249,6 @@
                 "revision_parents": [],
             },
         }
-        assert data == expected_meta
 
 
 def test_read_metadata_4(
@@ -317,48 +274,14 @@
         assert response._headers["content-type"][1] == "application/json"
         data = response.json()
 
-        metadata = {
-            "author": {"email": "hal@ccsd.cnrs.fr", "name": "HAL"},
-            "client": "hal",
-            "codemeta:applicationCategory": "test",
-            "codemeta:author": {"codemeta:name": "Morane Gruenpeter"},
-            "codemeta:description": "this is the description",
-            "codemeta:developmentStatus": "stable",
-            "codemeta:keywords": "DSP programming",
-            "codemeta:license": [
-                {
-                    "codemeta:name": "GNU "
-                    "General "
-                    "Public "
-                    "License "
-                    "v3.0 "
-                    "only"
-                },
-                {
-                    "codemeta:name": "CeCILL "
-                    "Free "
-                    "Software "
-                    "License "
-                    "Agreement "
-                    "v1.1"
-                },
-            ],
-            "codemeta:programmingLanguage": ["php", "python", "C"],
-            "codemeta:runtimePlatform": "phpstorm",
-            "codemeta:url": "https://hal-test.archives-ouvertes.fr/hal-01243065",
-            "codemeta:version": "1",
-            "external_identifier": "hal-01243065",
-            "id": "hal-01243065",
-            "title": "Composing a Web of Audio Applications",
-        }
-
         expected_origin = {
             "type": "deposit",
             "url": "https://hal-test.archives-ouvertes.fr/%s" % (deposit.external_id),
         }
 
         expected_origin_metadata = {
-            "metadata": metadata,
+            "metadata_raw": [codemeta_entry_data],
+            "metadata_dict": parse_xml(codemeta_entry_data),
             "provider": {
                 "metadata": {},
                 "provider_name": "",
@@ -438,51 +361,9 @@
             "url": "https://hal-test.archives-ouvertes.fr/external-id-partial",
         }
 
-        metadata = {
-            "author": {"email": "hal@ccsd.cnrs.fr", "name": "HAL"},
-            "client": "hal",
-            "codemeta:applicationCategory": "test",
-            "codemeta:author": {"codemeta:name": "Morane Gruenpeter"},
-            "codemeta:dateCreated": [
-                "2015-04-06T17:08:47+02:00",
-                "2016-04-06T17:08:47+02:00",
-            ],
-            "codemeta:datePublished": [
-                "2017-05-03T16:08:47+02:00",
-                "2018-05-03T16:08:47+02:00",
-            ],
-            "codemeta:description": "this is the description",
-            "codemeta:developmentStatus": "stable",
-            "codemeta:keywords": "DSP programming",
-            "codemeta:license": [
-                {
-                    "codemeta:name": "GNU "
-                    "General "
-                    "Public "
-                    "License "
-                    "v3.0 "
-                    "only"
-                },
-                {
-                    "codemeta:name": "CeCILL "
-                    "Free "
-                    "Software "
-                    "License "
-                    "Agreement "
-                    "v1.1"
-                },
-            ],
-            "codemeta:programmingLanguage": ["php", "python", "C"],
-            "codemeta:runtimePlatform": "phpstorm",
-            "codemeta:url": "https://hal-test.archives-ouvertes.fr/hal-01243065",  # noqa
-            "codemeta:version": "1",
-            "external_identifier": "hal-01243065",
-            "id": "hal-01243065",
-            "title": "Composing a Web of Audio Applications",
-        }
-
         expected_origin_metadata = {
-            "metadata": metadata,
+            "metadata_raw": [codemeta_entry_data],
+            "metadata_dict": parse_xml(codemeta_entry_data),
             "provider": {
                 "metadata": {},
                 "provider_name": "",