Page MenuHomeSoftware Heritage

D4100.id14471.diff
No OneTemporary

D4100.id14471.diff

diff --git a/swh/deposit/api/private/__init__.py b/swh/deposit/api/private/__init__.py
--- a/swh/deposit/api/private/__init__.py
+++ b/swh/deposit/api/private/__init__.py
@@ -3,6 +3,8 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+from typing import Any, Dict, List, Tuple
+
from rest_framework.permissions import AllowAny
from swh.deposit import utils
@@ -39,22 +41,27 @@
for deposit_request in deposit_requests:
yield deposit_request
- def _metadata_get(self, deposit):
- """Given a deposit, aggregate all metadata requests.
+ def _metadata_get(self, deposit: Deposit) -> Tuple[Dict[str, Any], List[str]]:
+ """Given a deposit, retrieve all metadata requests into one Dict and return both that
+ aggregated metadata dict and the list of raw_metadata.
Args:
- deposit (Deposit): The deposit instance to extract
- metadata from.
+ deposit: The deposit instance to extract metadata from
Returns:
- metadata dict from the deposit.
+ Tuple of aggregated metadata dict, list of raw_metadata
"""
- metadata = (
- m.metadata
- for m in self._deposit_requests(deposit, request_type=METADATA_TYPE)
- )
- return utils.merge(*metadata)
+ metadata: List[Dict[str, Any]] = []
+ raw_metadata: List[str] = []
+ for deposit_request in self._deposit_requests(
+ deposit, request_type=METADATA_TYPE
+ ):
+ metadata.append(deposit_request.metadata)
+ raw_metadata.append(deposit_request.raw_metadata)
+
+ aggregated_metadata = utils.merge(*metadata)
+ return (aggregated_metadata, raw_metadata)
class APIPrivateView(APIConfig, AuthenticatedAPIView):
diff --git a/swh/deposit/api/private/deposit_check.py b/swh/deposit/api/private/deposit_check.py
--- a/swh/deposit/api/private/deposit_check.py
+++ b/swh/deposit/api/private/deposit_check.py
@@ -11,6 +11,7 @@
import zipfile
from rest_framework import status
+from rest_framework.request import Request
from swh.scheduler.utils import create_oneshot_task_dict
@@ -130,22 +131,22 @@
return True, None
def process_get(
- self, req, collection_name: str, deposit_id: int
+ self, req: Request, collection_name: str, deposit_id: int
) -> Tuple[int, Dict, str]:
"""Build a unique tarball from the multiple received and stream that
content to the client.
Args:
- req (Request):
- collection_name (str): Collection owning the deposit
- deposit_id (id): Deposit concerned by the reading
+ req: Client request
+ collection_name: Collection owning the deposit
+ deposit_id: Deposit concerned by the reading
Returns:
Tuple status, stream of content, content-type
"""
deposit = Deposit.objects.get(pk=deposit_id)
- metadata = self._metadata_get(deposit)
+ metadata, _ = self._metadata_get(deposit)
problems: Dict = {}
# will check each deposit's associated request (both of type
# archive and metadata) for errors
diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py
--- a/swh/deposit/api/private/deposit_read.py
+++ b/swh/deposit/api/private/deposit_read.py
@@ -131,26 +131,44 @@
commit_date = deposit.complete_date
return (normalize_date(author_date), normalize_date(commit_date))
- def metadata_read(self, deposit):
- """Read and aggregate multiple data on deposit into one unified data
- dictionary.
+ def metadata_read(self, deposit: Deposit) -> Dict[str, Any]:
+ """Read and aggregate multiple deposit information into one unified dictionary.
Args:
- deposit (Deposit): Deposit concerned by the data aggregation.
+ deposit: Deposit concerned by the data aggregation.
Returns:
- Dictionary of data representing the deposit to inject in swh.
+ Dictionary of deposit information read by the deposit loader, with the
+ following keys:
+
+ **origin** (Dict): Information about the origin
+
+ **origin_metadata** (Dict): Metadata about the origin to load
+
+ **metadata_raw** (List[str]): List of raw metadata received for the
+ deposit
+
+ **metadata_dict** (Dict): Deposit aggregated metadata into one dict
+
+ **provider** (Dict): the metadata provider information about the
+ deposit client
+
+ **tool** (Dict): the deposit information
+
+ **deposit** (Dict): deposit information relevant to build the revision
+ (author_date, committer_date, etc...)
"""
- metadata = self._metadata_get(deposit)
+ metadata, raw_metadata = self._metadata_get(deposit)
# Read information metadata
data = {"origin": {"type": "deposit", "url": deposit.origin_url,}}
author_date, commit_date = self._normalize_dates(deposit, metadata)
if deposit.parent:
- swh_persistent_id = deposit.parent.swhid
- swhid = identifiers.parse_swhid(swh_persistent_id)
+ parent_swhid = deposit.parent.swhid
+ assert parent_swhid is not None
+ swhid = identifiers.parse_swhid(parent_swhid)
parent_revision = swhid.object_id
parents = [parent_revision]
else:
@@ -165,7 +183,8 @@
"metadata": {},
},
"tool": self.tool,
- "metadata": metadata,
+ "metadata_raw": raw_metadata,
+ "metadata_dict": metadata,
}
data["deposit"] = {
"id": deposit.id,
diff --git a/swh/deposit/tests/api/test_deposit_private_read_metadata.py b/swh/deposit/tests/api/test_deposit_private_read_metadata.py
--- a/swh/deposit/tests/api/test_deposit_private_read_metadata.py
+++ b/swh/deposit/tests/api/test_deposit_private_read_metadata.py
@@ -6,9 +6,10 @@
from django.urls import reverse
from rest_framework import status
-from swh.deposit import __version__
+from swh.deposit import __version__, utils
from swh.deposit.config import EDIT_SE_IRI, PRIVATE_GET_DEPOSIT_METADATA, SWH_PERSON
from swh.deposit.models import Deposit
+from swh.deposit.parsers import parse_xml
PRIVATE_GET_DEPOSIT_METADATA_NC = PRIVATE_GET_DEPOSIT_METADATA + "-nc"
@@ -22,14 +23,6 @@
]
-def update_deposit(authenticated_client, collection, deposit, atom_dataset):
- for atom_data in ["entry-data2", "entry-data3"]:
- update_deposit_with_metadata(
- authenticated_client, collection, deposit, atom_dataset[atom_data]
- )
- return deposit
-
-
def update_deposit_with_metadata(authenticated_client, collection, deposit, metadata):
# update deposit's metadata
response = authenticated_client.post(
@@ -52,28 +45,29 @@
deposit = partial_deposit
deposit.external_id = "some-external-id"
deposit.save()
- deposit = update_deposit(
- authenticated_client, deposit_collection, deposit, atom_dataset
- )
+
+ metadata_xml_atoms = [
+ atom_dataset[atom_key] for atom_key in ["entry-data2", "entry-data3"]
+ ]
+ metadata_xml_raws = [parse_xml(xml) for xml in metadata_xml_atoms]
+ for atom_xml in metadata_xml_atoms:
+ deposit = update_deposit_with_metadata(
+ authenticated_client, deposit_collection, deposit, atom_xml,
+ )
for url in private_get_raw_url_endpoints(deposit_collection, deposit):
response = authenticated_client.get(url)
assert response.status_code == status.HTTP_200_OK
assert response._headers["content-type"][1] == "application/json"
data = response.json()
-
- expected_meta = {
+ assert data == {
"origin": {
"type": "deposit",
"url": "https://hal-test.archives-ouvertes.fr/some-external-id",
},
"origin_metadata": {
- "metadata": {
- "author": ["some awesome author", "another one", "no one"],
- "codemeta:dateCreated": "2017-10-07T15:17:08Z",
- "external_identifier": "some-external-id",
- "url": "https://hal-test.archives-ouvertes.fr/some-external-id", # noqa
- },
+ "metadata_raw": metadata_xml_atoms,
+ "metadata_dict": utils.merge(*metadata_xml_raws),
"provider": {
"metadata": {},
"provider_name": "",
@@ -106,8 +100,6 @@
},
}
- assert data == expected_meta
-
def test_read_metadata_revision_with_parent(
authenticated_client, deposit_collection, partial_deposit, atom_dataset
@@ -118,9 +110,15 @@
deposit = partial_deposit
deposit.external_id = "some-external-id"
deposit.save()
- deposit = update_deposit(
- authenticated_client, deposit_collection, deposit, atom_dataset
- )
+ metadata_xml_atoms = [
+ atom_dataset[atom_key] for atom_key in ["entry-data2", "entry-data3"]
+ ]
+ metadata_xml_raws = [parse_xml(xml) for xml in metadata_xml_atoms]
+ for atom_xml in metadata_xml_atoms:
+ deposit = update_deposit_with_metadata(
+ authenticated_client, deposit_collection, deposit, atom_xml,
+ )
+
rev_id = "da78a9d4cf1d5d29873693fd496142e3a18c20fa"
swhid = "swh:1:rev:%s" % rev_id
fake_parent = Deposit(
@@ -136,19 +134,14 @@
assert response.status_code == status.HTTP_200_OK
assert response._headers["content-type"][1] == "application/json"
data = response.json()
-
- expected_meta = {
+ assert data == {
"origin": {
"type": "deposit",
"url": "https://hal-test.archives-ouvertes.fr/some-external-id",
},
"origin_metadata": {
- "metadata": {
- "author": ["some awesome author", "another one", "no one"],
- "codemeta:dateCreated": "2017-10-07T15:17:08Z",
- "external_identifier": "some-external-id",
- "url": "https://hal-test.archives-ouvertes.fr/some-external-id", # noqa
- },
+ "metadata_raw": metadata_xml_atoms,
+ "metadata_dict": utils.merge(*metadata_xml_raws),
"provider": {
"metadata": {},
"provider_name": "",
@@ -181,8 +174,6 @@
},
}
- assert data == expected_meta
-
def test_read_metadata_3(
authenticated_client, deposit_collection, partial_deposit, atom_dataset
@@ -193,9 +184,7 @@
deposit = partial_deposit
deposit.external_id = "hal-01243065"
deposit.save()
- deposit = update_deposit(
- authenticated_client, deposit_collection, deposit, atom_dataset
- )
+
# add metadata to the deposit with datePublished and dateCreated
codemeta_entry_data = (
atom_dataset["metadata"]
@@ -204,9 +193,16 @@
<codemeta:datePublished>2017-05-03T16:08:47+02:00</codemeta:datePublished>
"""
)
- update_deposit_with_metadata(
- authenticated_client, deposit_collection, deposit, codemeta_entry_data
- )
+ metadata_xml_atoms = [
+ atom_dataset["entry-data2"],
+ atom_dataset["entry-data3"],
+ codemeta_entry_data,
+ ]
+ metadata_xml_raws = [parse_xml(xml) for xml in metadata_xml_atoms]
+ for atom_xml in metadata_xml_atoms:
+ update_deposit_with_metadata(
+ authenticated_client, deposit_collection, deposit, atom_xml,
+ )
for url in private_get_raw_url_endpoints(deposit_collection, deposit):
response = authenticated_client.get(url)
@@ -214,52 +210,14 @@
assert response.status_code == status.HTTP_200_OK
assert response._headers["content-type"][1] == "application/json"
data = response.json()
-
- metadata = {
- "author": [
- "some awesome author",
- "another one",
- "no one",
- {"email": "hal@ccsd.cnrs.fr", "name": "HAL"},
- ],
- "client": "hal",
- "codemeta:applicationCategory": "test",
- "codemeta:author": {"codemeta:name": "Morane Gruenpeter"},
- "codemeta:dateCreated": [
- "2017-10-07T15:17:08Z",
- "2015-04-06T17:08:47+02:00",
- ],
- "codemeta:datePublished": "2017-05-03T16:08:47+02:00",
- "codemeta:description": "this is the description",
- "codemeta:developmentStatus": "stable",
- "codemeta:keywords": "DSP programming",
- "codemeta:license": [
- {"codemeta:name": "GNU General Public License v3.0 only"},
- {
- "codemeta:name": "CeCILL "
- "Free "
- "Software "
- "License "
- "Agreement "
- "v1.1"
- },
- ],
- "codemeta:programmingLanguage": ["php", "python", "C"],
- "codemeta:runtimePlatform": "phpstorm",
- "codemeta:url": "https://hal-test.archives-ouvertes.fr/hal-01243065", # noqa
- "codemeta:version": "1",
- "external_identifier": ["some-external-id", "hal-01243065"],
- "id": "hal-01243065",
- "title": "Composing a Web of Audio Applications",
- "url": "https://hal-test.archives-ouvertes.fr/some-external-id",
- }
- expected_meta = {
+ assert data == {
"origin": {
"type": "deposit",
"url": "https://hal-test.archives-ouvertes.fr/hal-01243065",
},
"origin_metadata": {
- "metadata": metadata,
+ "metadata_raw": metadata_xml_atoms,
+ "metadata_dict": utils.merge(*metadata_xml_raws),
"provider": {
"metadata": {},
"provider_name": "",
@@ -291,7 +249,6 @@
"revision_parents": [],
},
}
- assert data == expected_meta
def test_read_metadata_4(
@@ -317,48 +274,14 @@
assert response._headers["content-type"][1] == "application/json"
data = response.json()
- metadata = {
- "author": {"email": "hal@ccsd.cnrs.fr", "name": "HAL"},
- "client": "hal",
- "codemeta:applicationCategory": "test",
- "codemeta:author": {"codemeta:name": "Morane Gruenpeter"},
- "codemeta:description": "this is the description",
- "codemeta:developmentStatus": "stable",
- "codemeta:keywords": "DSP programming",
- "codemeta:license": [
- {
- "codemeta:name": "GNU "
- "General "
- "Public "
- "License "
- "v3.0 "
- "only"
- },
- {
- "codemeta:name": "CeCILL "
- "Free "
- "Software "
- "License "
- "Agreement "
- "v1.1"
- },
- ],
- "codemeta:programmingLanguage": ["php", "python", "C"],
- "codemeta:runtimePlatform": "phpstorm",
- "codemeta:url": "https://hal-test.archives-ouvertes.fr/hal-01243065",
- "codemeta:version": "1",
- "external_identifier": "hal-01243065",
- "id": "hal-01243065",
- "title": "Composing a Web of Audio Applications",
- }
-
expected_origin = {
"type": "deposit",
"url": "https://hal-test.archives-ouvertes.fr/%s" % (deposit.external_id),
}
expected_origin_metadata = {
- "metadata": metadata,
+ "metadata_raw": [codemeta_entry_data],
+ "metadata_dict": parse_xml(codemeta_entry_data),
"provider": {
"metadata": {},
"provider_name": "",
@@ -438,51 +361,9 @@
"url": "https://hal-test.archives-ouvertes.fr/external-id-partial",
}
- metadata = {
- "author": {"email": "hal@ccsd.cnrs.fr", "name": "HAL"},
- "client": "hal",
- "codemeta:applicationCategory": "test",
- "codemeta:author": {"codemeta:name": "Morane Gruenpeter"},
- "codemeta:dateCreated": [
- "2015-04-06T17:08:47+02:00",
- "2016-04-06T17:08:47+02:00",
- ],
- "codemeta:datePublished": [
- "2017-05-03T16:08:47+02:00",
- "2018-05-03T16:08:47+02:00",
- ],
- "codemeta:description": "this is the description",
- "codemeta:developmentStatus": "stable",
- "codemeta:keywords": "DSP programming",
- "codemeta:license": [
- {
- "codemeta:name": "GNU "
- "General "
- "Public "
- "License "
- "v3.0 "
- "only"
- },
- {
- "codemeta:name": "CeCILL "
- "Free "
- "Software "
- "License "
- "Agreement "
- "v1.1"
- },
- ],
- "codemeta:programmingLanguage": ["php", "python", "C"],
- "codemeta:runtimePlatform": "phpstorm",
- "codemeta:url": "https://hal-test.archives-ouvertes.fr/hal-01243065", # noqa
- "codemeta:version": "1",
- "external_identifier": "hal-01243065",
- "id": "hal-01243065",
- "title": "Composing a Web of Audio Applications",
- }
-
expected_origin_metadata = {
- "metadata": metadata,
+ "metadata_raw": [codemeta_entry_data],
+ "metadata_dict": parse_xml(codemeta_entry_data),
"provider": {
"metadata": {},
"provider_name": "",

File Metadata

Mime Type
text/plain
Expires
Thu, Jan 23, 1:59 AM (19 h, 33 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3223663

Event Timeline