Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F7147853
D4100.id14471.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
18 KB
Subscribers
None
D4100.id14471.diff
View Options
diff --git a/swh/deposit/api/private/__init__.py b/swh/deposit/api/private/__init__.py
--- a/swh/deposit/api/private/__init__.py
+++ b/swh/deposit/api/private/__init__.py
@@ -3,6 +3,8 @@
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+from typing import Any, Dict, List, Tuple
+
from rest_framework.permissions import AllowAny
from swh.deposit import utils
@@ -39,22 +41,27 @@
for deposit_request in deposit_requests:
yield deposit_request
- def _metadata_get(self, deposit):
- """Given a deposit, aggregate all metadata requests.
+ def _metadata_get(self, deposit: Deposit) -> Tuple[Dict[str, Any], List[str]]:
+ """Given a deposit, aggregate all metadata requests into one dict and return both that
+ aggregated metadata dict and the list of raw_metadata.
Args:
- deposit (Deposit): The deposit instance to extract
- metadata from.
+ deposit: The deposit instance to extract metadata from
Returns:
- metadata dict from the deposit.
+ Tuple of aggregated metadata dict, list of raw_metadata
"""
- metadata = (
- m.metadata
- for m in self._deposit_requests(deposit, request_type=METADATA_TYPE)
- )
- return utils.merge(*metadata)
+ metadata: List[Dict[str, Any]] = []
+ raw_metadata: List[str] = []
+ for deposit_request in self._deposit_requests(
+ deposit, request_type=METADATA_TYPE
+ ):
+ metadata.append(deposit_request.metadata)
+ raw_metadata.append(deposit_request.raw_metadata)
+
+ aggregated_metadata = utils.merge(*metadata)
+ return (aggregated_metadata, raw_metadata)
class APIPrivateView(APIConfig, AuthenticatedAPIView):
diff --git a/swh/deposit/api/private/deposit_check.py b/swh/deposit/api/private/deposit_check.py
--- a/swh/deposit/api/private/deposit_check.py
+++ b/swh/deposit/api/private/deposit_check.py
@@ -11,6 +11,7 @@
import zipfile
from rest_framework import status
+from rest_framework.request import Request
from swh.scheduler.utils import create_oneshot_task_dict
@@ -130,22 +131,22 @@
return True, None
def process_get(
- self, req, collection_name: str, deposit_id: int
+ self, req: Request, collection_name: str, deposit_id: int
) -> Tuple[int, Dict, str]:
"""Build a unique tarball from the multiple received and stream that
content to the client.
Args:
- req (Request):
- collection_name (str): Collection owning the deposit
- deposit_id (id): Deposit concerned by the reading
+ req: Client request
+ collection_name: Collection owning the deposit
+ deposit_id: Deposit concerned by the reading
Returns:
Tuple status, stream of content, content-type
"""
deposit = Deposit.objects.get(pk=deposit_id)
- metadata = self._metadata_get(deposit)
+ metadata, _ = self._metadata_get(deposit)
problems: Dict = {}
# will check each deposit's associated request (both of type
# archive and metadata) for errors
diff --git a/swh/deposit/api/private/deposit_read.py b/swh/deposit/api/private/deposit_read.py
--- a/swh/deposit/api/private/deposit_read.py
+++ b/swh/deposit/api/private/deposit_read.py
@@ -131,26 +131,44 @@
commit_date = deposit.complete_date
return (normalize_date(author_date), normalize_date(commit_date))
- def metadata_read(self, deposit):
- """Read and aggregate multiple data on deposit into one unified data
- dictionary.
+ def metadata_read(self, deposit: Deposit) -> Dict[str, Any]:
+ """Read and aggregate multiple deposit information into one unified dictionary.
Args:
- deposit (Deposit): Deposit concerned by the data aggregation.
+ deposit: Deposit concerned by the data aggregation.
Returns:
- Dictionary of data representing the deposit to inject in swh.
+ Dictionary of deposit information read by the deposit loader, with the
+ following keys:
+
+ **origin** (Dict): Information about the origin
+
+ **origin_metadata** (Dict): Metadata about the origin to load
+
+ **metadata_raw** (List[str]): List of raw metadata received for the
+ deposit
+
+ **metadata_dict** (Dict): Deposit aggregated metadata into one dict
+
+ **provider** (Dict): the metadata provider information about the
+ deposit client
+
+ **tool** (Dict): the deposit information
+
+ **deposit** (Dict): deposit information relevant to build the revision
+ (author_date, committer_date, etc...)
"""
- metadata = self._metadata_get(deposit)
+ metadata, raw_metadata = self._metadata_get(deposit)
# Read information metadata
data = {"origin": {"type": "deposit", "url": deposit.origin_url,}}
author_date, commit_date = self._normalize_dates(deposit, metadata)
if deposit.parent:
- swh_persistent_id = deposit.parent.swhid
- swhid = identifiers.parse_swhid(swh_persistent_id)
+ parent_swhid = deposit.parent.swhid
+ assert parent_swhid is not None
+ swhid = identifiers.parse_swhid(parent_swhid)
parent_revision = swhid.object_id
parents = [parent_revision]
else:
@@ -165,7 +183,8 @@
"metadata": {},
},
"tool": self.tool,
- "metadata": metadata,
+ "metadata_raw": raw_metadata,
+ "metadata_dict": metadata,
}
data["deposit"] = {
"id": deposit.id,
diff --git a/swh/deposit/tests/api/test_deposit_private_read_metadata.py b/swh/deposit/tests/api/test_deposit_private_read_metadata.py
--- a/swh/deposit/tests/api/test_deposit_private_read_metadata.py
+++ b/swh/deposit/tests/api/test_deposit_private_read_metadata.py
@@ -6,9 +6,10 @@
from django.urls import reverse
from rest_framework import status
-from swh.deposit import __version__
+from swh.deposit import __version__, utils
from swh.deposit.config import EDIT_SE_IRI, PRIVATE_GET_DEPOSIT_METADATA, SWH_PERSON
from swh.deposit.models import Deposit
+from swh.deposit.parsers import parse_xml
PRIVATE_GET_DEPOSIT_METADATA_NC = PRIVATE_GET_DEPOSIT_METADATA + "-nc"
@@ -22,14 +23,6 @@
]
-def update_deposit(authenticated_client, collection, deposit, atom_dataset):
- for atom_data in ["entry-data2", "entry-data3"]:
- update_deposit_with_metadata(
- authenticated_client, collection, deposit, atom_dataset[atom_data]
- )
- return deposit
-
-
def update_deposit_with_metadata(authenticated_client, collection, deposit, metadata):
# update deposit's metadata
response = authenticated_client.post(
@@ -52,28 +45,29 @@
deposit = partial_deposit
deposit.external_id = "some-external-id"
deposit.save()
- deposit = update_deposit(
- authenticated_client, deposit_collection, deposit, atom_dataset
- )
+
+ metadata_xml_atoms = [
+ atom_dataset[atom_key] for atom_key in ["entry-data2", "entry-data3"]
+ ]
+ metadata_xml_raws = [parse_xml(xml) for xml in metadata_xml_atoms]
+ for atom_xml in metadata_xml_atoms:
+ deposit = update_deposit_with_metadata(
+ authenticated_client, deposit_collection, deposit, atom_xml,
+ )
for url in private_get_raw_url_endpoints(deposit_collection, deposit):
response = authenticated_client.get(url)
assert response.status_code == status.HTTP_200_OK
assert response._headers["content-type"][1] == "application/json"
data = response.json()
-
- expected_meta = {
+ assert data == {
"origin": {
"type": "deposit",
"url": "https://hal-test.archives-ouvertes.fr/some-external-id",
},
"origin_metadata": {
- "metadata": {
- "author": ["some awesome author", "another one", "no one"],
- "codemeta:dateCreated": "2017-10-07T15:17:08Z",
- "external_identifier": "some-external-id",
- "url": "https://hal-test.archives-ouvertes.fr/some-external-id", # noqa
- },
+ "metadata_raw": metadata_xml_atoms,
+ "metadata_dict": utils.merge(*metadata_xml_raws),
"provider": {
"metadata": {},
"provider_name": "",
@@ -106,8 +100,6 @@
},
}
- assert data == expected_meta
-
def test_read_metadata_revision_with_parent(
authenticated_client, deposit_collection, partial_deposit, atom_dataset
@@ -118,9 +110,15 @@
deposit = partial_deposit
deposit.external_id = "some-external-id"
deposit.save()
- deposit = update_deposit(
- authenticated_client, deposit_collection, deposit, atom_dataset
- )
+ metadata_xml_atoms = [
+ atom_dataset[atom_key] for atom_key in ["entry-data2", "entry-data3"]
+ ]
+ metadata_xml_raws = [parse_xml(xml) for xml in metadata_xml_atoms]
+ for atom_xml in metadata_xml_atoms:
+ deposit = update_deposit_with_metadata(
+ authenticated_client, deposit_collection, deposit, atom_xml,
+ )
+
rev_id = "da78a9d4cf1d5d29873693fd496142e3a18c20fa"
swhid = "swh:1:rev:%s" % rev_id
fake_parent = Deposit(
@@ -136,19 +134,14 @@
assert response.status_code == status.HTTP_200_OK
assert response._headers["content-type"][1] == "application/json"
data = response.json()
-
- expected_meta = {
+ assert data == {
"origin": {
"type": "deposit",
"url": "https://hal-test.archives-ouvertes.fr/some-external-id",
},
"origin_metadata": {
- "metadata": {
- "author": ["some awesome author", "another one", "no one"],
- "codemeta:dateCreated": "2017-10-07T15:17:08Z",
- "external_identifier": "some-external-id",
- "url": "https://hal-test.archives-ouvertes.fr/some-external-id", # noqa
- },
+ "metadata_raw": metadata_xml_atoms,
+ "metadata_dict": utils.merge(*metadata_xml_raws),
"provider": {
"metadata": {},
"provider_name": "",
@@ -181,8 +174,6 @@
},
}
- assert data == expected_meta
-
def test_read_metadata_3(
authenticated_client, deposit_collection, partial_deposit, atom_dataset
@@ -193,9 +184,7 @@
deposit = partial_deposit
deposit.external_id = "hal-01243065"
deposit.save()
- deposit = update_deposit(
- authenticated_client, deposit_collection, deposit, atom_dataset
- )
+
# add metadata to the deposit with datePublished and dateCreated
codemeta_entry_data = (
atom_dataset["metadata"]
@@ -204,9 +193,16 @@
<codemeta:datePublished>2017-05-03T16:08:47+02:00</codemeta:datePublished>
"""
)
- update_deposit_with_metadata(
- authenticated_client, deposit_collection, deposit, codemeta_entry_data
- )
+ metadata_xml_atoms = [
+ atom_dataset["entry-data2"],
+ atom_dataset["entry-data3"],
+ codemeta_entry_data,
+ ]
+ metadata_xml_raws = [parse_xml(xml) for xml in metadata_xml_atoms]
+ for atom_xml in metadata_xml_atoms:
+ update_deposit_with_metadata(
+ authenticated_client, deposit_collection, deposit, atom_xml,
+ )
for url in private_get_raw_url_endpoints(deposit_collection, deposit):
response = authenticated_client.get(url)
@@ -214,52 +210,14 @@
assert response.status_code == status.HTTP_200_OK
assert response._headers["content-type"][1] == "application/json"
data = response.json()
-
- metadata = {
- "author": [
- "some awesome author",
- "another one",
- "no one",
- {"email": "hal@ccsd.cnrs.fr", "name": "HAL"},
- ],
- "client": "hal",
- "codemeta:applicationCategory": "test",
- "codemeta:author": {"codemeta:name": "Morane Gruenpeter"},
- "codemeta:dateCreated": [
- "2017-10-07T15:17:08Z",
- "2015-04-06T17:08:47+02:00",
- ],
- "codemeta:datePublished": "2017-05-03T16:08:47+02:00",
- "codemeta:description": "this is the description",
- "codemeta:developmentStatus": "stable",
- "codemeta:keywords": "DSP programming",
- "codemeta:license": [
- {"codemeta:name": "GNU General Public License v3.0 only"},
- {
- "codemeta:name": "CeCILL "
- "Free "
- "Software "
- "License "
- "Agreement "
- "v1.1"
- },
- ],
- "codemeta:programmingLanguage": ["php", "python", "C"],
- "codemeta:runtimePlatform": "phpstorm",
- "codemeta:url": "https://hal-test.archives-ouvertes.fr/hal-01243065", # noqa
- "codemeta:version": "1",
- "external_identifier": ["some-external-id", "hal-01243065"],
- "id": "hal-01243065",
- "title": "Composing a Web of Audio Applications",
- "url": "https://hal-test.archives-ouvertes.fr/some-external-id",
- }
- expected_meta = {
+ assert data == {
"origin": {
"type": "deposit",
"url": "https://hal-test.archives-ouvertes.fr/hal-01243065",
},
"origin_metadata": {
- "metadata": metadata,
+ "metadata_raw": metadata_xml_atoms,
+ "metadata_dict": utils.merge(*metadata_xml_raws),
"provider": {
"metadata": {},
"provider_name": "",
@@ -291,7 +249,6 @@
"revision_parents": [],
},
}
- assert data == expected_meta
def test_read_metadata_4(
@@ -317,48 +274,14 @@
assert response._headers["content-type"][1] == "application/json"
data = response.json()
- metadata = {
- "author": {"email": "hal@ccsd.cnrs.fr", "name": "HAL"},
- "client": "hal",
- "codemeta:applicationCategory": "test",
- "codemeta:author": {"codemeta:name": "Morane Gruenpeter"},
- "codemeta:description": "this is the description",
- "codemeta:developmentStatus": "stable",
- "codemeta:keywords": "DSP programming",
- "codemeta:license": [
- {
- "codemeta:name": "GNU "
- "General "
- "Public "
- "License "
- "v3.0 "
- "only"
- },
- {
- "codemeta:name": "CeCILL "
- "Free "
- "Software "
- "License "
- "Agreement "
- "v1.1"
- },
- ],
- "codemeta:programmingLanguage": ["php", "python", "C"],
- "codemeta:runtimePlatform": "phpstorm",
- "codemeta:url": "https://hal-test.archives-ouvertes.fr/hal-01243065",
- "codemeta:version": "1",
- "external_identifier": "hal-01243065",
- "id": "hal-01243065",
- "title": "Composing a Web of Audio Applications",
- }
-
expected_origin = {
"type": "deposit",
"url": "https://hal-test.archives-ouvertes.fr/%s" % (deposit.external_id),
}
expected_origin_metadata = {
- "metadata": metadata,
+ "metadata_raw": [codemeta_entry_data],
+ "metadata_dict": parse_xml(codemeta_entry_data),
"provider": {
"metadata": {},
"provider_name": "",
@@ -438,51 +361,9 @@
"url": "https://hal-test.archives-ouvertes.fr/external-id-partial",
}
- metadata = {
- "author": {"email": "hal@ccsd.cnrs.fr", "name": "HAL"},
- "client": "hal",
- "codemeta:applicationCategory": "test",
- "codemeta:author": {"codemeta:name": "Morane Gruenpeter"},
- "codemeta:dateCreated": [
- "2015-04-06T17:08:47+02:00",
- "2016-04-06T17:08:47+02:00",
- ],
- "codemeta:datePublished": [
- "2017-05-03T16:08:47+02:00",
- "2018-05-03T16:08:47+02:00",
- ],
- "codemeta:description": "this is the description",
- "codemeta:developmentStatus": "stable",
- "codemeta:keywords": "DSP programming",
- "codemeta:license": [
- {
- "codemeta:name": "GNU "
- "General "
- "Public "
- "License "
- "v3.0 "
- "only"
- },
- {
- "codemeta:name": "CeCILL "
- "Free "
- "Software "
- "License "
- "Agreement "
- "v1.1"
- },
- ],
- "codemeta:programmingLanguage": ["php", "python", "C"],
- "codemeta:runtimePlatform": "phpstorm",
- "codemeta:url": "https://hal-test.archives-ouvertes.fr/hal-01243065", # noqa
- "codemeta:version": "1",
- "external_identifier": "hal-01243065",
- "id": "hal-01243065",
- "title": "Composing a Web of Audio Applications",
- }
-
expected_origin_metadata = {
- "metadata": metadata,
+ "metadata_raw": [codemeta_entry_data],
+ "metadata_dict": parse_xml(codemeta_entry_data),
"provider": {
"metadata": {},
"provider_name": "",
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jan 23, 1:59 AM (19 h, 33 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3223663
Attached To
D4100: deposit: Transit raw metadata to the loader to unify with metadata update scenario
Event Timeline
Log In to Comment