Changeset View
Standalone View
swh/deposit/api/deposit_update.py
# Copyright (C) 2017-2020 The Software Heritage developers | # Copyright (C) 2017-2020 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from datetime import datetime, timezone | |||||
from typing import Any, Dict, Optional, Tuple | from typing import Any, Dict, Optional, Tuple | ||||
from rest_framework import status | from rest_framework import status | ||||
from rest_framework.request import Request | from rest_framework.request import Request | ||||
from swh.deposit.api.checks import check_metadata | from swh.deposit.api.checks import check_metadata | ||||
from swh.deposit.api.converters import convert_status_detail | from swh.deposit.api.converters import convert_status_detail | ||||
from swh.deposit.models import Deposit | from swh.deposit.models import Deposit | ||||
▲ Show 20 Lines • Show All 181 Lines • ▼ Show 20 Lines | ) -> Dict[str, Any]: | ||||
deposit_id=deposit_id, | deposit_id=deposit_id, | ||||
replace_metadata=True, | replace_metadata=True, | ||||
) | ) | ||||
# Update metadata on a deposit already ingested | # Update metadata on a deposit already ingested | ||||
# Write to the metadata storage (and the deposit backend) | # Write to the metadata storage (and the deposit backend) | ||||
# no ingestion triggered | # no ingestion triggered | ||||
date_now = datetime.now(tz=timezone.utc) | |||||
deposit = Deposit.objects.get(pk=deposit_id) | deposit = Deposit.objects.get(pk=deposit_id) | ||||
assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS | assert deposit.status == DEPOSIT_STATUS_LOAD_SUCCESS | ||||
if swhid != deposit.swhid: | if swhid != deposit.swhid: | ||||
return make_error_dict( | return make_error_dict( | ||||
BAD_REQUEST, | BAD_REQUEST, | ||||
f"Mismatched provided SWHID {swhid} with deposit's {deposit.swhid}.", | f"Mismatched provided SWHID {swhid} with deposit's {deposit.swhid}.", | ||||
"The provided SWHID does not match the deposit to update. " | "The provided SWHID does not match the deposit to update. " | ||||
▲ Show 20 Lines • Show All 61 Lines • ▼ Show 20 Lines | ) -> Dict[str, Any]: | ||||
origin=deposit.origin_url, | origin=deposit.origin_url, | ||||
) | ) | ||||
# write to metadata storage | # write to metadata storage | ||||
self.storage_metadata.metadata_authority_add([metadata_authority]) | self.storage_metadata.metadata_authority_add([metadata_authority]) | ||||
self.storage_metadata.metadata_fetcher_add([metadata_fetcher]) | self.storage_metadata.metadata_fetcher_add([metadata_fetcher]) | ||||
self.storage_metadata.raw_extrinsic_metadata_add([metadata_object]) | self.storage_metadata.raw_extrinsic_metadata_add([metadata_object]) | ||||
# ensure that new extrinsic metadata sent is stored in the metadata storage | |||||
page_result = self.storage_metadata.raw_extrinsic_metadata_get( | |||||
MetadataTargetType.DIRECTORY, | |||||
deposit_swhid, | |||||
metadata_authority, | |||||
after=date_now, | |||||
) | |||||
found: bool = False | |||||
# one page result iteration should be enough since we pass the date (which is | |||||
vlorentz: Why this assumption?
It has the potential to cause bugs, so just use `swh.core.api.classes.stream_results`. It fetches new pages lazily, so if extra pages aren't needed, it won't fetch them.
Done Inline ActionsI think that since we start at "around" now (after=date_now), the But it's mostly because I thought of calling stream_results but could not I would not mind a sample if you have that in store ;) ardumont: I think that since we start at "around" now (`after=date_now`), the
probability that happens… | |||||
Done Inline Actions
vlorentz: it's not 0 if for some reason HAL sent us a burst of updates at the same time (e.g. a user hammering an update button in their UI):

    results: Iterable[RawExtrinsicMetadata] = stream_results(
        self.storage_metadata.raw_extrinsic_metadata_get,
        MetadataTargetType.DIRECTORY,
        deposit_swhid,
        metadata_authority,
        after=date_now,
    )
Done Inline Actionsi did not say 0, i said tends towards 0. Even with a user bursting updates, i'm not convinced we'll hit the 10k limits (default limit to the get call). Thanks for stream_results snippet .oO(d'oh) ardumont: i did not say 0, i said `tends towards 0`.
Even with a user bursting updates, i'm not… | |||||
# around now) as the minimal boundary for the lookup | |||||
for extrinsic_metadata in page_result.results: | |||||
if ( | |||||
Not Done Inline Actions@vlorentz I saw somewhere : If yes, this means that the format inserted to the ERMDS on the deposit side should match the chosen format. moranegg: @vlorentz I saw somewhere :
`sword-v2-atom-codemeta-in-json`
should this be:
`sword-v2-atom… | |||||
extrinsic_metadata.format == "sword-v2-atom-codemeta" | |||||
and extrinsic_metadata.metadata == metadata_object.metadata | |||||
): | |||||
found = True | |||||
break | |||||
if not found: | |||||
Done Inline Actions ardumont: No real idea on how to check that part, nor how to convey that information to the client. | |||||
raise ValueError("Failure to save the extrinsic metadata storage.") | |||||
return { | return { | ||||
"deposit_id": deposit_id, | "deposit_id": deposit_id, | ||||
"deposit_date": deposit_request.date, | "deposit_date": deposit_request.date, | ||||
"status": deposit.status, | "status": deposit.status, | ||||
"archive": None, | "archive": None, | ||||
} | } | ||||
def process_post( | def process_post( | ||||
▲ Show 20 Lines • Show All 55 Lines • Show Last 20 Lines |
why this assumption?
It has the potential to cause bugs, so just use swh.core.api.classes.stream_results. It fetches new pages lazily, so if extra pages aren't needed, it won't fetch them.