Changeset View
Standalone View
swh/deposit/api/common.py
Show All 12 Lines | |||||
from django.shortcuts import render | from django.shortcuts import render | ||||
from django.utils import timezone | from django.utils import timezone | ||||
from rest_framework import status | from rest_framework import status | ||||
from rest_framework.authentication import BasicAuthentication | from rest_framework.authentication import BasicAuthentication | ||||
from rest_framework.permissions import IsAuthenticated | from rest_framework.permissions import IsAuthenticated | ||||
from rest_framework.views import APIView | from rest_framework.views import APIView | ||||
from swh.model import hashutil | from swh.model import hashutil | ||||
from swh.scheduler.utils import create_oneshot_task_dict | |||||
from swh.deposit.utils import origin_url_from | |||||
from ..config import ( | from ..config import ( | ||||
SWHDefaultConfig, EDIT_SE_IRI, EM_IRI, CONT_FILE_IRI, | SWHDefaultConfig, EDIT_SE_IRI, EM_IRI, CONT_FILE_IRI, | ||||
ARCHIVE_KEY, METADATA_KEY, RAW_METADATA_KEY, STATE_IRI, | ARCHIVE_KEY, METADATA_KEY, RAW_METADATA_KEY, STATE_IRI, | ||||
DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_PARTIAL, | DEPOSIT_STATUS_DEPOSITED, DEPOSIT_STATUS_PARTIAL, | ||||
DEPOSIT_STATUS_VERIFIED, PRIVATE_CHECK_DEPOSIT, | |||||
DEPOSIT_STATUS_LOAD_SUCCESS, ARCHIVE_TYPE, METADATA_TYPE | DEPOSIT_STATUS_LOAD_SUCCESS, ARCHIVE_TYPE, METADATA_TYPE | ||||
) | ) | ||||
from ..errors import ( | from ..errors import ( | ||||
MAX_UPLOAD_SIZE_EXCEEDED, BAD_REQUEST, ERROR_CONTENT, | MAX_UPLOAD_SIZE_EXCEEDED, BAD_REQUEST, ERROR_CONTENT, | ||||
CHECKSUM_MISMATCH, make_error_dict, MEDIATION_NOT_ALLOWED, | CHECKSUM_MISMATCH, make_error_dict, MEDIATION_NOT_ALLOWED, | ||||
make_error_response_from_dict, FORBIDDEN, | make_error_response_from_dict, FORBIDDEN, | ||||
NOT_FOUND, make_error_response, METHOD_NOT_ALLOWED, | NOT_FOUND, make_error_response, METHOD_NOT_ALLOWED, | ||||
ParserError, PARSING_ERROR | ParserError, PARSING_ERROR | ||||
▲ Show 20 Lines • Show All 86 Lines • ▼ Show 20 Lines | def _compute_md5(self, filehandler): | ||||
the md5 checksum (str) | the md5 checksum (str) | ||||
""" | """ | ||||
h = hashlib.md5() | h = hashlib.md5() | ||||
for chunk in filehandler: | for chunk in filehandler: | ||||
h.update(chunk) | h.update(chunk) | ||||
return h.digest() | return h.digest() | ||||
def _deposit_put(self, deposit_id=None, in_progress=False, | def _deposit_put(self, req, deposit_id=None, in_progress=False, | ||||
external_id=None): | external_id=None): | ||||
"""Save/Update a deposit in db. | """Save/Update a deposit in db. | ||||
Args: | Args: | ||||
deposit_id (int): deposit identifier | deposit_id (int): deposit identifier | ||||
in_progress (dict): The deposit's status | in_progress (dict): The deposit's status | ||||
external_id (str): The external identifier to associate to | external_id (str): The external identifier to associate to | ||||
the deposit | the deposit | ||||
Show All 27 Lines | def _deposit_put(self, req, deposit_id=None, in_progress=False, | ||||
parent=deposit_parent) | parent=deposit_parent) | ||||
else: | else: | ||||
deposit = Deposit.objects.get(pk=deposit_id) | deposit = Deposit.objects.get(pk=deposit_id) | ||||
# update metadata | # update metadata | ||||
deposit.complete_date = complete_date | deposit.complete_date = complete_date | ||||
deposit.status = status_type | deposit.status = status_type | ||||
if self.config['checks']: | |||||
deposit.save() # needed to have a deposit id | |||||
args = [deposit.collection.name, deposit.id] | |||||
scheduler = self.scheduler | |||||
if (deposit.status == DEPOSIT_STATUS_DEPOSITED and | |||||
not deposit.check_task_id): | |||||
ardumont: I just realized something about absolute and relative urls.
Do you have a sample of the url… | |||||
Done Inline ActionsOk so absolute URLs are indeed URLs starting with http(s):// scheme. I don't really see why it should break worker job (dev, staging, prod). FTR I have this working in docker dev on my machine. In the following diffs I also have code to be able to test the interaction with the scheduler, which should also improve a bit the reliability of the stack. douardda: Ok so absolute URLs are indeed URLs starting with http(s):// scheme.
I don't really see why it… | |||||
Not Done Inline Actions
well, we already agreed some time ago it should not. In the current code of the deposit checker, it will break. (This is no longer true on the deposit loader so \m/, checked the scheduler db, see below) The deposit client (used by the checker) expects from its settings the absolute urls of the deposit server it's supposed to connect to. It then expects a relative url to fill in the blank to answer correctly. See some swh-scheduler' tasks sample (prod): softwareheritage-scheduler=> select arguments from task where type='check-deposit' limit 10; arguments ------------------------------------------------------------------------------ {"args": [], "kwargs": {"deposit_check_url": "/1/private/other-client/352/check/"}} {"args": [], "kwargs": {"deposit_check_url": "/1/private/hal/351/check/"}}
yes, server side, eveything was tested but that part indeed iirc. ardumont: > I don't really see why it should break worker job (dev, staging, prod).
well, we already… | |||||
Not Done Inline Actions
Clarifying... We should use absolute urls. That's all i'm trying to convey. This needs to include the necessary fix on the deposit client used by the checker. [1] and in the end, there will be a slight impact there as well, still related to relative url... (it does the same stuff to speak with the private api part to update the deposit's status, i can take care of this i think, just tell me if you want me to ;). ardumont: >> Ok so absolute URLs are indeed URLs starting with http(s):// scheme.
> well, we already… | |||||
Not Done Inline Actions
never mind that part, i think it's fine as is. ardumont: > [1] and in the end, there will be a slight impact there as well, still related to relative… | |||||
check_url = req.build_absolute_uri( | |||||
reverse(PRIVATE_CHECK_DEPOSIT, args=args)) | |||||
task = create_oneshot_task_dict( | |||||
'check-deposit', deposit_check_url=check_url) | |||||
check_task_id = scheduler.create_tasks([task])[0]['id'] | |||||
deposit.check_task_id = check_task_id | |||||
elif (deposit.status == DEPOSIT_STATUS_VERIFIED and | |||||
not deposit.load_task_id): | |||||
url = origin_url_from(deposit) | |||||
task = create_oneshot_task_dict( | |||||
'load-deposit', url=url, deposit_id=deposit.id) | |||||
load_task_id = scheduler.create_task([task])[0]['id'] | |||||
deposit.load_task_id = load_task_id | |||||
deposit.save() | deposit.save() | ||||
return deposit | return deposit | ||||
def _deposit_request_put(self, deposit, deposit_request_data, | def _deposit_request_put(self, deposit, deposit_request_data, | ||||
replace_metadata=False, replace_archives=False): | replace_metadata=False, replace_archives=False): | ||||
"""Save a deposit request with metadata attached to a deposit. | """Save a deposit request with metadata attached to a deposit. | ||||
▲ Show 20 Lines • Show All 201 Lines • ▼ Show 20 Lines | def _binary_upload(self, req, headers, collection_name, deposit_id=None, | ||||
if precondition_status_response: | if precondition_status_response: | ||||
return precondition_status_response | return precondition_status_response | ||||
external_id = headers['slug'] | external_id = headers['slug'] | ||||
# actual storage of data | # actual storage of data | ||||
archive_metadata = filehandler | archive_metadata = filehandler | ||||
deposit = self._deposit_put(deposit_id=deposit_id, | deposit = self._deposit_put(req, deposit_id=deposit_id, | ||||
in_progress=headers['in-progress'], | in_progress=headers['in-progress'], | ||||
external_id=external_id) | external_id=external_id) | ||||
self._deposit_request_put( | self._deposit_request_put( | ||||
deposit, {ARCHIVE_KEY: archive_metadata}, | deposit, {ARCHIVE_KEY: archive_metadata}, | ||||
replace_metadata=replace_metadata, | replace_metadata=replace_metadata, | ||||
replace_archives=replace_archives) | replace_archives=replace_archives) | ||||
return { | return { | ||||
▲ Show 20 Lines • Show All 104 Lines • ▼ Show 20 Lines | def _multipart_upload(self, req, headers, collection_name, | ||||
except ParserError: | except ParserError: | ||||
return make_error_dict( | return make_error_dict( | ||||
PARSING_ERROR, | PARSING_ERROR, | ||||
'Malformed xml metadata', | 'Malformed xml metadata', | ||||
"The xml received is malformed. " | "The xml received is malformed. " | ||||
"Please ensure your metadata file is correctly formatted.") | "Please ensure your metadata file is correctly formatted.") | ||||
# actual storage of data | # actual storage of data | ||||
deposit = self._deposit_put(deposit_id=deposit_id, | deposit = self._deposit_put(req, deposit_id=deposit_id, | ||||
in_progress=headers['in-progress'], | in_progress=headers['in-progress'], | ||||
external_id=external_id) | external_id=external_id) | ||||
deposit_request_data = { | deposit_request_data = { | ||||
ARCHIVE_KEY: filehandler, | ARCHIVE_KEY: filehandler, | ||||
METADATA_KEY: metadata, | METADATA_KEY: metadata, | ||||
RAW_METADATA_KEY: raw_metadata, | RAW_METADATA_KEY: raw_metadata, | ||||
} | } | ||||
self._deposit_request_put( | self._deposit_request_put( | ||||
▲ Show 20 Lines • Show All 55 Lines • ▼ Show 20 Lines | def _atom_entry(self, req, headers, collection_name, | ||||
return make_error_dict( | return make_error_dict( | ||||
BAD_REQUEST, | BAD_REQUEST, | ||||
'Empty body request is not supported', | 'Empty body request is not supported', | ||||
'Atom entry deposit is supposed to send for metadata. ' | 'Atom entry deposit is supposed to send for metadata. ' | ||||
'If the body is empty, there is no metadata.') | 'If the body is empty, there is no metadata.') | ||||
external_id = metadata.get('external_identifier', headers['slug']) | external_id = metadata.get('external_identifier', headers['slug']) | ||||
deposit = self._deposit_put(deposit_id=deposit_id, | deposit = self._deposit_put(req, deposit_id=deposit_id, | ||||
in_progress=headers['in-progress'], | in_progress=headers['in-progress'], | ||||
external_id=external_id) | external_id=external_id) | ||||
self._deposit_request_put( | self._deposit_request_put( | ||||
deposit, {METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata}, | deposit, {METADATA_KEY: metadata, RAW_METADATA_KEY: raw_metadata}, | ||||
replace_metadata, replace_archives) | replace_metadata, replace_archives) | ||||
return { | return { | ||||
▲ Show 20 Lines • Show All 296 Lines • Show Last 20 Lines |
I just realized something about absolute and relative urls.
Do you have a sample of the url before and after?
Currently, relative url are computed and passed to worker as:.
'/1/private/<collection>/<deposit-id>/check'
The worker is then by its configuration completing that part when doing its job.
Thus, if by absolute, you mean something like:
That will currently work in jenkins.
But that won't work in real life for the workers though (dev, staging, prod).
That's some caveat we spoke about some time ago.