Changeset View
Changeset View
Standalone View
Standalone View
swh/web/common/origin_save.py
Show First 20 Lines • Show All 219 Lines • ▼ Show 20 Lines | def origin_exists(origin_url: str) -> OriginExistenceCheckInfo: | ||||
return OriginExistenceCheckInfo( | return OriginExistenceCheckInfo( | ||||
origin_url=origin_url, | origin_url=origin_url, | ||||
exists=exists, | exists=exists, | ||||
last_modified=last_modified, | last_modified=last_modified, | ||||
content_length=content_length, | content_length=content_length, | ||||
) | ) | ||||
def _check_origin_exists(origin_url: Optional[str]) -> OriginExistenceCheckInfo: | def _check_origin_exists(url: str) -> OriginExistenceCheckInfo: | ||||
"""Ensure the origin exists, if not raise an explicit message.""" | """Ensure a URL exists, if not raise an explicit message.""" | ||||
if not origin_url: | metadata = origin_exists(url) | ||||
raise BadInputExc("The origin url provided must be set!") | |||||
metadata = origin_exists(origin_url) | |||||
if not metadata["exists"]: | if not metadata["exists"]: | ||||
raise BadInputExc( | raise BadInputExc(f"The provided url ({escape(url)}) does not exist!") | ||||
f"The provided origin url ({escape(origin_url)}) does not exist!" | |||||
) | |||||
return metadata | return metadata | ||||
def _get_visit_info_for_save_request( | def _get_visit_info_for_save_request( | ||||
save_request: SaveOriginRequest, | save_request: SaveOriginRequest, | ||||
) -> Tuple[Optional[datetime], Optional[str]]: | ) -> Tuple[Optional[datetime], Optional[str]]: | ||||
"""Retrieve visit information out of a save request | """Retrieve visit information out of a save request | ||||
▲ Show 20 Lines • Show All 179 Lines • ▼ Show 20 Lines | Returns: | ||||
**not created**, **not yet scheduled**, **scheduled**, | **not created**, **not yet scheduled**, **scheduled**, | ||||
**succeed** or **failed** | **succeed** or **failed** | ||||
""" | """ | ||||
visit_type_tasks = get_savable_visit_types_dict(privileged_user) | visit_type_tasks = get_savable_visit_types_dict(privileged_user) | ||||
_check_visit_type_savable(visit_type, privileged_user) | _check_visit_type_savable(visit_type, privileged_user) | ||||
_check_origin_url_valid(origin_url) | _check_origin_url_valid(origin_url) | ||||
artifact_url = kwargs.get("artifact_url") | |||||
if visit_type == "archives": | |||||
metadata = _check_origin_exists(artifact_url) | |||||
# if all checks passed so far, we can try and save the origin | # if all checks passed so far, we can try and save the origin | ||||
save_request_status = can_save_origin(origin_url, privileged_user) | save_request_status = can_save_origin(origin_url, privileged_user) | ||||
task = None | task = None | ||||
# if the origin save request is accepted, create a scheduler | # if the origin save request is accepted, create a scheduler | ||||
# task to load it into the archive | # task to load it into the archive | ||||
if save_request_status == SAVE_REQUEST_ACCEPTED: | if save_request_status == SAVE_REQUEST_ACCEPTED: | ||||
# create a task with high priority | # create a task with high priority | ||||
task_kwargs: Dict[str, Any] = { | task_kwargs: Dict[str, Any] = { | ||||
"priority": "high", | "priority": "high", | ||||
"url": origin_url, | "url": origin_url, | ||||
} | } | ||||
if visit_type == "archives": | if visit_type == "archives": | ||||
# extra arguments for that type are required | # extra arguments for that type are required | ||||
assert metadata is not None | archives_data = kwargs.get("archives_data", []) | ||||
task_kwargs = dict( | if not archives_data: | ||||
**task_kwargs, | raise BadInputExc( | ||||
artifacts=[ | "Artifacts data are missing for the archives visit type." | ||||
) | |||||
artifacts = [] | |||||
for artifact in archives_data: | |||||
artifact_url = artifact.get("artifact_url") | |||||
artifact_version = artifact.get("artifact_version") | |||||
if not artifact_url or not artifact_version: | |||||
raise BadInputExc("Missing url or version for an artifact to load.") | |||||
metadata = _check_origin_exists(artifact_url) | |||||
artifacts.append( | |||||
{ | { | ||||
"url": artifact_url, | "url": artifact_url, | ||||
"version": kwargs["artifact_version"], | "version": artifact_version, | ||||
"time": metadata["last_modified"], | "time": metadata["last_modified"], | ||||
"length": metadata["content_length"], | "length": metadata["content_length"], | ||||
} | } | ||||
], | |||||
) | ) | ||||
task_kwargs = dict(**task_kwargs, artifacts=artifacts, snapshot_append=True) | |||||
sor = None | sor = None | ||||
# get list of previously submitted save requests | # get list of previously submitted save requests | ||||
current_sors = list( | current_sors = list( | ||||
SaveOriginRequest.objects.filter( | SaveOriginRequest.objects.filter( | ||||
visit_type=visit_type, origin_url=origin_url | visit_type=visit_type, origin_url=origin_url | ||||
) | ) | ||||
) | ) | ||||
▲ Show 20 Lines • Show All 421 Lines • Show Last 20 Lines |