Changeset View
Changeset View
Standalone View
Standalone View
swh/web/common/origin_save.py
Show First 20 Lines • Show All 66 Lines • ▼ Show 20 Lines | def get_origin_save_unauthorized_urls() -> List[str]: | ||||
loaded into the archive (blacklist). | loaded into the archive (blacklist). | ||||
Returns: | Returns: | ||||
list: the list of unauthorized origin url prefixes | list: the list of unauthorized origin url prefixes | ||||
""" | """ | ||||
return [origin.url for origin in SaveUnauthorizedOrigin.objects.all()] | return [origin.url for origin in SaveUnauthorizedOrigin.objects.all()] | ||||
def can_save_origin(origin_url: str) -> str: | def can_save_origin(origin_url: str, bypass_pending_review: bool = False) -> str: | ||||
""" | """ | ||||
Check if a software origin can be saved into the archive. | Check if a software origin can be saved into the archive. | ||||
Based on the origin url, the save request will be either: | Based on the origin url, the save request will be either: | ||||
* immediately accepted if the url is whitelisted | * immediately accepted if the url is whitelisted | ||||
* rejected if the url is blacklisted | * rejected if the url is blacklisted | ||||
* put in pending state for manual review otherwise | * put in pending state for manual review otherwise | ||||
Show All 10 Lines | for url_prefix in get_origin_save_unauthorized_urls(): | ||||
if origin_url.startswith(url_prefix): | if origin_url.startswith(url_prefix): | ||||
return SAVE_REQUEST_REJECTED | return SAVE_REQUEST_REJECTED | ||||
# if the origin url is in the white list, it can be immediately saved | # if the origin url is in the white list, it can be immediately saved | ||||
for url_prefix in get_origin_save_authorized_urls(): | for url_prefix in get_origin_save_authorized_urls(): | ||||
if origin_url.startswith(url_prefix): | if origin_url.startswith(url_prefix): | ||||
return SAVE_REQUEST_ACCEPTED | return SAVE_REQUEST_ACCEPTED | ||||
# otherwise, the origin url needs to be manually verified | # otherwise, the origin url needs to be manually verified if the user | ||||
# that submitted it does not have special permission | |||||
if bypass_pending_review: | |||||
# mark the origin URL as trusted in that case | |||||
SaveAuthorizedOrigin.objects.get_or_create(url=origin_url) | |||||
return SAVE_REQUEST_ACCEPTED | |||||
else: | |||||
return SAVE_REQUEST_PENDING | return SAVE_REQUEST_PENDING | ||||
# map visit type to scheduler task | # map visit type to scheduler task | ||||
# TODO: do not hardcode the task name here (T1157) | # TODO: do not hardcode the task name here (T1157) | ||||
_visit_type_task = {"git": "load-git", "hg": "load-hg", "svn": "load-svn"} | _visit_type_task = {"git": "load-git", "hg": "load-hg", "svn": "load-svn"} | ||||
# map scheduler task status to origin save status | # map scheduler task status to origin save status | ||||
▲ Show 20 Lines • Show All 215 Lines • ▼ Show 20 Lines | ) -> SaveOriginRequestInfo: | ||||
if must_save: | if must_save: | ||||
save_request.save() | save_request.save() | ||||
return save_request.to_dict() | return save_request.to_dict() | ||||
def create_save_origin_request( | def create_save_origin_request( | ||||
visit_type: str, origin_url: str | visit_type: str, origin_url: str, bypass_pending_review: bool = False | ||||
) -> SaveOriginRequestInfo: | ) -> SaveOriginRequestInfo: | ||||
""" | """ | ||||
Create a loading task to save a software origin into the archive. | Create a loading task to save a software origin into the archive. | ||||
This function aims to create a software origin loading task | This function aims to create a software origin loading task | ||||
through the use of the swh-scheduler component. | through the use of the swh-scheduler component. | ||||
First, some checks are performed to see if the visit type and origin | First, some checks are performed to see if the visit type and origin | ||||
Show All 25 Lines | Returns: | ||||
**succeed** or **failed** | **succeed** or **failed** | ||||
""" | """ | ||||
_check_visit_type_savable(visit_type) | _check_visit_type_savable(visit_type) | ||||
_check_origin_url_valid(origin_url) | _check_origin_url_valid(origin_url) | ||||
_check_origin_exists(origin_url) | _check_origin_exists(origin_url) | ||||
# if all checks passed so far, we can try and save the origin | # if all checks passed so far, we can try and save the origin | ||||
save_request_status = can_save_origin(origin_url) | save_request_status = can_save_origin(origin_url, bypass_pending_review) | ||||
task = None | task = None | ||||
# if the origin save request is accepted, create a scheduler | # if the origin save request is accepted, create a scheduler | ||||
# task to load it into the archive | # task to load it into the archive | ||||
if save_request_status == SAVE_REQUEST_ACCEPTED: | if save_request_status == SAVE_REQUEST_ACCEPTED: | ||||
# create a task with high priority | # create a task with high priority | ||||
kwargs = { | kwargs = { | ||||
"priority": "high", | "priority": "high", | ||||
▲ Show 20 Lines • Show All 413 Lines • Show Last 20 Lines |