Changeset View
Changeset View
Standalone View
Standalone View
swh/web/common/origin_save.py
Show First 20 Lines • Show All 107 Lines • ▼ Show 20 Lines | def can_save_origin(origin_url: str, bypass_pending_review: bool = False) -> str: | ||||
else: | else: | ||||
return SAVE_REQUEST_PENDING | return SAVE_REQUEST_PENDING | ||||
# map visit type to scheduler task | # map visit type to scheduler task | ||||
# TODO: do not hardcode the task name here (T1157) | # TODO: do not hardcode the task name here (T1157) | ||||
_visit_type_task = {"git": "load-git", "hg": "load-hg", "svn": "load-svn"} | _visit_type_task = {"git": "load-git", "hg": "load-hg", "svn": "load-svn"} | ||||
_visit_type_task_privileged = { | |||||
"bundle": "load-archive-files", | |||||
} | |||||
# map scheduler task status to origin save status | # map scheduler task status to origin save status | ||||
_save_task_status = { | _save_task_status = { | ||||
"next_run_not_scheduled": SAVE_TASK_NOT_YET_SCHEDULED, | "next_run_not_scheduled": SAVE_TASK_NOT_YET_SCHEDULED, | ||||
"next_run_scheduled": SAVE_TASK_SCHEDULED, | "next_run_scheduled": SAVE_TASK_SCHEDULED, | ||||
"completed": SAVE_TASK_SUCCEEDED, | "completed": SAVE_TASK_SUCCEEDED, | ||||
"disabled": SAVE_TASK_FAILED, | "disabled": SAVE_TASK_FAILED, | ||||
} | } | ||||
# map scheduler task_run status to origin save status | # map scheduler task_run status to origin save status | ||||
_save_task_run_status = { | _save_task_run_status = { | ||||
"scheduled": SAVE_TASK_SCHEDULED, | "scheduled": SAVE_TASK_SCHEDULED, | ||||
"started": SAVE_TASK_RUNNING, | "started": SAVE_TASK_RUNNING, | ||||
"eventful": SAVE_TASK_SUCCEEDED, | "eventful": SAVE_TASK_SUCCEEDED, | ||||
"uneventful": SAVE_TASK_SUCCEEDED, | "uneventful": SAVE_TASK_SUCCEEDED, | ||||
"failed": SAVE_TASK_FAILED, | "failed": SAVE_TASK_FAILED, | ||||
"permfailed": SAVE_TASK_FAILED, | "permfailed": SAVE_TASK_FAILED, | ||||
"lost": SAVE_TASK_FAILED, | "lost": SAVE_TASK_FAILED, | ||||
} | } | ||||
def get_savable_visit_types() -> List[str]: | def get_savable_visit_types(privileged_user: bool = False) -> List[str]: | ||||
anlambert: I would name the parameter `privileged_user`, this is clearer. | |||||
""" | """Get the list of visit types that can be performed through a save request. | ||||
Get the list of visit types that can be performed | |||||
through a save request. | Args: | ||||
privileged_user: Flag to determine if all visit types should be returned or not. | |||||
Defaults to False to only list unprivileged visit types. | |||||
Returns: | Returns: | ||||
list: the list of saveable visit types | the list of saveable visit types | ||||
""" | """ | ||||
return sorted(list(_visit_type_task.keys())) | task_types = list(_visit_type_task.keys()) | ||||
if privileged_user: | |||||
Done Inline Actions the second call to list copy is not needed anlambert: the second call to list copy is not needed | |||||
Done Inline Actions you can drop the call to list() here anlambert: you can drop the call to list() here | |||||
task_types += _visit_type_task_privileged.keys() | |||||
return sorted(task_types) | |||||
def _check_visit_type_savable(visit_type: str) -> None: | def _check_visit_type_savable(visit_type: str, privileged_user: bool = False) -> None: | ||||
allowed_visit_types = ", ".join(get_savable_visit_types()) | visit_type_tasks = get_savable_visit_types(privileged_user) | ||||
if visit_type not in _visit_type_task: | if visit_type not in visit_type_tasks: | ||||
allowed_visit_types = ", ".join(visit_type_tasks) | |||||
raise BadInputExc( | raise BadInputExc( | ||||
"Visit of type %s can not be saved! " | f"Visit of type {visit_type} can not be saved! " | ||||
"Allowed types are the following: %s" % (visit_type, allowed_visit_types) | f"Allowed types are the following: {allowed_visit_types}" | ||||
) | ) | ||||
_validate_url = URLValidator(schemes=["http", "https", "svn", "git"]) | _validate_url = URLValidator(schemes=["http", "https", "svn", "git"]) | ||||
def _check_origin_url_valid(origin_url: str) -> None: | def _check_origin_url_valid(origin_url: str) -> None: | ||||
try: | try: | ||||
▲ Show 20 Lines • Show All 173 Lines • ▼ Show 20 Lines | if must_save: | ||||
save_request.save() | save_request.save() | ||||
return save_request.to_dict() | return save_request.to_dict() | ||||
def create_save_origin_request( | def create_save_origin_request( | ||||
visit_type: str, | visit_type: str, | ||||
origin_url: str, | origin_url: str, | ||||
bypass_pending_review: bool = False, | privileged_user: bool = False, | ||||
Done Inline Actions same here anlambert: same here | |||||
user_id: Optional[int] = None, | user_id: Optional[int] = None, | ||||
) -> SaveOriginRequestInfo: | ) -> SaveOriginRequestInfo: | ||||
""" | """Create a loading task to save a software origin into the archive. | ||||
Create a loading task to save a software origin into the archive. | |||||
This function aims to create a software origin loading task | This function aims to create a software origin loading task | ||||
through the use of the swh-scheduler component. | through the use of the swh-scheduler component. | ||||
First, some checks are performed to see if the visit type and origin | First, some checks are performed to see if the visit type and origin | ||||
url are valid but also if the save request can be accepted. | url are valid but also if the save request can be accepted. | ||||
If those checks passed, the loading task is then created. | If those checks passed, the loading task is then created. | ||||
Otherwise, the save request is put in pending or rejected state. | Otherwise, the save request is put in pending or rejected state. | ||||
All the submitted save requests are logged into the swh-web | All the submitted save requests are logged into the swh-web | ||||
database to keep track of them. | database to keep track of them. | ||||
Args: | Args: | ||||
visit_type: the type of visit to perform (e.g. git, hg, svn, ...) | visit_type: the type of visit to perform (e.g. git, hg, svn, ...) | ||||
origin_url: the url of the origin to save | origin_url: the url of the origin to save | ||||
privileged_user: Whether the user has privileged access to extra | |||||
functionality (e.g. bypass save code now review, access to extra visit type) | |||||
user_id: User identifier (provided when authenticated) | |||||
Raises: | Raises: | ||||
BadInputExc: the visit type or origin url is invalid or inexistent | BadInputExc: the visit type or origin url is invalid or inexistent | ||||
ForbiddenExc: the provided origin url is blacklisted | ForbiddenExc: the provided origin url is blacklisted | ||||
Returns: | Returns: | ||||
dict: A dict describing the save request with the following keys: | dict: A dict describing the save request with the following keys: | ||||
* **visit_type**: the type of visit to perform | * **visit_type**: the type of visit to perform | ||||
* **origin_url**: the url of the origin | * **origin_url**: the url of the origin | ||||
* **save_request_date**: the date the request was submitted | * **save_request_date**: the date the request was submitted | ||||
* **save_request_status**: the request status, either **accepted**, | * **save_request_status**: the request status, either **accepted**, | ||||
**rejected** or **pending** | **rejected** or **pending** | ||||
* **save_task_status**: the origin loading task status, either | * **save_task_status**: the origin loading task status, either | ||||
**not created**, **not yet scheduled**, **scheduled**, | **not created**, **not yet scheduled**, **scheduled**, | ||||
**succeed** or **failed** | **succeed** or **failed** | ||||
""" | """ | ||||
_check_visit_type_savable(visit_type) | _check_visit_type_savable(visit_type, privileged_user) | ||||
_check_origin_url_valid(origin_url) | _check_origin_url_valid(origin_url) | ||||
# if all checks passed so far, we can try and save the origin | # if all checks passed so far, we can try and save the origin | ||||
save_request_status = can_save_origin(origin_url, bypass_pending_review) | save_request_status = can_save_origin(origin_url, privileged_user) | ||||
task = None | task = None | ||||
# if the origin save request is accepted, create a scheduler | # if the origin save request is accepted, create a scheduler | ||||
# task to load it into the archive | # task to load it into the archive | ||||
if save_request_status == SAVE_REQUEST_ACCEPTED: | if save_request_status == SAVE_REQUEST_ACCEPTED: | ||||
# create a task with high priority | # create a task with high priority | ||||
kwargs = { | kwargs = { | ||||
"priority": "high", | "priority": "high", | ||||
▲ Show 20 Lines • Show All 379 Lines • ▼ Show 20 Lines | load_task_statuses = ( | ||||
SAVE_TASK_NOT_CREATED, | SAVE_TASK_NOT_CREATED, | ||||
SAVE_TASK_NOT_YET_SCHEDULED, | SAVE_TASK_NOT_YET_SCHEDULED, | ||||
SAVE_TASK_SCHEDULED, | SAVE_TASK_SCHEDULED, | ||||
SAVE_TASK_SUCCEEDED, | SAVE_TASK_SUCCEEDED, | ||||
SAVE_TASK_FAILED, | SAVE_TASK_FAILED, | ||||
SAVE_TASK_RUNNING, | SAVE_TASK_RUNNING, | ||||
) | ) | ||||
visit_types = get_savable_visit_types() | # for metrics, we want access to all visit types | ||||
visit_types = get_savable_visit_types(privileged_user=True) | |||||
labels_set = product(request_statuses, visit_types) | labels_set = product(request_statuses, visit_types) | ||||
for labels in labels_set: | for labels in labels_set: | ||||
_submitted_save_requests_gauge.labels(*labels).set(0) | _submitted_save_requests_gauge.labels(*labels).set(0) | ||||
labels_set = product(load_task_statuses, visit_types) | labels_set = product(load_task_statuses, visit_types) | ||||
Show All 30 Lines |
I would name the parameter privileged_user, this is clearer.