diff --git a/assets/src/bundles/save/index.js b/assets/src/bundles/save/index.js --- a/assets/src/bundles/save/index.js +++ b/assets/src/bundles/save/index.js @@ -199,6 +199,9 @@ $('#swh-origin-save-request-status').html(errorAlert); } else if (statusCode === 429) { $('#swh-origin-save-request-status').html(saveRequestRateLimitedAlert); + } else if (statusCode === 400) { + const errorAlert = htmlAlert('danger', `${errorData['reason']}`); + $('#swh-origin-save-request-status').html(errorAlert); } else { $('#swh-origin-save-request-status').html(saveRequestUnknownErrorAlert); } diff --git a/cypress/integration/origin-save.spec.js b/cypress/integration/origin-save.spec.js --- a/cypress/integration/origin-save.spec.js +++ b/cypress/integration/origin-save.spec.js @@ -14,6 +14,7 @@ 'warning': 'The "save code now" request has been put in pending state and may be accepted for processing after manual review.', 'rejected': 'The "save code now" request has been rejected because the provided origin url is blacklisted.', 'rateLimit': 'The rate limit for "save code now" requests has been reached. Please try again later.', + 'not-found': 'The provided url does not exist!', 'unknownError': 'An unexpected error happened when submitting the "save code now request', 'csrfError': 'CSRF Failed: Referrer checking failed - no Referrer.' }; @@ -226,6 +227,21 @@ }); }); + it('should show error when the origin does not exist (status: 400)', function() { + stubSaveRequest({requestUrl: this.originSaveUrl, + saveRequestStatus: 'rejected', + originUrl: origin.url, + saveTaskStatus: 'not created', + responseStatus: 400, + reasonMessage: saveCodeMsg['not-found']}); + + makeOriginSaveRequest(origin.type, origin.url); + + cy.wait('@saveRequest').then(() => { + checkAlertVisible('danger', saveCodeMsg['not-found']); + }); + }); + it('should show error when unknown error occurs (status other than 200, 403, 429)', function() { stubSaveRequest({requestUrl: this.originSaveUrl, saveRequestStatus: 'Error', diff --git a/swh/web/common/origin_save.py b/swh/web/common/origin_save.py --- a/swh/web/common/origin_save.py +++ b/swh/web/common/origin_save.py @@ -38,7 +38,11 @@ SaveUnauthorizedOrigin, ) from swh.web.common.origin_visits import get_origin_visits -from swh.web.common.typing import OriginInfo, SaveOriginRequestInfo +from swh.web.common.typing import ( + OriginExistenceCheckInfo, + OriginInfo, + SaveOriginRequestInfo, +) from swh.web.common.utils import SWH_WEB_METRICS_REGISTRY, parse_iso8601_date_to_utc scheduler = config.scheduler() @@ -156,6 +160,37 @@ ) +def origin_exists(origin_url: str) -> OriginExistenceCheckInfo: + """Check the origin url for existence. If it exists, extract some more useful + information on the origin. + + """ + resp = requests.head(origin_url) + exists = resp.ok is True + content_length: Optional[int] = None + last_modified: Optional[str] = None + if exists: + size_ = resp.headers.get("Content-Length") + content_length = int(size_) if size_ else None + last_modified = resp.headers.get("Last-Modified") + + return OriginExistenceCheckInfo( + origin_url=origin_url, + exists=exists, + last_modified=last_modified, + content_length=content_length, + ) + + +def _check_origin_exists(origin_url: str) -> None: + """Ensure the origin exists, if not raise an explicit message.""" + check = origin_exists(origin_url) + if not check["exists"]: + raise BadInputExc( + f"The provided origin url ({escape(origin_url)}) does not exist!" + ) + + def _get_visit_info_for_save_request( save_request: SaveOriginRequest, ) -> Tuple[Optional[datetime], Optional[str]]: @@ -314,12 +349,11 @@ database to keep track of them. Args: - visit_type (str): the type of visit to perform (currently only - ``git`` but ``svn`` and ``hg`` will soon be available) - origin_url (str): the url of the origin to save + visit_type: the type of visit to perform (e.g git, hg, svn, ...) + origin_url: the url of the origin to save Raises: - BadInputExc: the visit type or origin url is invalid + BadInputExc: the visit type or origin url is invalid or inexistent ForbiddenExc: the provided origin url is blacklisted Returns: @@ -338,6 +372,8 @@ """ _check_visit_type_savable(visit_type) _check_origin_url_valid(origin_url) + _check_origin_exists(origin_url) + # if all checks passed so far, we can try and save the origin save_request_status = can_save_origin(origin_url) task = None diff --git a/swh/web/common/typing.py b/swh/web/common/typing.py --- a/swh/web/common/typing.py +++ b/swh/web/common/typing.py @@ -245,3 +245,14 @@ """End of the visit if terminated""" save_task_status: str """Status of the scheduled task""" + + +class OriginExistenceCheckInfo(TypedDict): + origin_url: str + """Origin to check""" + exists: bool + """Does the url exist?""" + content_length: Optional[int] + """content length of the artifact""" + last_modified: Optional[str] + """Last modification time reported by the server""" diff --git a/swh/web/tests/admin/test_origin_save.py b/swh/web/tests/admin/test_origin_save.py --- a/swh/web/tests/admin/test_origin_save.py +++ b/swh/web/tests/admin/test_origin_save.py @@ -122,6 +122,7 @@ def test_accept_pending_save_request(client, mocker): mock_scheduler = mocker.patch("swh.web.common.origin_save.scheduler") + mock_origin_exists = mocker.patch("swh.web.common.origin_save._check_origin_exists") visit_type = "git" origin_url = "https://v2.pikacode.com/bthate/botlib.git" save_request_url = reverse( @@ -130,6 +131,7 @@ ) response = check_http_post_response(client, save_request_url, status_code=200) assert response.data["save_request_status"] == SAVE_REQUEST_PENDING + assert mock_origin_exists.called accept_request_url = reverse( "admin-origin-save-request-accept", diff --git a/swh/web/tests/api/views/test_origin_save.py b/swh/web/tests/api/views/test_origin_save.py --- a/swh/web/tests/api/views/test_origin_save.py +++ b/swh/web/tests/api/views/test_origin_save.py @@ -24,6 +24,7 @@ SaveOriginRequest, SaveUnauthorizedOrigin, ) +from swh.web.common.typing import OriginExistenceCheckInfo from swh.web.common.utils import reverse from swh.web.settings.tests import save_origin_rate_post from swh.web.tests.utils import ( @@ -73,6 +74,11 @@ ): mock_scheduler = mocker.patch("swh.web.common.origin_save.scheduler") + mock_origin_exists = mocker.patch("swh.web.common.origin_save.origin_exists") + mock_origin_exists.return_value = OriginExistenceCheckInfo( + origin_url=origin_url, exists=True, last_modified=None, content_length=None + ) + if scheduler_task_status is None: mock_scheduler.get_tasks.return_value = [] else: diff --git a/swh/web/tests/common/test_origin_save.py b/swh/web/tests/common/test_origin_save.py --- a/swh/web/tests/common/test_origin_save.py +++ b/swh/web/tests/common/test_origin_save.py @@ -12,6 +12,7 @@ import requests from swh.core.pytest_plugin import get_response_cb +from swh.web.common.exc import BadInputExc from swh.web.common.models import ( SAVE_REQUEST_ACCEPTED, SAVE_TASK_FAILED, @@ -22,11 +23,17 @@ SaveOriginRequest, ) from swh.web.common.origin_save import ( + _check_origin_exists, get_save_origin_requests, get_save_origin_task_info, + origin_exists, refresh_save_origin_request_statuses, ) -from swh.web.common.typing import OriginVisitInfo, SaveOriginRequestInfo +from swh.web.common.typing import ( + OriginExistenceCheckInfo, + OriginVisitInfo, + SaveOriginRequestInfo, +) from swh.web.config import get_config _es_url = "http://esnode1.internal.softwareheritage.org:9200" @@ -312,6 +319,57 @@ ) +def test_origin_exists_404(requests_mock): + """Origin which does not exist should be reported as inexistent""" + url_ko = "https://example.org/some-inexistant-url" + requests_mock.head(url_ko, status_code=404) + + actual_result = origin_exists(url_ko) + assert actual_result == OriginExistenceCheckInfo( + origin_url=url_ko, exists=False, last_modified=None, content_length=None, + ) + + with pytest.raises(BadInputExc, match="not exist"): + _check_origin_exists(url_ko) + + +def test_origin_exists_200_no_data(requests_mock): + """Origin which does not exist should be reported as inexistent""" + url = "http://example.org/real-url" + requests_mock.head( + url, status_code=200, + ) + + actual_result = origin_exists(url) + assert actual_result == OriginExistenceCheckInfo( + origin_url=url, exists=True, last_modified=None, content_length=None, + ) + + # passes the check + _check_origin_exists(url) + + +def test_origin_exists_200_with_data(requests_mock): + """Origin which does not exist should be reported as inexistent""" + url = "http://example.org/real-url" + requests_mock.head( + url, + status_code=200, + headers={ + "content-length": "10", + "last-modified": "Sun, 21 Aug 2011 16:26:32 GMT", + }, + ) + + actual_result = origin_exists(url) + assert actual_result == OriginExistenceCheckInfo( + origin_url=url, + exists=True, + content_length=10, + last_modified="Sun, 21 Aug 2011 16:26:32 GMT", + ) + + @pytest.mark.django_db @pytest.mark.parametrize("visit_status", ["created", "ongoing",]) def test_get_save_origin_requests_no_visit_date_found(mocker, visit_status):