diff --git a/swh/web/api/views/origin.py b/swh/web/api/views/origin.py --- a/swh/web/api/views/origin.py +++ b/swh/web/api/views/origin.py @@ -21,6 +21,7 @@ from swh.web.common import archive from swh.web.common.exc import BadInputExc from swh.web.common.origin_visits import get_origin_visits +from swh.web.common.typing import OriginInfo from swh.web.common.utils import origin_visit_types, reverse DOC_RETURN_ORIGIN = """ @@ -143,6 +144,7 @@ return api_lookup( archive.lookup_origin, ori_dict, + lookup_similar_urls=False, notfound_msg=error_msg, enrich_fn=enrich_origin, request=request, @@ -326,7 +328,7 @@ """ result = {} - origin_query = {"url": origin_url} + origin_query = OriginInfo(url=origin_url) notfound_msg = "No origin {} found".format(origin_url) url_args_next = {"origin_url": origin_url} per_page = int(request.query_params.get("per_page", "10")) @@ -334,7 +336,7 @@ last_visit = int(last_visit_str) if last_visit_str else None def _lookup_origin_visits(origin_query, last_visit=last_visit, per_page=per_page): - all_visits = get_origin_visits(origin_query) + all_visits = get_origin_visits(origin_query, lookup_similar_urls=False) all_visits.reverse() visits = [] if not last_visit: @@ -412,11 +414,13 @@ :swh_web_api:`origin/https://github.com/hylang/hy/visit/latest/` """ + require_snapshot = request.query_params.get("require_snapshot", "false") return api_lookup( archive.lookup_origin_visit_latest, origin_url, bool(strtobool(require_snapshot)), + lookup_similar_urls=False, notfound_msg=("No visit for origin {} found".format(origin_url)), enrich_fn=partial( enrich_origin_visit, with_origin_link=True, with_origin_visit_link=False @@ -453,10 +457,12 @@ :swh_web_api:`origin/https://github.com/hylang/hy/visit/1/` """ + return api_lookup( archive.lookup_origin_visit, origin_url, int(visit_id), + lookup_similar_urls=False, notfound_msg=("No visit {} for origin {} found".format(visit_id, origin_url)), enrich_fn=partial( enrich_origin_visit, with_origin_link=True, with_origin_visit_link=False diff --git a/swh/web/common/archive.py b/swh/web/common/archive.py --- a/swh/web/common/archive.py +++ b/swh/web/common/archive.py @@ -232,18 +232,20 @@ return converters.from_swh(lic, hashess={"id"}) -def lookup_origin(origin: OriginInfo) -> OriginInfo: +def lookup_origin(origin: OriginInfo, lookup_similar_urls: bool = True) -> OriginInfo: """Return information about the origin matching dict origin. Args: origin: origin's dict with 'url' key + lookup_similar_urls: if :const:`True`, lookup origin with and + without trailing slash in its URL Returns: origin information as dict. """ origin_urls = [origin["url"]] - if origin["url"]: + if origin["url"] and lookup_similar_urls: # handle case when user provided an origin url with a trailing # slash while the url in storage does not have it (e.g. GitHub) if origin["url"].endswith("/"): @@ -977,9 +979,9 @@ """Yields the origin origins' visits. Args: - origin_url (str): origin to list visits for - last_visit (int): last visit to lookup from - limit (int): Number of elements max to display + origin_url: origin to list visits for + last_visit: last visit to lookup from + limit: Number of elements max to display Yields: OriginVisit for that origin @@ -1018,6 +1020,7 @@ require_snapshot: bool = False, type: Optional[str] = None, allowed_statuses: Optional[List[str]] = None, + lookup_similar_urls: bool = True, ) -> Optional[OriginVisitInfo]: """Return the origin's latest visit @@ -1030,12 +1033,19 @@ ``allowed_statuses=['full']`` will only consider visits that have successfully run to completion. require_snapshot: filter out origins without a snapshot + lookup_similar_urls: if :const:`True`, lookup origin with and + without trailing slash in its URL Returns: The origin visit info as dict if found """ + # check origin existence in the archive + origin_url = lookup_origin( + OriginInfo(url=origin_url), lookup_similar_urls=lookup_similar_urls + )["url"] + visit_status = origin_get_latest_visit_status( storage, origin_url, @@ -1048,12 +1058,18 @@ ) -def lookup_origin_visit(origin_url: str, visit_id: int) -> OriginVisitInfo: +def lookup_origin_visit( + origin_url: str, + visit_id: int, + lookup_similar_urls: bool = True, +) -> OriginVisitInfo: """Return information about visit visit_id with origin origin. Args: origin: origin concerned by the visit visit_id: the visit identifier to lookup + lookup_similar_urls: if :const:`True`, lookup origin with and + without trailing slash in its URL Raises: NotFoundExc if no origin visit matching the criteria is found @@ -1062,6 +1078,11 @@ The dict origin_visit concerned """ + # check origin existence in the archive + origin_url = lookup_origin( + OriginInfo(url=origin_url), lookup_similar_urls=lookup_similar_urls + )["url"] + visit = storage.origin_visit_get_by(origin_url, visit_id) visit_status = storage.origin_visit_status_get_latest(origin_url, visit_id) if not visit: diff --git a/swh/web/common/origin_visits.py b/swh/web/common/origin_visits.py --- a/swh/web/common/origin_visits.py +++ b/swh/web/common/origin_visits.py @@ -13,7 +13,9 @@ from swh.web.common.utils import parse_iso8601_date_to_utc -def get_origin_visits(origin_info: OriginInfo) -> List[OriginVisitInfo]: +def get_origin_visits( + origin_info: OriginInfo, lookup_similar_urls: bool = True +) -> List[OriginVisitInfo]: """Function that returns the list of visits for a swh origin. That list is put in cache in order to speedup the navigation in the swh web browse ui. @@ -23,6 +25,8 @@ Args: origin_info: dict describing the origin to fetch visits from + lookup_similar_urls: if :const:`True`, lookup origin with and + without trailing slash in its URL Returns: A list of dict describing the origin visits @@ -33,10 +37,9 @@ from swh.web.common import archive - if "url" in origin_info: - origin_url = origin_info["url"] - else: - origin_url = archive.lookup_origin(origin_info)["url"] + origin_url = archive.lookup_origin( + origin_info, lookup_similar_urls=lookup_similar_urls + )["url"] cache_entry_id = "origin_visits_%s" % origin_url cache_entry = cache.get(cache_entry_id) diff --git a/swh/web/tests/api/views/test_origin.py b/swh/web/tests/api/views/test_origin.py --- a/swh/web/tests/api/views/test_origin.py +++ b/swh/web/tests/api/views/test_origin.py @@ -32,24 +32,26 @@ from swh.web.tests.utils import check_api_get_responses -def test_api_lookup_origin_visits_raise_error(api_client, mocker): +def test_api_lookup_origin_visits_raise_error(api_client, origin, mocker): mock_get_origin_visits = mocker.patch("swh.web.api.views.origin.get_origin_visits") err_msg = "voluntary error to check the bad request middleware." mock_get_origin_visits.side_effect = BadInputExc(err_msg) - url = reverse("api-1-origin-visits", url_args={"origin_url": "http://foo"}) + url = reverse("api-1-origin-visits", url_args={"origin_url": origin["url"]}) rv = check_api_get_responses(api_client, url, status_code=400) assert rv.data == {"exception": "BadInputExc", "reason": err_msg} -def test_api_lookup_origin_visits_raise_swh_storage_error_db(api_client, mocker): +def test_api_lookup_origin_visits_raise_swh_storage_error_db( + api_client, origin, mocker +): mock_get_origin_visits = mocker.patch("swh.web.api.views.origin.get_origin_visits") err_msg = "Storage exploded! Will be back online shortly!" mock_get_origin_visits.side_effect = StorageDBError(err_msg) - url = reverse("api-1-origin-visits", url_args={"origin_url": "http://foo"}) + url = reverse("api-1-origin-visits", url_args={"origin_url": origin["url"]}) rv = check_api_get_responses(api_client, url, status_code=503) assert rv.data == { "exception": "StorageDBError", @@ -57,13 +59,15 @@ } -def test_api_lookup_origin_visits_raise_swh_storage_error_api(api_client, mocker): +def test_api_lookup_origin_visits_raise_swh_storage_error_api( + api_client, origin, mocker +): mock_get_origin_visits = mocker.patch("swh.web.api.views.origin.get_origin_visits") err_msg = "Storage API dropped dead! Will resurrect asap!" mock_get_origin_visits.side_effect = StorageAPIError(err_msg) - url = reverse("api-1-origin-visits", url_args={"origin_url": "http://foo"}) + url = reverse("api-1-origin-visits", url_args={"origin_url": origin["url"]}) rv = check_api_get_responses(api_client, url, status_code=503) assert rv.data == { "exception": "StorageAPIError", @@ -837,3 +841,27 @@ assert {o["url"] for o in rv.data} == { o["url"] for o in archived_origins if o["type"] == visit_type } + + +@pytest.mark.parametrize( + "view_name, extra_args", + [ + ("api-1-origin", {}), + ("api-1-origin-visits", {}), + ("api-1-origin-visit", {"visit_id": 1}), + ("api-1-origin-visit-latest", {}), + ("api-origin-intrinsic-metadata", {}), + ], +) +def test_api_origin_by_url_with_extra_trailing_slash( + api_client, origin, view_name, extra_args +): + origin_url = origin["url"] + assert not origin_url.endswith("/") + origin_url = origin_url + "/" + url = reverse(view_name, url_args={"origin_url": origin_url, **extra_args}) + rv = check_api_get_responses(api_client, url, status_code=404) + assert rv.data == { + "exception": "NotFoundExc", + "reason": f"Origin with url {origin_url} not found!", + }