diff --git a/swh/core/pytest_plugin.py b/swh/core/pytest_plugin.py --- a/swh/core/pytest_plugin.py +++ b/swh/core/pytest_plugin.py @@ -11,7 +11,7 @@ from functools import partial from os import path from typing import Dict, List, Optional -from urllib.parse import urlparse +from urllib.parse import urlparse, unquote from requests.adapters import BaseAdapter from requests.structures import CaseInsensitiveDict @@ -26,10 +26,12 @@ MAX_VISIT_FILES = 10 -def get_response_cb(request, context, datadir, - ignore_urls: List[str] = [], - visits: Optional[Dict] = None): - """Mount point callback to fetch on disk the request's content. +def get_response_cb( + request: requests.Request, context, datadir, + ignore_urls: List[str] = [], + visits: Optional[Dict] = None): + """Mount point callback to fetch on disk the request's content. The request + urls provided are url decoded first to resolve the associated file on disk. This is meant to be used as 'body' argument of the requests_mock.get() method. @@ -65,9 +67,10 @@ datadir/http_nowhere.com/path_to_resource,a=b,c=d Args: - request (requests.Request): Object requests + request: Object requests context (requests.Context): Object holding response metadata information (status_code, headers, etc...) + datadir: Data files path ignore_urls: urls whose status response should be 404 even if the local file exists visits: Dict of url, number of visits. If None, disable multi visit @@ -80,10 +83,11 @@ logger.debug('get_response_cb(%s, %s)', request, context) logger.debug('url: %s', request.url) logger.debug('ignore_urls: %s', ignore_urls) - if request.url in ignore_urls: + unquoted_url = unquote(request.url) + if unquoted_url in ignore_urls: context.status_code = 404 return None - url = urlparse(request.url) + url = urlparse(unquoted_url) # http://pypi.org ~> http_pypi.org # https://files.pythonhosted.org ~> https_files.pythonhosted.org dirname = '%s_%s' % (url.scheme, url.hostname) diff --git a/swh/core/tests/data/https_forge.s.o/api_diffusion,attachments[uris]=1 b/swh/core/tests/data/https_forge.s.o/api_diffusion,attachments[uris]=1 new file mode 100644 --- /dev/null +++ b/swh/core/tests/data/https_forge.s.o/api_diffusion,attachments[uris]=1 @@ -0,0 +1 @@ +"something" diff --git a/swh/core/tests/data/https_www.reference.com/web,q=What+Is+an+Example+of+a+URL?,qo=contentPageRelatedSearch,o=600605,l=dir,sga=1 b/swh/core/tests/data/https_www.reference.com/web,q=What+Is+an+Example+of+a+URL?,qo=contentPageRelatedSearch,o=600605,l=dir,sga=1 new file mode 100644 --- /dev/null +++ b/swh/core/tests/data/https_www.reference.com/web,q=What+Is+an+Example+of+a+URL?,qo=contentPageRelatedSearch,o=600605,l=dir,sga=1 @@ -0,0 +1 @@ +"something else" diff --git a/swh/core/tests/test_pytest_plugin.py b/swh/core/tests/test_pytest_plugin.py --- a/swh/core/tests/test_pytest_plugin.py +++ b/swh/core/tests/test_pytest_plugin.py @@ -6,10 +6,25 @@ import requests from os import path +from urllib.parse import unquote from swh.core.pytest_plugin import requests_mock_datadir_factory +def test_get_response_cb_with_encoded_url(requests_mock_datadir): + # The following urls (quoted, unquoted) will be resolved as the same file + for encoded_url, expected_response in [ + ('https://forge.s.o/api/diffusion?attachments%5Buris%5D=1', + "something"), + ('https://www.reference.com/web?q=What+Is+an+Example+of+a+URL?&qo=contentPageRelatedSearch&o=600605&l=dir&sga=1', # noqa + "something else"), + ]: + for url in [encoded_url, unquote(encoded_url)]: + response = requests.get(url) + assert response.ok + assert response.json() == expected_response + + def test_get_response_cb_with_visits_nominal(requests_mock_datadir_visits): response = requests.get('https://example.com/file.json') assert response.ok