def origin_search(
    self,
    query: str,
    limit: Optional[int] = None,
    with_visit: bool = False,
    **req_args,
) -> Iterator[Dict[str, Any]]:
    """List origin search results

    Result pages are fetched lazily: the next page is only requested once
    the results of the current one have been consumed.

    Args:
        query: search keywords
        limit: the maximum number of found origins to return; ``None``
            means no limit, zero or a negative value yields nothing
        with_visit: if true, only return origins with at least one visit
        req_args: extra keyword arguments forwarded to every underlying
            API call

    Returns:
        an iterator over search results

    Raises:
        requests.HTTPError: if HTTP request fails

    """

    params = []
    if limit is not None:
        params.append(("limit", limit))
    if with_visit:
        # Send the lowercase literal rather than Python's ``True``, which
        # would be serialized as "True" in the query string.
        params.append(("with_visit", "true"))

    nb_returned = 0
    q = f"origin/search/{query}/"
    while True:
        r = self._call(q, params=params, **req_args)
        results = r.json()
        if limit is not None:
            # Compare against None (not truthiness) so that limit=0 is
            # honoured, and clamp at zero so that a negative limit cannot
            # turn into a "drop the last items" slice.
            results = results[: max(limit - nb_returned, 0)]

        nb_returned += len(results)
        yield from results

        if limit is not None and nb_returned >= limit:
            return

        next_url = r.links.get("next", {}).get("url")
        if not next_url:
            return

        # The "next" link is followed as-is; it presumably already embeds
        # the query parameters, so they are not sent a second time.
        params = []
        q = next_url
"https://github.com/foo-bar-baz-qux/foo-bar-baz-qux.github.io", + "origin_visits_url": "https://archive.softwareheritage.org/api/1/origin/https://github.com/foo-bar-baz-qux/foo-bar-baz-qux.github.io/visits/" + }, + { + "url": "https://github.com/tunnckoCore/foo-bar-baz-qux", + "origin_visits_url": "https://archive.softwareheritage.org/api/1/origin/https://github.com/tunnckoCore/foo-bar-baz-qux/visits/" + }, + { + "url": "https://github.com/foo-bar-baz-qux/aml-project", + "origin_visits_url": "https://archive.softwareheritage.org/api/1/origin/https://github.com/foo-bar-baz-qux/aml-project/visits/" + }, + { + "url": "https://github.com/foo-bar-baz-qux/ci_test", + "origin_visits_url": "https://archive.softwareheritage.org/api/1/origin/https://github.com/foo-bar-baz-qux/ci_test/visits/" + }, + { + "url": "https://github.com/foo-bar-baz-qux/extreme-computing", + "origin_visits_url": "https://archive.softwareheritage.org/api/1/origin/https://github.com/foo-bar-baz-qux/extreme-computing/visits/" + }, + { + "url": "https://github.com/foo-bar-baz-qux/cs205-homework", + "origin_visits_url": "https://archive.softwareheritage.org/api/1/origin/https://github.com/foo-bar-baz-qux/cs205-homework/visits/" + }, + { + "url": "https://github.com/foo-bar-baz-qux/rstan", + "origin_visits_url": "https://archive.softwareheritage.org/api/1/origin/https://github.com/foo-bar-baz-qux/rstan/visits/" + }, + { + "url": "https://github.com/foo-bar-baz-qux/cs207", + "origin_visits_url": "https://archive.softwareheritage.org/api/1/origin/https://github.com/foo-bar-baz-qux/cs207/visits/" + }, + { + "url": "https://github.com/foo-bar-baz-qux/2015lab1", + "origin_visits_url": "https://archive.softwareheritage.org/api/1/origin/https://github.com/foo-bar-baz-qux/2015lab1/visits/" + }, + { + "url": "https://github.com/foo-bar-baz-qux/stan", + "origin_visits_url": "https://archive.softwareheritage.org/api/1/origin/https://github.com/foo-bar-baz-qux/stan/visits/" + }, + { + "url": 
"https://github.com/foo-bar-baz-qux/atom-script", + "origin_visits_url": "https://archive.softwareheritage.org/api/1/origin/https://github.com/foo-bar-baz-qux/atom-script/visits/" + }, + { + "url": "https://github.com/foobarbazquxquux/gordon-test", + "origin_visits_url": "https://archive.softwareheritage.org/api/1/origin/https://github.com/foobarbazquxquux/gordon-test/visits/" + }, + { + "url": "https://github.com/foobarbazquxquux/helios", + "origin_visits_url": "https://archive.softwareheritage.org/api/1/origin/https://github.com/foobarbazquxquux/helios/visits/" + }, + { + "url": "https://github.com/foo-bar-baz-qux/2016", + "origin_visits_url": "https://archive.softwareheritage.org/api/1/origin/https://github.com/foo-bar-baz-qux/2016/visits/" + }, + { + "url": "https://github.com/foo-bar-baz-qux/devtools", + "origin_visits_url": "https://archive.softwareheritage.org/api/1/origin/https://github.com/foo-bar-baz-qux/devtools/visits/" + }, + { + "url": "https://github.com/foo-bar-baz-qux/dplyr", + "origin_visits_url": "https://archive.softwareheritage.org/api/1/origin/https://github.com/foo-bar-baz-qux/dplyr/visits/" + }, + { + "url": "https://github.com/foo-bar-baz-qux/paletter", + "origin_visits_url": "https://archive.softwareheritage.org/api/1/origin/https://github.com/foo-bar-baz-qux/paletter/visits/" + }, + { + "url": "https://github.com/foo-bar-baz-qux/ggplot2", + "origin_visits_url": "https://archive.softwareheritage.org/api/1/origin/https://github.com/foo-bar-baz-qux/ggplot2/visits/" + }, + { + "url": "https://github.com/foo-bar-baz-qux/stm", + "origin_visits_url": "https://archive.softwareheritage.org/api/1/origin/https://github.com/foo-bar-baz-qux/stm/visits/" + }, + { + "url": "https://github.com/foo-bar-baz-qux/concept-to-clinic", + "origin_visits_url": "https://archive.softwareheritage.org/api/1/origin/https://github.com/foo-bar-baz-qux/concept-to-clinic/visits/" + }, + { + "url": "https://github.com/foo-bar-baz-qux/scales", + "origin_visits_url": 
"https://archive.softwareheritage.org/api/1/origin/https://github.com/foo-bar-baz-qux/scales/visits/" + }, + { + "url": "https://www.npmjs.com/package/foo-bar-baz-qux", + "origin_visits_url": "https://archive.softwareheritage.org/api/1/origin/https://www.npmjs.com/package/foo-bar-baz-qux/visits/" + }, + { + "url": "https://bitbucket.org/foobarbazqux/rp.git", + "origin_visits_url": "https://archive.softwareheritage.org/api/1/origin/https://bitbucket.org/foobarbazqux/rp.git/visits/" + }, + { + "url": "https://github.com/foo-bar-baz-qux/knowledge-repo", + "origin_visits_url": "https://archive.softwareheritage.org/api/1/origin/https://github.com/foo-bar-baz-qux/knowledge-repo/visits/" + }, + { + "url": "https://github.com/foo-bar-baz-qux/EconML", + "origin_visits_url": "https://archive.softwareheritage.org/api/1/origin/https://github.com/foo-bar-baz-qux/EconML/visits/" + } +] + """, # NoQA: E501 # NoQA: E501 + "origin/search/python/?limit=5": r""" +[ + { + "url": "https://github.com/neon670/python.dev", + "origin_visits_url": "https://archive.softwareheritage.org/api/1/origin/https://github.com/neon670/python.dev/visits/" + }, + { + "url": "https://github.com/aur-archive/python-werkzeug", + "origin_visits_url": "https://archive.softwareheritage.org/api/1/origin/https://github.com/aur-archive/python-werkzeug/visits/" + }, + { + "url": "https://github.com/jsagon/jtradutor-web-python", + "origin_visits_url": "https://archive.softwareheritage.org/api/1/origin/https://github.com/jsagon/jtradutor-web-python/visits/" + }, + { + "url": "https://github.com/zjmwqx/ipythonCode", + "origin_visits_url": "https://archive.softwareheritage.org/api/1/origin/https://github.com/zjmwqx/ipythonCode/visits/" + }, + { + "url": "https://github.com/knutab/Python-BSM", + "origin_visits_url": "https://archive.softwareheritage.org/api/1/origin/https://github.com/knutab/Python-BSM/visits/" + } ] """, # NoQA: E501 # NoQA: E501 } diff --git a/swh/web/client/tests/gen-api-data.sh 
def test_origin_search(web_api_client, web_api_mock):
    """Check ``origin_search`` with a result cap and with the visit filter."""
    # A capped search must hand back exactly the requested number of origins.
    capped = list(web_api_client.origin_search("python", limit=5))
    assert len(capped) == 5

    found = list(web_api_client.origin_search("foo bar baz qux", with_visit=True))
    actual_urls = []
    actual_visits = []
    for origin in found:
        actual_urls.append(origin["url"])
        actual_visits.append(origin["origin_visits_url"])

    # Check *some* of the URLS since the search could return more results in the future
    expected_urls = (
        "https://github.com/foo-bar-baz-qux/mygithubpage",
        "https://www.npmjs.com/package/foo-bar-baz-qux",
        "https://bitbucket.org/foobarbazqux/rp.git",
    )
    expected_visits = (
        "https://archive.softwareheritage.org/api/1/origin/https://github.com/foo-bar-baz-qux/mygithubpage/visits/",  # NoQA: E501
        "https://archive.softwareheritage.org/api/1/origin/https://www.npmjs.com/package/foo-bar-baz-qux/visits/",  # NoQA: E501
        "https://archive.softwareheritage.org/api/1/origin/https://bitbucket.org/foobarbazqux/rp.git/visits/",  # NoQA: E501
    )
    # Each origin URL is checked together with its visits URL, in that order.
    for origin_url, visits_url in zip(expected_urls, expected_visits):
        assert origin_url in actual_urls
        assert visits_url in actual_visits