diff --git a/swh/web/api/views/content.py b/swh/web/api/views/content.py --- a/swh/web/api/views/content.py +++ b/swh/web/api/views/content.py @@ -16,7 +16,6 @@ from swh.web.api.views.utils import api_lookup from swh.web.common import archive from swh.web.common.exc import NotFoundExc -from swh.web.common.utils import reverse @api_route( @@ -168,22 +167,6 @@ ) -@api_route(r"/content/(?P[0-9a-z_:]*[0-9a-f]+)/ctags/", "api-1-content-ctags") -@api_doc("/content/ctags/", tags=["hidden"]) -def api_content_ctags(request: Request, q: str): - """ - Get information about all `Ctags `_-style - symbols defined in a content object. - """ - return api_lookup( - archive.lookup_content_ctags, - q, - notfound_msg="No ctags symbol found for content {}.".format(q), - enrich_fn=utils.enrich_metadata_endpoint, - request=request, - ) - - @api_route( r"/content/(?P[0-9a-z_:]*[0-9a-f]+)/raw/", "api-1-content-raw", @@ -233,53 +216,6 @@ ) -@api_route(r"/content/symbol/(?P.+)/", "api-1-content-symbol") -@api_doc("/content/symbol/", tags=["hidden"]) -def api_content_symbol(request: Request, q: str): - """Search content objects by `Ctags `_-style - symbol (e.g., function name, data type, method, ...). - - """ - result = {} - last_sha1 = request.query_params.get("last_sha1", None) - per_page = int(request.query_params.get("per_page", "10")) - - def lookup_exp(exp, last_sha1=last_sha1, per_page=per_page): - exp = list(archive.lookup_expression(exp, last_sha1, per_page)) - return exp if exp else None - - symbols = api_lookup( - lookup_exp, - q, - notfound_msg="No indexed raw content match expression '{}'.".format(q), - enrich_fn=functools.partial(utils.enrich_content, top_url=True), - request=request, - ) - - if symbols: - nb_symbols = len(symbols) - - if nb_symbols == per_page: - query_params = {} - new_last_sha1 = symbols[-1]["sha1"] - query_params["last_sha1"] = new_last_sha1 - if request.query_params.get("per_page"): - query_params["per_page"] = per_page - - result["headers"] = { - "link-next": reverse( - "api-1-content-symbol", - url_args={"q": q}, - query_params=query_params, - request=request, - ) - } - - result.update({"results": symbols}) - - return result - - @api_route(r"/content/known/search/", "api-1-content-known", methods=["POST"]) @api_route(r"/content/known/(?P(?!search).+)/", "api-1-content-known") @api_doc("/content/known/", tags=["hidden"]) diff --git a/swh/web/common/archive.py b/swh/web/common/archive.py --- a/swh/web/common/archive.py +++ b/swh/web/common/archive.py @@ -67,31 +67,6 @@ return hashes -def lookup_expression(expression, last_sha1, per_page): - """Lookup expression in raw content. - - Args: - expression (str): An expression to lookup through raw indexed - content - last_sha1 (str): Last sha1 seen - per_page (int): Number of results per page - - Yields: - ctags whose content match the expression - - """ - - limit = min(per_page, MAX_LIMIT) - ctags = idx_storage.content_ctags_search( - expression, last_sha1=last_sha1, limit=limit - ) - for ctag in ctags: - ctag = converters.from_swh(ctag, hashess={"id"}) - ctag["sha1"] = ctag["id"] - ctag.pop("id") - yield ctag - - def lookup_hash(q: str) -> Dict[str, Any]: """Check if the storage contains a given content checksum and return it if found. @@ -147,29 +122,6 @@ return hash_ -def lookup_content_ctags(q): - """Return ctags information from a specified content. - - Args: - q: query string of the form - - Yields: - ctags information (dict) list if the content is found. - - """ - sha1 = _lookup_content_sha1(q) - - if not sha1: - return None - - ctags = list(idx_storage.content_ctags_get([sha1])) - if not ctags: - return None - - for ctag in ctags: - yield converters.from_swh(ctag, hashess={"id"}) - - def lookup_content_filetype(q): """Return filetype information from a specified content. diff --git a/swh/web/tests/api/views/test_content.py b/swh/web/tests/api/views/test_content.py --- a/swh/web/tests/api/views/test_content.py +++ b/swh/web/tests/api/views/test_content.py @@ -6,7 +6,7 @@ import pytest from swh.web.common.utils import reverse -from swh.web.tests.conftest import ctags_json_missing, fossology_missing +from swh.web.tests.conftest import fossology_missing from swh.web.tests.data import random_content from swh.web.tests.utils import ( check_api_get_responses, @@ -61,64 +61,6 @@ } -@pytest.mark.skip # Language indexer is disabled -@pytest.mark.skipif( - ctags_json_missing, reason="requires ctags with json output support" -) -def test_api_content_symbol(api_client, indexer_data, contents_with_ctags): - expected_data = {} - for content_sha1 in contents_with_ctags["sha1s"]: - indexer_data.content_add_ctags(content_sha1) - for ctag in indexer_data.content_get_ctags(content_sha1): - if ctag["name"] == contents_with_ctags["symbol_name"]: - expected_data[content_sha1] = ctag - break - url = reverse( - "api-1-content-symbol", - url_args={"q": contents_with_ctags["symbol_name"]}, - query_params={"per_page": 100}, - ) - rv = check_api_get_responses(api_client, url, status_code=200) - - for entry in rv.data: - content_sha1 = entry["sha1"] - expected_entry = expected_data[content_sha1] - for key, view_name in ( - ("content_url", "api-1-content"), - ("data_url", "api-1-content-raw"), - ("license_url", "api-1-content-license"), - ("language_url", "api-1-content-language"), - ("filetype_url", "api-1-content-filetype"), - ): - expected_entry[key] = reverse( - view_name, - url_args={"q": "sha1:%s" % content_sha1}, - request=rv.wsgi_request, - ) - expected_entry["sha1"] = content_sha1 - del expected_entry["id"] - assert entry == expected_entry - assert "Link" not in rv - - url = reverse( - "api-1-content-symbol", - url_args={"q": contents_with_ctags["symbol_name"]}, - query_params={"per_page": 2}, - ) - - rv = check_api_get_responses(api_client, url, status_code=200) - - next_url = ( - reverse( - "api-1-content-symbol", - url_args={"q": contents_with_ctags["symbol_name"]}, - query_params={"last_sha1": rv.data[1]["sha1"], "per_page": 2}, - request=rv.wsgi_request, - ), - ) - assert rv["Link"] == '<%s>; rel="next"' % next_url - - def test_api_content_symbol_not_found(api_client): url = reverse("api-1-content-symbol", url_args={"q": "bar"}) rv = check_api_get_responses(api_client, url, status_code=404) @@ -129,26 +71,6 @@ assert "Link" not in rv -@pytest.mark.skipif( - ctags_json_missing, reason="requires ctags with json output support" -) -def test_api_content_ctags(api_client, indexer_data, content): - indexer_data.content_add_ctags(content["sha1"]) - url = reverse( - "api-1-content-ctags", url_args={"q": "sha1_git:%s" % content["sha1_git"]} - ) - rv = check_api_get_responses(api_client, url, status_code=200) - content_url = reverse( - "api-1-content", - url_args={"q": "sha1:%s" % content["sha1"]}, - request=rv.wsgi_request, - ) - expected_data = list(indexer_data.content_get_ctags(content["sha1"])) - for e in expected_data: - e["content_url"] = content_url - assert rv.data == expected_data - - @pytest.mark.skipif(fossology_missing, reason="requires fossology-nomossa installed") def test_api_content_license(api_client, indexer_data, content): indexer_data.content_add_license(content["sha1"]) diff --git a/swh/web/tests/common/test_archive.py b/swh/web/tests/common/test_archive.py --- a/swh/web/tests/common/test_archive.py +++ b/swh/web/tests/common/test_archive.py @@ -30,7 +30,7 @@ from swh.web.common import archive from swh.web.common.exc import BadInputExc, NotFoundExc from swh.web.common.typing import OriginInfo, PagedResult -from swh.web.tests.conftest import ctags_json_missing, fossology_missing +from swh.web.tests.conftest import fossology_missing from swh.web.tests.data import random_content, random_sha1 from swh.web.tests.strategies import new_origin, new_revision, visit_dates @@ -88,31 +88,6 @@ assert {"found": True} == actual_lookup -@pytest.mark.skipif( - ctags_json_missing, reason="requires ctags with json output support" -) -def test_lookup_content_ctags(indexer_data, contents_with_ctags): - content_sha1 = random.choice(contents_with_ctags["sha1s"]) - indexer_data.content_add_ctags(content_sha1) - actual_ctags = list(archive.lookup_content_ctags("sha1:%s" % content_sha1)) - - expected_data = list(indexer_data.content_get_ctags(content_sha1)) - for ctag in expected_data: - ctag["id"] = content_sha1 - - assert actual_ctags == expected_data - - -def test_lookup_content_ctags_no_hash(): - unknown_content_ = random_content() - - actual_ctags = list( - archive.lookup_content_ctags("sha1:%s" % unknown_content_["sha1"]) - ) - - assert actual_ctags == [] - - def test_lookup_content_filetype(indexer_data, content): indexer_data.content_add_mimetype(content["sha1"]) actual_filetype = archive.lookup_content_filetype(content["sha1"]) @@ -121,40 +96,6 @@ assert actual_filetype == expected_filetype -def test_lookup_expression(indexer_data, contents_with_ctags): - per_page = 10 - expected_ctags = [] - - for content_sha1 in contents_with_ctags["sha1s"]: - if len(expected_ctags) == per_page: - break - indexer_data.content_add_ctags(content_sha1) - for ctag in indexer_data.content_get_ctags(content_sha1): - if len(expected_ctags) == per_page: - break - if ctag["name"] == contents_with_ctags["symbol_name"]: - del ctag["id"] - ctag["sha1"] = content_sha1 - expected_ctags.append(ctag) - - actual_ctags = list( - archive.lookup_expression( - contents_with_ctags["symbol_name"], last_sha1=None, per_page=10 - ) - ) - - assert actual_ctags == expected_ctags - - -def test_lookup_expression_no_result(): - expected_ctags = [] - - actual_ctags = list( - archive.lookup_expression("barfoo", last_sha1=None, per_page=10) - ) - assert actual_ctags == expected_ctags - - @pytest.mark.skipif(fossology_missing, reason="requires fossology-nomossa installed") def test_lookup_content_license(indexer_data, content): indexer_data.content_add_license(content["sha1"]) diff --git a/swh/web/tests/common/test_utils.py b/swh/web/tests/common/test_utils.py --- a/swh/web/tests/common/test_utils.py +++ b/swh/web/tests/common/test_utils.py @@ -35,7 +35,6 @@ "/api/1/content/sha1:%s/", "/api/1/content/sha1_git:%s/", "/api/1/directory/%s/", - "/api/1/content/sha1:%s/ctags/", ] for template in templates: diff --git a/swh/web/tests/conftest.py b/swh/web/tests/conftest.py --- a/swh/web/tests/conftest.py +++ b/swh/web/tests/conftest.py @@ -10,7 +10,6 @@ import os import random import shutil -from subprocess import PIPE, run import sys import time from typing import Any, Dict, List, Optional @@ -59,12 +58,6 @@ os.environ["LC_ALL"] = "C.UTF-8" -# Used to skip some tests -ctags_json_missing = ( - shutil.which("ctags") is None - or b"+json" not in run(["ctags", "--version"], stdout=PIPE).stdout -) - fossology_missing = shutil.which("nomossa") is None # Register some hypothesis profiles @@ -444,35 +437,6 @@ return random.choice(_content_utf8_detected_as_binary()) -@pytest.fixture(scope="function") -def contents_with_ctags(): - """ - Fixture returning contents ingested into the test archive. - Those contents are ctags compatible, that is running ctags on those lay results. - """ - return { - "sha1s": [ - "0ab37c02043ebff946c1937523f60aadd0844351", - "15554cf7608dde6bfefac7e3d525596343a85b6f", - "2ce837f1489bdfb8faf3ebcc7e72421b5bea83bd", - "30acd0b47fc25e159e27a980102ddb1c4bea0b95", - "4f81f05aaea3efb981f9d90144f746d6b682285b", - "5153aa4b6e4455a62525bc4de38ed0ff6e7dd682", - "59d08bafa6a749110dfb65ba43a61963d5a5bf9f", - "7568285b2d7f31ae483ae71617bd3db873deaa2c", - "7ed3ee8e94ac52ba983dd7690bdc9ab7618247b4", - "8ed7ef2e7ff9ed845e10259d08e4145f1b3b5b03", - "9b3557f1ab4111c8607a4f2ea3c1e53c6992916c", - "9c20da07ed14dc4fcd3ca2b055af99b2598d8bdd", - "c20ceebd6ec6f7a19b5c3aebc512a12fbdc9234b", - "e89e55a12def4cd54d5bff58378a3b5119878eb7", - "e8c0654fe2d75ecd7e0b01bee8a8fc60a130097e", - "eb6595e559a1d34a2b41e8d4835e0e4f98a5d2b5", - ], - "symbol_name": "ABS", - } - - @pytest.fixture(scope="function") def directory(tests_data): """Fixture returning a random directory ingested into the test archive.""" @@ -1030,7 +994,6 @@ self.idx_storage = tests_data["idx_storage"] self.mimetype_indexer = tests_data["mimetype_indexer"] self.license_indexer = tests_data["license_indexer"] - self.ctags_indexer = tests_data["ctags_indexer"] def content_add_mimetype(self, cnt_id): self.mimetype_indexer.run([hash_to_bytes(cnt_id)]) @@ -1050,15 +1013,6 @@ for license in licenses: yield converters.from_swh(license.to_dict(), hashess={"id"}) - def content_add_ctags(self, cnt_id): - self.ctags_indexer.run([hash_to_bytes(cnt_id)]) - - def content_get_ctags(self, cnt_id): - cnt_id_bytes = hash_to_bytes(cnt_id) - ctags = self.idx_storage.content_ctags_get([cnt_id_bytes]) - for ctag in ctags: - yield converters.from_swh(ctag, hashess={"id"}) - @pytest.fixture def keycloak_oidc(keycloak_oidc, mocker): diff --git a/swh/web/tests/data.py b/swh/web/tests/data.py --- a/swh/web/tests/data.py +++ b/swh/web/tests/data.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018-2021 The Software Heritage developers +# Copyright (C) 2018-2022 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU Affero General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -13,7 +13,6 @@ from swh.core.config import merge_configs from swh.counters import get_counters -from swh.indexer.ctags import CtagsIndexer from swh.indexer.fossology_license import FossologyLicenseIndexer from swh.indexer.mimetype import MimetypeIndexer from swh.indexer.storage import get_indexer_storage @@ -111,23 +110,6 @@ ) -_TEST_CTAGS_INDEXER_CONFIG = merge_configs( - _TEST_INDEXER_BASE_CONFIG, - { - "workdir": "/tmp/swh/indexer.ctags", - "languages": {"c": "c"}, - "tools": { - "name": "universal-ctags", - "version": "~git7859817b", - "configuration": { - "command_line": """ctags --fields=+lnz --sort=no --links=no """ - """--output-format=json """ - }, - }, - }, -) - - # Lightweight git repositories that will be loaded to generate # input data for tests _TEST_ORIGINS = [ @@ -497,7 +479,6 @@ for idx_name, idx_class, idx_config in ( ("mimetype_indexer", MimetypeIndexer, _TEST_MIMETYPE_INDEXER_CONFIG), ("license_indexer", FossologyLicenseIndexer, _TEST_LICENSE_INDEXER_CONFIG), - ("ctags_indexer", CtagsIndexer, _TEST_CTAGS_INDEXER_CONFIG), ): idx = idx_class(config=idx_config) idx.storage = tests_data["storage"]