diff --git a/swh/web/api/views/origin.py b/swh/web/api/views/origin.py
--- a/swh/web/api/views/origin.py
+++ b/swh/web/api/views/origin.py
@@ -4,6 +4,8 @@
 # See top-level LICENSE file for more information
 
 from distutils.util import strtobool
+from rest_framework import status
+from rest_framework.response import Response
 
 from swh.web.common import service
 from swh.web.common.utils import (
@@ -174,6 +176,68 @@
     return result
 
 
+@api_route(r'/origin/metadata-search/',
+           'api-origin-metadata-search')
+@api_doc('/origin/metadata-search/')
+def api_origin_metadata_search(request):
+    """
+    .. http:get:: /api/1/origin/metadata-search/
+
+        Search for software origins whose metadata (expressed as a
+        JSON-LD/CodeMeta dictionary) match the provided criteria.
+        For now, only full-text search on this dictionary is supported.
+
+        :query str fulltext: a string that will be matched against origin metadata;
+            results are ranked and ordered starting with the best ones.
+        :query int limit: the maximum number of found origins to return
+            (a positive integer, 70 when omitted)
+
+        :>jsonarr number origin_id: the origin unique identifier
+        :>jsonarr dict metadata: metadata of the origin (as a JSON-LD/CodeMeta dictionary)
+        :>jsonarr string from_revision: the revision used to extract these
+            metadata (the current HEAD or one of the former HEADs)
+        :>jsonarr dict tool: the tool used to extract these metadata
+
+        :reqheader Accept: the requested response content type,
+            either ``application/json`` (default) or ``application/yaml``
+        :resheader Content-Type: this depends on :http:header:`Accept` header of request
+
+        **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`
+
+        :statuscode 200: no error
+        :statuscode 400: ``fulltext`` is missing or empty, or ``limit`` is
+            not a positive integer
+
+        **Example:**
+
+        .. parsed-literal::
+
+            :swh_web_api:`origin/metadata-search/?limit=2&fulltext=Jane%20Doe`
+    """ # noqa
+    fulltext = request.query_params.get('fulltext', None)
+    if not fulltext:
+        content = '"fulltext" must be provided and non-empty.'
+        return Response(content, status=status.HTTP_400_BAD_REQUEST)
+
+    # Answer a malformed or non-positive limit with a 400 rather than
+    # letting int() raise ValueError out of the view.
+    try:
+        limit = int(request.query_params.get('limit', '70'))
+    except ValueError:
+        limit = 0
+    if limit <= 0:
+        content = '"limit" must be a positive integer.'
+        return Response(content, status=status.HTTP_400_BAD_REQUEST)
+
+    # search_origin_metadata takes (fulltext, offset, limit): the offset
+    # is passed explicitly so that limit is not bound to it.
+    results = api_lookup(service.search_origin_metadata, fulltext, 0, limit)
+
+    return {
+        'results': results,
+    }
+
+
 @api_route(r'/origin/(?P<origin_id>[0-9]+)/visits/', 'api-origin-visits')
 @api_doc('/origin/visits/')
 def api_origin_visits(request, origin_id):
diff --git a/swh/web/common/service.py b/swh/web/common/service.py
--- a/swh/web/common/service.py
+++ b/swh/web/common/service.py
@@ -266,6 +266,34 @@
     return map(converters.from_origin, origins)
 
 
+def search_origin_metadata(fulltext, offset=0, limit=50):
+    """Search for origins whose metadata match a provided string pattern.
+
+    Args:
+        fulltext: the string pattern to search for in origin metadata
+        offset: number of found origins to skip before returning results
+        limit: the maximum number of found origins to return
+
+    Returns:
+        list of origin metadata as dict, with ``from_revision`` given as
+        a hexadecimal string.
+
+    """
+    # The storage call below is only given a limit, so ask for enough
+    # matches to cover the skipped ones and drop those afterwards.
+    matches = idx_storage.origin_intrinsic_metadata_search_fulltext(
+        conjunction=[fulltext], limit=offset + limit)
+
+    results = []
+    for match in list(matches)[offset:]:
+        revision = match.get('from_revision')
+        if isinstance(revision, bytes):
+            # The API documents from_revision as a string, not raw bytes.
+            match = dict(match, from_revision=revision.hex())
+        results.append(match)
+    return results
+
+
 def lookup_person(person_id):
     """Return information about the person with id person_id.
 
diff --git a/swh/web/tests/api/views/test_origin.py b/swh/web/tests/api/views/test_origin.py
--- a/swh/web/tests/api/views/test_origin.py
+++ b/swh/web/tests/api/views/test_origin.py
@@ -244,3 +244,51 @@
         })
 
         mock_service.lookup_origin.assert_called_with({'id': '4321'})
+
+    @patch('swh.web.common.service.idx_storage')
+    def test_api_origin_metadata_search(self, mock_idx_storage):
+        # given
+        mock_idx_storage.origin_intrinsic_metadata_search_fulltext \
+            .return_value = [{
+                'from_revision':
+                b'p&\xb7\xc1\xa2\xafVR\x1e\x95\x1c\x01\xed \xf2U\xfa\x05B8',
+                'metadata': {'author': 'Jane Doe'},
+                'origin_id': 54974445,
+                'tool': {
+                    'configuration': {
+                        'context': ['NpmMapping', 'CodemetaMapping'],
+                        'type': 'local'
+                    },
+                    'id': 3,
+                    'name': 'swh-metadata-detector',
+                    'version': '0.0.1'
+                }
+            }]
+
+        # when
+        rv = self.client.get(
+            '/api/1/origin/metadata-search/?fulltext=Jane%20Doe')
+
+        # then
+        self.assertEqual(rv.status_code, 200)
+        self.assertEqual(rv['Content-Type'], 'application/json')
+        # NOTE(review): assumes the API layer hands back the content of the
+        # view's 'results' key as the response body -- confirm.
+        self.assertEqual(rv.data, [{
+            'origin_id': 54974445,
+            'metadata': {'author': 'Jane Doe'},
+            # hexadecimal form of the raw identifier returned by the mock
+            'from_revision': '7026b7c1a2af56521e951c01ed20f255fa054238',
+            'tool': {
+                'configuration': {
+                    'context': ['NpmMapping', 'CodemetaMapping'],
+                    'type': 'local'
+                },
+                'id': 3,
+                'name': 'swh-metadata-detector',
+                'version': '0.0.1',
+            }
+        }])
+        # 70 is the limit the endpoint applies when none is given.
+        mock_idx_storage.origin_intrinsic_metadata_search_fulltext \
+            .assert_called_with(conjunction=['Jane Doe'], limit=70)