diff --git a/swh/web/api/views/origin.py b/swh/web/api/views/origin.py
--- a/swh/web/api/views/origin.py
+++ b/swh/web/api/views/origin.py
@@ -6,6 +6,7 @@
 from distutils.util import strtobool
 
 from swh.web.common import service
+from swh.web.common.exc import BadInputExc
 from swh.web.common.utils import (
     reverse, get_origin_visits
 )
@@ -174,6 +175,70 @@
     return result
 
 
+@api_route(r'/origin/metadata-search/',
+           'api-origin-metadata-search')
+@api_doc('/origin/metadata-search/', noargs=True)
+def api_origin_metadata_search(request):
+    """
+    .. http:get:: /api/1/origin/metadata-search/
+
+        Search for software origins whose metadata (expressed as a
+        JSON-LD/CodeMeta dictionary) match the provided criteria.
+        For now, only full-text search on this dictionary is supported.
+
+        :query str fulltext: a string that will be matched against origin metadata;
+            results are ranked and ordered starting with the best ones.
+        :query int limit: the maximum number of found origins to return
+            (defaults to 70, bounded to 100)
+
+        :>jsonarr number origin_id: the origin unique identifier
+        :>jsonarr dict metadata: metadata of the origin (as a JSON-LD/CodeMeta dictionary)
+        :>jsonarr string from_revision: the revision used to extract these
+            metadata (the current HEAD or one of the former HEADs)
+        :>jsonarr dict tool: the tool used to extract these metadata
+
+        :reqheader Accept: the requested response content type,
+            either ``application/json`` (default) or ``application/yaml``
+        :resheader Content-Type: this depends on :http:header:`Accept` header of request
+
+        **Allowed HTTP Methods:** :http:method:`get`, :http:method:`head`, :http:method:`options`
+
+        :statuscode 200: no error
+        :statuscode 400: ``fulltext`` is missing or empty, or ``limit`` is
+            not a non-negative integer
+
+        **Example:**
+
+        .. parsed-literal::
+
+            :swh_web_api:`origin/metadata-search/?limit=2&fulltext=Jane%20Doe`
+    """  # noqa
+    fulltext = request.query_params.get('fulltext', None)
+    limit = request.query_params.get('limit', '70')
+
+    if not fulltext:
+        content = '"fulltext" must be provided and non-empty.'
+        raise BadInputExc(content)
+
+    # Validate the limit up front so that a malformed value is reported as
+    # bad input instead of raising an unhandled ValueError from int().
+    try:
+        limit = int(limit)
+    except ValueError:
+        content = '"limit" must be an integer.'
+        raise BadInputExc(content)
+    if limit < 0:
+        content = '"limit" must not be negative.'
+        raise BadInputExc(content)
+    limit = min(limit, 100)
+
+    results = api_lookup(service.search_origin_metadata, fulltext, limit)
+
+    return {
+        'results': results,
+    }
+
+
 @api_route(r'/origin/(?P<origin_id>[0-9]+)/visits/', 'api-origin-visits')
 @api_doc('/origin/visits/')
 def api_origin_visits(request, origin_id):
diff --git a/swh/web/common/service.py b/swh/web/common/service.py
--- a/swh/web/common/service.py
+++ b/swh/web/common/service.py
@@ -266,6 +266,25 @@
     return map(converters.from_origin, origins)
 
 
+def search_origin_metadata(fulltext, limit=50):
+    """Search for origins whose metadata match a provided string pattern.
+
+    Args:
+        fulltext: the string pattern to search for in origin metadata
+        limit: the maximum number of found origins to return
+
+    Returns:
+        list of origin metadata as dict, with 'from_revision' converted
+        to its hexadecimal string representation.
+
+    """
+    results = idx_storage.origin_intrinsic_metadata_search_fulltext(
+        conjunction=[fulltext], limit=limit)
+    for result in results:
+        result['from_revision'] = hashutil.hash_to_hex(result['from_revision'])
+    return results
+
+
 def lookup_person(person_id):
     """Return information about the person with id person_id.
 
diff --git a/swh/web/tests/api/views/test_origin.py b/swh/web/tests/api/views/test_origin.py
--- a/swh/web/tests/api/views/test_origin.py
+++ b/swh/web/tests/api/views/test_origin.py
@@ -244,3 +244,112 @@
         })
 
         mock_service.lookup_origin.assert_called_with({'id': '4321'})
+
+    @patch('swh.web.common.service.idx_storage')
+    def test_api_origin_metadata_search(self, mock_idx_storage):
+        # given
+        mock_idx_storage.origin_intrinsic_metadata_search_fulltext \
+            .return_value = [{
+                'from_revision':
+                b'p&\xb7\xc1\xa2\xafVR\x1e\x95\x1c\x01\xed \xf2U\xfa\x05B8',
+                'metadata': {'author': 'Jane Doe'},
+                'origin_id': 54974445,
+                'tool': {
+                    'configuration': {
+                        'context': ['NpmMapping', 'CodemetaMapping'],
+                        'type': 'local'
+                    },
+                    'id': 3,
+                    'name': 'swh-metadata-detector',
+                    'version': '0.0.1'
+                }
+            }]
+
+        # when
+        rv = self.client.get(
+            '/api/1/origin/metadata-search/?fulltext=Jane%20Doe')
+
+        # then
+        self.assertEqual(rv.status_code, 200, rv.content)
+        self.assertEqual(rv['Content-Type'], 'application/json')
+        expected_data = [{
+            'origin_id': 54974445,
+            'metadata': {'author': 'Jane Doe'},
+            'from_revision': '7026b7c1a2af56521e951c01ed20f255fa054238',
+            'tool': {
+                'configuration': {
+                    'context': ['NpmMapping', 'CodemetaMapping'],
+                    'type': 'local'
+                },
+                'id': 3,
+                'name': 'swh-metadata-detector',
+                'version': '0.0.1',
+            }
+        }]
+        self.assertEqual(rv.data, expected_data)
+        mock_idx_storage.origin_intrinsic_metadata_search_fulltext \
+            .assert_called_with(conjunction=['Jane Doe'], limit=70)
+
+    @patch('swh.web.common.service.idx_storage')
+    def test_api_origin_metadata_search_limit(self, mock_idx_storage):
+        # given
+        mock_idx_storage.origin_intrinsic_metadata_search_fulltext \
+            .return_value = [{
+                'from_revision':
+                b'p&\xb7\xc1\xa2\xafVR\x1e\x95\x1c\x01\xed \xf2U\xfa\x05B8',
+                'metadata': {'author': 'Jane Doe'},
+                'origin_id': 54974445,
+                'tool': {
+                    'configuration': {
+                        'context': ['NpmMapping', 'CodemetaMapping'],
+                        'type': 'local'
+                    },
+                    'id': 3,
+                    'name': 'swh-metadata-detector',
+                    'version': '0.0.1'
+                }
+            }]
+
+        # when
+        rv = self.client.get(
+            '/api/1/origin/metadata-search/?fulltext=Jane%20Doe')
+
+        # then
+        self.assertEqual(rv.status_code, 200, rv.content)
+        self.assertEqual(rv['Content-Type'], 'application/json')
+        self.assertEqual(len(rv.data), 1)
+        mock_idx_storage.origin_intrinsic_metadata_search_fulltext \
+            .assert_called_with(conjunction=['Jane Doe'], limit=70)
+
+        # when
+        rv = self.client.get(
+            '/api/1/origin/metadata-search/?fulltext=Jane%20Doe&limit=10')
+
+        # then
+        self.assertEqual(rv.status_code, 200, rv.content)
+        self.assertEqual(rv['Content-Type'], 'application/json')
+        self.assertEqual(len(rv.data), 1)
+        mock_idx_storage.origin_intrinsic_metadata_search_fulltext \
+            .assert_called_with(conjunction=['Jane Doe'], limit=10)
+
+        # when
+        rv = self.client.get(
+            '/api/1/origin/metadata-search/?fulltext=Jane%20Doe&limit=987')
+
+        # then
+        self.assertEqual(rv.status_code, 200, rv.content)
+        self.assertEqual(rv['Content-Type'], 'application/json')
+        self.assertEqual(len(rv.data), 1)
+        mock_idx_storage.origin_intrinsic_metadata_search_fulltext \
+            .assert_called_with(conjunction=['Jane Doe'], limit=100)
+
+    @patch('swh.web.common.service.idx_storage')
+    def test_api_origin_metadata_search_invalid(self, mock_idx_storage):
+        rv = self.client.get('/api/1/origin/metadata-search/')
+
+        # then
+        self.assertEqual(rv.status_code, 400, rv.content)
+        # Assert on the method itself: calling a child mock does not mark
+        # its parent as called, so asserting on the parent proves nothing.
+        mock_idx_storage.origin_intrinsic_metadata_search_fulltext \
+            .assert_not_called()