diff --git a/swh/search/elasticsearch.py b/swh/search/elasticsearch.py --- a/swh/search/elasticsearch.py +++ b/swh/search/elasticsearch.py @@ -120,6 +120,17 @@ self._backend.indices.put_mapping( index=self._get_origin_index(), body={ + "dynamic_templates": [ + { + "booleans_as_string": { + # All metadata fields are stored as strings, + # even the booleans + "match_mapping_type": "boolean", + "path_match": "intrinsic_metadata.*", + "mapping": {"type": "keyword"}, + } + } + ], "date_detection": False, "properties": { # sha1 of the URL; used as the document id diff --git a/swh/search/tests/test_search.py b/swh/search/tests/test_search.py --- a/swh/search/tests/test_search.py +++ b/swh/search/tests/test_search.py @@ -497,11 +497,11 @@ assert actual_page.next_page_token is None assert actual_page.results == [origin1_foobar] - def test_origin_intrinsic_metadata_date(self): + def test_origin_intrinsic_metadata_string_mapping(self): """Checks inserting a date-like in a field does not update the mapping to require every document uses a date in that field; or that search queries use a date either. 
- Likewise for numeric fields.""" + Likewise for numeric and boolean fields.""" origin1 = {"url": "http://origin1"} origin2 = {"url": "http://origin2"} @@ -513,12 +513,12 @@ "@context": "https://doi.org/10.5063/schema/codemeta-2.0", "dateCreated": "2021-02-18T10:16:52", "version": "1.0", + "isAccessibleForFree": True, }, } ] ) self.search.flush() - self.search.origin_update( [ { @@ -528,6 +528,7 @@ "dateCreated": "a long time ago", "address": "in a galaxy far, far away", "version": "a new hope", + "isAccessibleForFree": "it depends", }, }, ] @@ -542,6 +543,14 @@ assert actual_page.next_page_token is None assert actual_page.results == [origin2] + actual_page = self.search.origin_search(metadata_pattern="true") + assert actual_page.next_page_token is None + assert actual_page.results == [origin1] + + actual_page = self.search.origin_search(metadata_pattern="it depends") + assert actual_page.next_page_token is None + assert actual_page.results == [origin2] + def test_origin_intrinsic_metadata_update(self): origin = {"url": "http://origin1"} origin_data = {