Page Menu | Home | Software Heritage
Paste P954

TestElasticsearchSearch.test_origin_intrinsic_metadata_date
Active | Public

Authored by vlorentz on Feb 18 2021, 11:32 AM.
self = <swh.search.tests.test_elasticsearch.TestElasticsearchSearch testMethod=test_origin_intrinsic_metadata_date>
def test_origin_intrinsic_metadata_date(self):
"""Checks inserting a date-like in a field does not update the mapping to
require every document uses a date in that field; or that search queries
use a date either"""
origin1 = {"url": "http://origin1"}
origin2 = {"url": "http://origin2"}
self.search.origin_update(
[
{
"url": f"http://origin{i}",
"intrinsic_metadata": {
"@context": "https://doi.org/10.5063/schema/codemeta-2.0",
"dateCreated": "{1980+i}-02-18 10:16:52"
},
}
for i in range(1000)
]
)
self.search.flush()
self.search.origin_update(
[
{
**origin2,
"intrinsic_metadata": {
"@context": "https://doi.org/10.5063/schema/codemeta-2.0",
"dateCreated": "a long time ago",
"address": "in a galaxy far, far away",
},
},
]
)
self.search.flush()
from pprint import pprint
pprint(self.search._backend.indices.get_mapping())
#pprint(list(self.search.origin_dump()))
actual_page = self.search.origin_search(metadata_pattern="2021")
assert actual_page.next_page_token is None
results = [r["url"] for r in actual_page.results]
> assert actual_page.results == [{"url": "http://origin41"}]
E AssertionError: assert [] == [{'url': 'http://origin41'}]
E Right contains one more item: {'url': 'http://origin41'}
E Use -v to get the full diff
swh/search/tests/test_search.py:361: AssertionError
---------------------------------------------------------------------------------------------------------------------------- Captured stdout call -----------------------------------------------------------------------------------------------------------------------------
{'test_origin': {'mappings': {'properties': {'has_visits': {'type': 'boolean'},
'intrinsic_metadata': {'properties': {'@context': {'type': 'keyword'},
'http://schema': {'properties': {'org/address': {'properties': {'@value': {'fields': {'keyword': {'ignore_above': 256,
'type': 'keyword'}},
'type': 'text'}}},
'org/dateCreated': {'properties': {'@type': {'fields': {'keyword': {'ignore_above': 256,
'type': 'keyword'}},
'type': 'text'},
'@value': {'fields': {'keyword': {'ignore_above': 256,
'type': 'keyword'}},
'type': 'text'}}}}}},
'type': 'nested'},
'sha1': {'type': 'keyword'},
'url': {'analyzer': 'simple',
'fields': {'as_you_type': {'analyzer': 'simple',
'doc_values': False,
'max_shingle_size': 3,
'type': 'search_as_you_type'}},
'type': 'text'}}}}}