Changeset View
Changeset View
Standalone View
Standalone View
swh/search/tests/test_search.py
Show First 20 Lines • Show All 403 Lines • ▼ Show 20 Lines | def test_origin_last_revision_date_update_search(self): | ||||
date_type="last_revision_date" | date_type="last_revision_date" | ||||
) | ) | ||||
def test_origin_last_release_date_update_search(self):
    """Run the shared revision/release date update scenario for release dates.

    Delegates to ``_test_origin_last_revision_release_date_update_search``,
    selecting the release-date field through ``date_type``.
    """
    # The revision-date variant is covered by its own test; this one must
    # pass the release-date field, otherwise both tests check the same thing
    # and the release-date path is never exercised.
    self._test_origin_last_revision_release_date_update_search(
        date_type="last_release_date"
    )
def test_origin_instrinsic_metadata_dates_filter_sorting_search(self):
    """Check the ``min_date_*`` filters and ``date_created`` sorting.

    Three origins carrying codemeta ``dateCreated`` / ``dateModified`` /
    ``datePublished`` values are indexed, then searched with each minimum
    date filter (order-insensitive comparison) and with ascending and
    descending ``date_created`` sorting (order-sensitive comparison).
    """
    DATE_0 = "1999-06-28"
    DATE_1 = "2001-02-13"
    DATE_2 = "2005-10-02"

    # (url, dateCreated, dateModified, datePublished) for each origin.
    origin_rows = [
        ("http://foobar.0.com", DATE_0, DATE_1, DATE_2),
        ("http://foobar.1.com", DATE_1, DATE_2, DATE_2),
        ("http://foobar.2.com", DATE_2, DATE_2, DATE_2),
    ]
    ORIGINS = [
        {
            "url": url,
            "intrinsic_metadata": {
                "@context": "https://doi.org/10.5063/schema/codemeta-2.0",
                "dateCreated": created,
                "dateModified": modified,
                "datePublished": published,
            },
        }
        for url, created, modified, published in origin_rows
    ]

    self.search.origin_update(ORIGINS)
    self.search.flush()

    def _check_results(origin_indices, sort_results=True, **kwargs):
        """Search for "foobar" with ``kwargs`` and compare the returned URLs.

        With ``sort_results`` set, both sides are sorted first so that only
        the set of matches matters; otherwise the exact order is compared.
        """
        page = self.search.origin_search(url_pattern="foobar", **kwargs)
        found_urls = [hit["url"] for hit in page.results]
        expected_urls = [ORIGINS[index]["url"] for index in origin_indices]
        if sort_results:
            found_urls.sort()
            expected_urls.sort()
        assert found_urls == expected_urls

    # (filter keyword, minimum date, indices of the origins expected to match)
    filter_cases = [
        ("min_date_created", DATE_0, [0, 1, 2]),
        ("min_date_created", DATE_1, [1, 2]),
        ("min_date_created", DATE_2, [2]),
        ("min_date_modified", DATE_0, [0, 1, 2]),
        ("min_date_modified", DATE_1, [0, 1, 2]),
        ("min_date_modified", DATE_2, [1, 2]),
        ("min_date_published", DATE_0, [0, 1, 2]),
        ("min_date_published", DATE_1, [0, 1, 2]),
        ("min_date_published", DATE_2, [0, 1, 2]),
    ]
    for filter_name, minimum_date, matching in filter_cases:
        _check_results(origin_indices=matching, **{filter_name: minimum_date})

    # Sorting: here the order of the returned origins is what is verified.
    sorting_cases = [
        ("-date_created", [2, 1, 0]),
        ("date_created", [0, 1, 2]),
    ]
    for sort_key, ordered in sorting_cases:
        _check_results(
            sort_by=[sort_key], origin_indices=ordered, sort_results=False
        )
def test_origin_keywords_search(self): | def test_origin_keywords_search(self): | ||||
ORIGINS = [ | ORIGINS = [ | ||||
{ | { | ||||
"url": "http://foobar.1.com", | "url": "http://foobar.1.com", | ||||
"intrinsic_metadata": { | "intrinsic_metadata": { | ||||
"@context": "https://doi.org/10.5063/schema/codemeta-2.0", | "@context": "https://doi.org/10.5063/schema/codemeta-2.0", | ||||
"description": "Django is a backend framework for applications", | "description": "Django is a backend framework for applications", | ||||
"keywords": "django,backend,server,web,framework", | "keywords": "django,backend,server,web,framework", | ||||
▲ Show 20 Lines • Show All 489 Lines • ▼ Show 20 Lines | def test_origin_intrinsic_metadata_string_mapping(self): | ||||
"version": "a new hope", | "version": "a new hope", | ||||
"isAccessibleForFree": "it depends", | "isAccessibleForFree": "it depends", | ||||
}, | }, | ||||
}, | }, | ||||
] | ] | ||||
) | ) | ||||
self.search.flush() | self.search.flush() | ||||
actual_page = self.search.origin_search(metadata_pattern="2021") | actual_page = self.search.origin_search(metadata_pattern="1.0") | ||||
assert actual_page.next_page_token is None | assert actual_page.next_page_token is None | ||||
assert actual_page.results == [origin1] | assert actual_page.results == [origin1] | ||||
actual_page = self.search.origin_search(metadata_pattern="long time ago") | actual_page = self.search.origin_search(metadata_pattern="long") | ||||
assert actual_page.next_page_token is None | assert actual_page.next_page_token is None | ||||
assert actual_page.results == [origin2] | assert ( | ||||
actual_page.results == [] | |||||
vlorentz: I don't understand. Are we rejecting the *entire* document because one of its values was badly… | |||||
Done Inline Actions No. We just reject that field (pop it out of intrinsic_metadata before expanding and storing the intrinsic_metadata). Also, this rejection is only for date{Created,Modified,Published}. KShivendu: No. We just reject that field (pop it out of intrinsic_metadata before expanding and storing… | |||||
Not Done Inline Actionsnvm, I misunderstood the test vlorentz: nvm, I misunderstood the test | |||||
) # "%Y-%m-%d" not followed, so value is rejected | |||||
actual_page = self.search.origin_search(metadata_pattern="true") | actual_page = self.search.origin_search(metadata_pattern="true") | ||||
assert actual_page.next_page_token is None | assert actual_page.next_page_token is None | ||||
assert actual_page.results == [origin1] | assert actual_page.results == [origin1] | ||||
actual_page = self.search.origin_search(metadata_pattern="it depends") | actual_page = self.search.origin_search(metadata_pattern="it depends") | ||||
assert actual_page.next_page_token is None | assert actual_page.next_page_token is None | ||||
assert actual_page.results == [origin2] | assert actual_page.results == [origin2] | ||||
▲ Show 20 Lines • Show All 152 Lines • Show Last 20 Lines |
I don't understand. Are we rejecting the *entire* document because one of its values was badly formatted?