Changeset View
Changeset View
Standalone View
Standalone View
swh/search/tests/test_search.py
# Copyright (C) 2019-2022 The Software Heritage developers | # Copyright (C) 2019-2022 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
from collections import Counter | from collections import Counter | ||||
from datetime import datetime, timedelta, timezone | from datetime import datetime, timedelta, timezone | ||||
import hashlib | |||||
from itertools import permutations | from itertools import permutations | ||||
from hypothesis import given, settings, strategies | from hypothesis import given, settings, strategies | ||||
import pytest | import pytest | ||||
from swh.core.api.classes import stream_results | from swh.core.api.classes import stream_results | ||||
▲ Show 20 Lines • Show All 1,206 Lines • ▼ Show 20 Lines | def test_search_blocklisted_update(self): | ||||
[{**origin1, "has_visits": True, "visit_types": ["git"]}] | [{**origin1, "has_visits": True, "visit_types": ["git"]}] | ||||
) | ) | ||||
self.search.flush() | self.search.flush() | ||||
result_page = self.search.origin_search(url_pattern="origin") | result_page = self.search.origin_search(url_pattern="origin") | ||||
assert result_page.next_page_token is None | assert result_page.next_page_token is None | ||||
assert result_page.results == [] | assert result_page.results == [] | ||||
def test_filter_keyword_in_filter(self):
    """Search with URL patterns that resemble filter syntax.

    One origin is indexed whose URL contains the words
    ``language in ['foo baz']``. A pattern made of similar words is
    expected to return that origin on a single page, while an unrelated
    pattern is expected to return nothing.
    """
    document = {"url": "foo language in ['foo baz'] bar"}
    self.search.origin_update([document])
    self.search.flush()

    # (url_pattern, expected results) pairs, checked in order.
    expectations = (
        ("language in ['foo bar']", [document]),
        ("baaz", []),
    )
    for pattern, expected in expectations:
        page = self.search.origin_search(url_pattern=pattern)
        assert page.next_page_token is None
        assert page.results == expected
def test_origin_get(self):
    """Check ``origin_get`` on documents carrying codemeta metadata.

    Three origins are indexed whose ``author`` field is, respectively, an
    object, a plain string and an array. Each one is then fetched by URL
    and compared against a document holding the URL, the SHA1 hex digest
    of the URL, and the expected expanded form of the metadata. A URL
    that was never indexed is expected to yield ``None``.
    """
    context = "https://doi.org/10.5063/schema/codemeta-2.0"

    # Compact ``author`` value sent for each origin URL.
    authors_by_url = {
        "http://origin1": {"familyName": "Foo", "givenName": "Bar"},
        "http://origin2": "Bar Baz",
        "http://origin3": ["Baz", "Qux"],
    }
    # Content of the ``@list`` node expected back for each origin URL.
    expanded_authors_by_url = {
        "http://origin1": [
            {
                "http://schema.org/familyName": [{"@value": "Foo"}],
                "http://schema.org/givenName": [{"@value": "Bar"}],
            }
        ],
        "http://origin2": [{"@value": "Bar Baz"}],
        "http://origin3": [{"@value": "Baz"}, {"@value": "Qux"}],
    }

    documents = [
        {"url": url, "jsonld": {"@context": context, "author": author}}
        for url, author in authors_by_url.items()
    ]
    self.search.origin_update(documents)
    self.search.flush()

    for url, author_list in expanded_authors_by_url.items():
        expected = {
            "url": url,
            "sha1": hashlib.sha1(url.encode()).hexdigest(),
            "jsonld": [{"http://schema.org/author": [{"@list": author_list}]}],
        }
        assert self.search.origin_get(url) == expected

    assert self.search.origin_get("http://origin4") is None
def test_visit_types_count(self): | def test_visit_types_count(self): | ||||
assert self.search.visit_types_count() == Counter() | assert self.search.visit_types_count() == Counter() | ||||
origins = [ | origins = [ | ||||
{"url": "http://foobar.baz", "visit_types": ["git"], "blocklisted": True} | {"url": "http://foobar.baz", "visit_types": ["git"], "blocklisted": True} | ||||
] | ] | ||||
for idx, visit_type in enumerate(["git", "hg", "svn"]): | for idx, visit_type in enumerate(["git", "hg", "svn"]): | ||||
Show All 38 Lines |
spurious print