Implement metadata_pattern search in the in-memory backend.
The pattern is split into lowercase words; an origin matches when every
pattern word occurs somewhere in its "intrinsic_metadata" (searched recursively
through nested dicts and lists). Origins without that key never match.

diff --git a/swh/search/in_memory.py b/swh/search/in_memory.py
--- a/swh/search/in_memory.py
+++ b/swh/search/in_memory.py
@@ -11,6 +11,26 @@
 from swh.model.identifiers import origin_identifier
 from swh.search.interface import PagedResult
 
+_words_regexp = re.compile(r"\w+")
+
+
+def _dict_words_set(d):
+    """Recursively extract set of words from dict content."""
+    values = set()
+
+    def extract(obj, words):
+        if isinstance(obj, dict):
+            for v in obj.values():
+                extract(v, words)
+        elif isinstance(obj, list):
+            for item in obj:
+                extract(item, words)
+        else:
+            words.update(_words_regexp.findall(str(obj).lower()))
+        return words
+
+    return extract(d, values)
+
 
 class InMemorySearch:
     def __init__(self):
@@ -76,10 +96,20 @@
         hits = filter(predicate, hits)
 
         if metadata_pattern:
-            raise NotImplementedError(
-                "Metadata search is not implemented in the in-memory backend."
+            metadata_pattern_words = set(
+                _words_regexp.findall(metadata_pattern.lower())
             )
 
+            def predicate(match):
+                if "intrinsic_metadata" not in match:
+                    return False
+
+                return metadata_pattern_words.issubset(
+                    _dict_words_set(match["intrinsic_metadata"])
+                )
+
+            hits = filter(predicate, hits)
+
         if not url_pattern and not metadata_pattern:
             raise ValueError(
                 "At least one of url_pattern and metadata_pattern must be provided."
Remove the skip overrides for the intrinsic-metadata tests in the in-memory test class.

diff --git a/swh/search/tests/test_in_memory.py b/swh/search/tests/test_in_memory.py
--- a/swh/search/tests/test_in_memory.py
+++ b/swh/search/tests/test_in_memory.py
@@ -23,31 +23,3 @@
     def reset(self):
         self.search.deinitialize()
         self.search.initialize()
-
-    @pytest.mark.skip("Not implemented in the in-memory search")
-    def test_origin_intrinsic_metadata_description(self):
-        pass
-
-    @pytest.mark.skip("Not implemented in the in-memory search")
-    def test_origin_intrinsic_metadata_all_terms(self):
-        pass
-
-    @pytest.mark.skip("Not implemented in the in-memory search")
-    def test_origin_intrinsic_metadata_nested(self):
-        pass
-
-    @pytest.mark.skip("Not implemented in the in-memory search")
-    def test_origin_intrinsic_metadata_paging(self):
-        pass
-
-    @pytest.mark.skip("Not implemented in the in-memory search")
-    def test_origin_intrinsic_metadata_inconsistent_type(self):
-        pass
-
-    @pytest.mark.skip("Not implemented in the in-memory search")
-    def test_origin_intrinsic_metadata_matches_cross_fields(self):
-        pass
-
-    @pytest.mark.skip("Not implemented in the in-memory search")
-    def test_origin_intrinsic_metadata_long_description(self):
-        pass

Compare the URLs of the metadata_pattern="bar" results order-insensitively instead of the exact result list.

diff --git a/swh/search/tests/test_search.py b/swh/search/tests/test_search.py
--- a/swh/search/tests/test_search.py
+++ b/swh/search/tests/test_search.py
@@ -289,7 +289,9 @@
 
         actual_page = self.search.origin_search(metadata_pattern="bar")
         assert actual_page.next_page_token is None
-        assert actual_page.results == [origin2_barbaz, origin1_foobar]
+        results = [r["url"] for r in actual_page.results]
+        expected_results = [o["url"] for o in [origin2_barbaz, origin1_foobar]]
+        assert sorted(results) == sorted(expected_results)
 
         actual_page = self.search.origin_search(metadata_pattern="baz")
         assert actual_page.next_page_token is None