Changeset View
Changeset View
Standalone View
Standalone View
swh/search/translator.py
Show First 20 Lines • Show All 146 Lines • ▼ Show 20 Lines | def _parse_filter(self, filter): | ||||
"url.as_you_type._2gram", | "url.as_you_type._2gram", | ||||
"url.as_you_type._3gram", | "url.as_you_type._3gram", | ||||
], | ], | ||||
} | } | ||||
} | } | ||||
elif name == "metadata": | elif name == "metadata": | ||||
return { | return { | ||||
"nested": { | "nested": { | ||||
"path": "intrinsic_metadata", | "path": "jsonld", | ||||
"query": { | "query": { | ||||
"multi_match": { | "multi_match": { | ||||
"query": value, | "query": value, | ||||
# Makes it so that the "foo bar" query returns | # Makes it so that the "foo bar" query returns | ||||
# documents which contain "foo" in a field and "bar" | # documents which contain "foo" in a field and "bar" | ||||
# in a different field | # in a different field | ||||
"type": "cross_fields", | "type": "cross_fields", | ||||
# All keywords must be found in a document for it to | # All keywords must be found in a document for it to | ||||
# be considered a match. | # be considered a match. | ||||
# TODO: allow missing keywords? | # TODO: allow missing keywords? | ||||
"operator": "and", | "operator": "and", | ||||
# Searches on all fields of the intrinsic_metadata dict, | # Searches on all fields of the JSON-LD dict, | ||||
# recursively. | # recursively. | ||||
"fields": ["intrinsic_metadata.*"], | "fields": ["jsonld.*"], | ||||
# date{Created,Modified,Published} are of type date | # date{Created,Modified,Published} are of type date | ||||
"lenient": True, | "lenient": True, | ||||
} | } | ||||
}, | }, | ||||
} | } | ||||
} | } | ||||
if category == "booleanFilter": | if category == "booleanFilter": | ||||
Show All 36 Lines | def _parse_filter(self, filter): | ||||
return {"terms": {"visit_types": value}} | return {"terms": {"visit_types": value}} | ||||
if category == "unboundedListFilter": | if category == "unboundedListFilter": | ||||
value_array = value | value_array = value | ||||
if name == "keyword": | if name == "keyword": | ||||
return { | return { | ||||
"nested": { | "nested": { | ||||
"path": "intrinsic_metadata", | "path": "jsonld", | ||||
"query": { | "query": { | ||||
"multi_match": { | "multi_match": { | ||||
"query": " ".join(value_array), | "query": " ".join(value_array), | ||||
"fields": [ | "fields": [ | ||||
get_expansion("keywords", ".") + "^2", | get_expansion("keywords", ".") + "^2", | ||||
get_expansion("descriptions", "."), | get_expansion("descriptions", "."), | ||||
# "^2" boosts an origin's score by 2x | # "^2" boosts an origin's score by 2x | ||||
# if the queried keywords are | # if the queried keywords are | ||||
# found in its intrinsic_metadata.keywords | # found in its jsonld.keywords | ||||
], | ], | ||||
} | } | ||||
}, | }, | ||||
} | } | ||||
} | } | ||||
elif name in ["language", "license"]: | elif name in ["language", "license"]: | ||||
name_mapping = { | name_mapping = { | ||||
"language": "programming_languages", | "language": "programming_languages", | ||||
"license": "licenses", | "license": "licenses", | ||||
} | } | ||||
name = name_mapping[name] | name = name_mapping[name] | ||||
return { | return { | ||||
"nested": { | "nested": { | ||||
"path": "intrinsic_metadata", | "path": "jsonld", | ||||
"query": { | "query": { | ||||
"bool": { | "bool": { | ||||
"should": [ | "should": [ | ||||
{"match": {get_expansion(name, "."): val}} | {"match": {get_expansion(name, "."): val}} | ||||
for val in value_array | for val in value_array | ||||
], | ], | ||||
} | } | ||||
}, | }, | ||||
} | } | ||||
} | } | ||||
if category == "dateFilter": | if category == "dateFilter": | ||||
if name in ["created", "modified", "published"]: | if name in ["created", "modified", "published"]: | ||||
if op in ["=", "!="]: | if op in ["=", "!="]: | ||||
return { | return { | ||||
"nested": { | "nested": { | ||||
"path": "intrinsic_metadata", | "path": "jsonld", | ||||
"query": { | "query": { | ||||
"bool": { | "bool": { | ||||
("must" if op == "=" else "must_not"): [ | ("must" if op == "=" else "must_not"): [ | ||||
{ | { | ||||
"range": { | "range": { | ||||
get_expansion(f"date_{name}", "."): { | get_expansion(f"date_{name}", "."): { | ||||
"gte": value, | "gte": value, | ||||
"lte": value, | "lte": value, | ||||
} | } | ||||
} | } | ||||
} | } | ||||
], | ], | ||||
} | } | ||||
}, | }, | ||||
} | } | ||||
} | } | ||||
return { | return { | ||||
"nested": { | "nested": { | ||||
"path": "intrinsic_metadata", | "path": "jsonld", | ||||
"query": { | "query": { | ||||
"bool": { | "bool": { | ||||
"must": [ | "must": [ | ||||
{ | { | ||||
"range": { | "range": { | ||||
get_expansion(f"date_{name}", "."): { | get_expansion(f"date_{name}", "."): { | ||||
self.RANGE_OPERATOR_MAP[op]: value, | self.RANGE_OPERATOR_MAP[op]: value, | ||||
} | } | ||||
Show All 38 Lines |