Changeset View
Changeset View
Standalone View
Standalone View
swh/search/translator.py
Show First 20 Lines • Show All 64 Lines • ▼ Show 20 Lines | def _traverse(self, node): | ||||
if len(node.children) == 3: | if len(node.children) == 3: | ||||
# filters => filters conj_op filters | # filters => filters conj_op filters | ||||
filters1 = self._traverse(node.children[0]) | filters1 = self._traverse(node.children[0]) | ||||
conj_op = self._get_value(node.children[1]) | conj_op = self._get_value(node.children[1]) | ||||
filters2 = self._traverse(node.children[2]) | filters2 = self._traverse(node.children[2]) | ||||
if conj_op == "and": | if conj_op == "and": | ||||
# "must" is equivalent to "AND" | |||||
return {"bool": {"must": [filters1, filters2]}} | return {"bool": {"must": [filters1, filters2]}} | ||||
if conj_op == "or": | if conj_op == "or": | ||||
# "should" is equivalent to "OR" | |||||
return {"bool": {"should": [filters1, filters2]}} | return {"bool": {"should": [filters1, filters2]}} | ||||
if node.type == "filter": | if node.type == "filter": | ||||
filter_category = node.children[0] | filter_category = node.children[0] | ||||
return self._parse_filter(filter_category) | return self._parse_filter(filter_category) | ||||
if node.type == "sortBy": | if node.type == "sortBy": | ||||
return self._parse_filter(node) | return self._parse_filter(node) | ||||
Show All 16 Lines | def _get_value(self, node): | ||||
return [self._get_value(child) for child in node.children if child.is_named] | return [self._get_value(child) for child in node.children if child.is_named] | ||||
start = node.start_point[1] | start = node.start_point[1] | ||||
end = node.end_point[1] | end = node.end_point[1] | ||||
value = self.query[start:end] | value = self.query[start:end] | ||||
if len(value) > 1 and ( | if len(value) > 1 and ( | ||||
(value[0] == "'" and value[1] == "'") or (value[0] and value[-1] == '"') | (value[0] == "'" and value[-1] == "'") or (value[0] and value[-1] == '"') | ||||
): | ): | ||||
return value[1:-1] | return value[1:-1] | ||||
if node.type in ["number", "numberVal"]: | if node.type in ["number", "numberVal"]: | ||||
return int(value) | return int(value) | ||||
# TODO: Escape special characters (`'`, `"` and `\`) | |||||
return value | return value | ||||
def _parse_filter(self, filter): | def _parse_filter(self, filter): | ||||
if filter.type == "boundedListFilter": | if filter.type == "boundedListFilter": | ||||
filter = filter.children[0] | filter = filter.children[0] | ||||
children = filter.children | children = filter.children | ||||
Show All 18 Lines | def _parse_filter(self, filter): | ||||
} | } | ||||
elif name == "metadata": | elif name == "metadata": | ||||
return { | return { | ||||
"nested": { | "nested": { | ||||
"path": "intrinsic_metadata", | "path": "intrinsic_metadata", | ||||
"query": { | "query": { | ||||
"multi_match": { | "multi_match": { | ||||
"query": value, | "query": value, | ||||
# Makes it so that the "foo bar" query returns | |||||
# documents which contain "foo" in a field and "bar" | |||||
# in a different field | |||||
"type": "cross_fields", | "type": "cross_fields", | ||||
# All keywords must be found in a document for it to | |||||
# be considered a match. | |||||
# TODO: allow missing keywords? | |||||
"operator": "and", | "operator": "and", | ||||
# Searches on all fields of the intrinsic_metadata dict, | |||||
# recursively. | |||||
"fields": ["intrinsic_metadata.*"], | "fields": ["intrinsic_metadata.*"], | ||||
# date{Created,Modified,Published} are of type date, so format errors from matching free text against them must be ignored | |||||
"lenient": True, | "lenient": True, | ||||
} | } | ||||
}, | }, | ||||
} | } | ||||
} | } | ||||
if category == "booleanFilter": | if category == "booleanFilter": | ||||
if name == "visited": | if name == "visited": | ||||
Show All 26 Lines | def _parse_filter(self, filter): | ||||
"nested": { | "nested": { | ||||
"path": "intrinsic_metadata", | "path": "intrinsic_metadata", | ||||
"query": { | "query": { | ||||
"multi_match": { | "multi_match": { | ||||
"query": " ".join(value_array), | "query": " ".join(value_array), | ||||
"fields": [ | "fields": [ | ||||
get_expansion("keywords", ".") + "^2", | get_expansion("keywords", ".") + "^2", | ||||
get_expansion("descriptions", "."), | get_expansion("descriptions", "."), | ||||
# "^2" boosts an origin's score by 2x | |||||
# if the queried keywords are | |||||
# found in its intrinsic_metadata.keywords | |||||
], | ], | ||||
} | } | ||||
}, | }, | ||||
} | } | ||||
} | } | ||||
elif name in ["language", "license"]: | elif name in ["language", "license"]: | ||||
name_mapping = { | name_mapping = { | ||||
"language": "programming_languages", | "language": "programming_languages", | ||||
▲ Show 20 Lines • Show All 88 Lines • Show Last 20 Lines |