Changeset View
Changeset View
Standalone View
Standalone View
swh/search/elasticsearch.py
Show First 20 Lines • Show All 72 Lines • ▼ Show 20 Lines | def initialize(self) -> None: | ||||
'type': 'keyword', | 'type': 'keyword', | ||||
} | } | ||||
}, | }, | ||||
}, | }, | ||||
} | } | ||||
} | } | ||||
) | ) | ||||
@remote_api_endpoint('flush') | |||||
def flush(self) -> None: | |||||
"""Blocks until all previous calls to _update() are completely | |||||
applied.""" | |||||
self._backend.indices.refresh(index='_all') | |||||
@remote_api_endpoint('origin/update') | @remote_api_endpoint('origin/update') | ||||
def origin_update(self, documents: Iterable[dict]) -> None: | def origin_update(self, documents: Iterable[dict]) -> None: | ||||
documents = map(_sanitize_origin, documents) | documents = map(_sanitize_origin, documents) | ||||
documents_with_sha1 = ((origin_identifier(document), document) | documents_with_sha1 = ((origin_identifier(document), document) | ||||
for document in documents) | for document in documents) | ||||
actions = [ | actions = [ | ||||
{ | { | ||||
'_op_type': 'update', | '_op_type': 'update', | ||||
'_id': sha1, | '_id': sha1, | ||||
'_index': 'origin', | '_index': 'origin', | ||||
'doc': { | 'doc': { | ||||
**document, | **document, | ||||
'sha1': sha1, | 'sha1': sha1, | ||||
}, | }, | ||||
'doc_as_upsert': True, | 'doc_as_upsert': True, | ||||
} | } | ||||
for (sha1, document) in documents_with_sha1 | for (sha1, document) in documents_with_sha1 | ||||
] | ] | ||||
# TODO: make refresh='wait_for' configurable (we don't need it | bulk(self._backend, actions, index='origin') | ||||
# in production, it will probably be a performance issue) | |||||
bulk(self._backend, actions, index='origin', refresh='wait_for') | |||||
def origin_dump(self) -> Iterator[model.Origin]: | def origin_dump(self) -> Iterator[model.Origin]: | ||||
"""Returns all content in Elasticsearch's index. Not exposed | """Returns all content in Elasticsearch's index. Not exposed | ||||
publicly; but useful for tests.""" | publicly; but useful for tests.""" | ||||
results = scan(self._backend, index='*') | results = scan(self._backend, index='*') | ||||
for hit in results: | for hit in results: | ||||
yield self._backend.termvectors( | yield self._backend.termvectors( | ||||
index='origin', id=hit['_id'], | index='origin', id=hit['_id'], | ||||
▲ Show 20 Lines • Show All 116 Lines • Show Last 20 Lines |