Page MenuHomeSoftware Heritage
Paste P563

dump_elasticsearch.py
ActivePublic

Authored by vlorentz on Nov 25 2019, 2:07 PM.
import json
from pprint import pprint
import re
import elasticsearch
import elasticsearch.helpers
# Search body: a bool/must query with one "match" clause per (field, text)
# pair below, i.e. entries for the gunicorn-swh-webapp.service systemd unit
# whose beat.hostname matches "moma".
query = {
    "query": {
        "bool": {
            "must": [
                {"match": {field: {"query": text}}}
                for field, text in (
                    ("systemd_unit", "gunicorn-swh-webapp.service"),
                    ("beat.hostname", "moma"),
                )
            ],
        },
    },
}
# Elasticsearch client configured with all three internal esnode hosts.
es_hosts = [
    'esnode1.internal.softwareheritage.org',
    'esnode2.internal.softwareheritage.org',
    'esnode3.internal.softwareheritage.org',
]
client = elasticsearch.Elasticsearch(hosts=es_hosts)
# Dump the matching gunicorn access-log hits to stdout as one JSON array:
# each hit is serialized on its own line, with a lone ',' line between
# consecutive hits.
print('[')
all_hits = elasticsearch.helpers.scan(
    client, query, index='systemlogs-2019.11.*')
# The search body only constrains unit and host, so narrow the results to
# access-log entries here by looking for the marker in the message text.
access_hits = (
    hit for hit in all_hits
    if 'gunicorn.access' in hit['_source']['message']
)
for position, hit in enumerate(access_hits):
    if position > 0:
        # The separator goes before every hit except the first, so the
        # array never ends with a trailing comma.
        print(',')
    print(json.dumps(hit))
print(']')