# Page Menu · Home · Software Heritage
# Paste P522
#
# swh-web origin requests
# Status: Active, Public
#
# Authored by anlambert on Sep 9 2019, 2:53 PM.
import json
import requests
from pprint import pprint
def print_requested_urls(response):
    """Print the ``swh_atoms_u`` field of every hit in a search response.

    Judging by this function's name, ``swh_atoms_u`` holds the URL that was
    requested; that is an assumption, the field is simply printed as-is.

    Args:
        response: object whose ``text`` attribute is the JSON body of an
            Elasticsearch search (or scroll) reply.

    Returns:
        The decoded JSON body, unchanged, so the caller can inspect it
        further.

    Raises:
        ValueError: if ``response.text`` is not valid JSON.
        KeyError: if the body has no ``hits.hits`` entry, or a hit lacks
            ``_source.swh_atoms_u``.
    """
    results = json.loads(response.text)
    for hit in results['hits']['hits']:
        # One line of output per hit.
        print(hit['_source']['swh_atoms_u'])
    # Returned after the loop so the whole page is printed first.
    return results
# Elasticsearch search body: up to 1000 hits per page, sorted by @timestamp.
# Every clause under bool/must has to match.
query = {
    'size': 1000,
    'sort': ['@timestamp'],
    'query': {
        'bool': {
            'must': [
                # Restrict to the gunicorn-swh-webapp.service unit on host
                # moma.
                {
                    'query_string': {
                        'query': 'hostname:moma AND systemd_unit:"gunicorn-swh-webapp.service"',
                        'analyze_wildcard': True,
                        'default_field': '*',
                    },
                },
                # The next two clauses restate the same host / unit
                # constraints as phrase matches.
                {
                    'match_phrase': {
                        'hostname': {
                            'query': 'moma',
                        },
                    },
                },
                {
                    'match_phrase': {
                        'systemd_unit': {
                            'query': 'gunicorn-swh-webapp.service',
                        },
                    },
                },
                {
                    'query_string': {
                        # keep requests to origin endpoints, discarding the
                        # pid resolving ones
                        'query': 'origin AND NOT swh',
                        'default_field': 'swh_atoms_u',
                    },
                },
                # Time window in epoch milliseconds; the two bounds are
                # exactly 365 days apart.
                {
                    'range': {
                        '@timestamp': {
                            'gte': 1536487687063,
                            'lte': 1568023687063,
                            'format': 'epoch_millis',
                        },
                    },
                },
            ],
        },
    },
}
# Initial search: opens a scroll context kept alive for one minute
# (scroll=1m) and prints the first page of hits.
es_url = 'http://esnode1.internal.softwareheritage.org:9200/systemlogs-*/_search?scroll=1m'
response = requests.post(es_url, json=query)
# Fail loudly on an HTTP error instead of hitting a KeyError on 'hits' later.
response.raise_for_status()
results = print_requested_urls(response)

# Follow-up pages all go to the same scroll endpoint, so set the URL once.
es_url = 'http://esnode1.internal.softwareheritage.org:9200/_search/scroll'
# Keep fetching pages until one comes back with no hits.
while results['hits']['hits']:
    query = {
        'scroll': '1m',
        # Each reply carries the id to use for the next page.
        'scroll_id': results['_scroll_id'],
    }
    response = requests.post(es_url, json=query)
    response.raise_for_status()
    results = print_requested_urls(response)