Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Paste
P522
swh-web origin requests
Active
Public
Actions
Authored by
anlambert
on Sep 9 2019, 2:53 PM.
Edit Paste
Archive Paste
View Raw File
Subscribe
Mute Notifications
Award Token
Flag For Later
Tags
None
Subscribers
None
import
json
import
requests
from
pprint
import
pprint
def print_requested_urls(response):
    """Print the requested URL of every hit in a search response.

    The body of *response* (its ``text`` attribute) is decoded as JSON and
    the ``swh_atoms_u`` field of each entry under ``hits.hits[]._source``
    is written to standard output, one per line.

    Args:
        response: object exposing the raw JSON body as ``response.text``
            (in this script, the result of ``requests.post``).

    Returns:
        The decoded JSON document, so the caller can inspect the hits and
        read ``_scroll_id`` to request the next page.

    Raises:
        KeyError: if the document lacks ``hits.hits`` or a hit lacks
            ``_source.swh_atoms_u``.
    """
    results = json.loads(response.text)
    for result in results['hits']['hits']:
        print(result['_source']['swh_atoms_u'])
    return results
# Elasticsearch search body: every clause under bool/must has to match.
# Results are sorted by '@timestamp' and returned 1000 hits per page.
query = {
    'size': 1000,
    'sort': ['@timestamp'],
    'query': {
        'bool': {
            'must': [
                {
                    'query_string': {
                        'query': 'hostname:moma AND systemd_unit:"gunicorn-swh-webapp.service"',
                        'analyze_wildcard': True,
                        'default_field': '*',
                    },
                },
                # NOTE(review): the two match_phrase clauses repeat the
                # hostname / systemd_unit constraints of the query_string
                # above; kept as-is.
                {
                    'match_phrase': {
                        'hostname': {'query': 'moma'},
                    },
                },
                {
                    'match_phrase': {
                        'systemd_unit': {'query': 'gunicorn-swh-webapp.service'},
                    },
                },
                {
                    'query_string': {
                        # get requests to origin endpoints, discarding the
                        # pid resolving one
                        'query': 'origin AND NOT swh',
                        'default_field': 'swh_atoms_u',
                    },
                },
                {
                    'range': {
                        # Bounds are epoch milliseconds and lie exactly
                        # 365 days apart (31,536,000,000 ms).
                        '@timestamp': {
                            'gte': 1536487687063,
                            'lte': 1568023687063,
                            'format': 'epoch_millis',
                        },
                    },
                },
            ],
        },
    },
}
# Initial search: '?scroll=1m' asks Elasticsearch to return a '_scroll_id'
# that can be used to fetch the following pages.
es_url = 'http://esnode1.internal.softwareheritage.org:9200/systemlogs-*/_search?scroll=1m'
response = requests.post(es_url, json=query)
# Fail with an explicit HTTP error instead of a KeyError on the missing
# 'hits' key when the server answers with an error document.
response.raise_for_status()
results = print_requested_urls(response)

# Keep requesting the next page until one comes back without any hit.
while results['hits']['hits']:
    query = {
        'scroll': '1m',
        'scroll_id': results['_scroll_id'],
    }
    es_url = 'http://esnode1.internal.softwareheritage.org:9200/_search/scroll'
    response = requests.post(es_url, json=query)
    response.raise_for_status()
    results = print_requested_urls(response)
Event Timeline
anlambert
created this paste.
Sep 9 2019, 2:53 PM
2019-09-09 14:53:05 (UTC+2)
anlambert
updated the paste's language from
autodetect
to
python
.
anlambert
mentioned this in
D1969: Remove all API endpoints using an origin id.
.
Sep 9 2019, 3:11 PM
2019-09-09 15:11:46 (UTC+2)
Log In to Comment