Changeset View
Changeset View
Standalone View
Standalone View
swh/graph/cli.py
# Copyright (C) 2019 The Software Heritage developers | # Copyright (C) 2019 The Software Heritage developers | ||||
# See the AUTHORS file at the top-level directory of this distribution | # See the AUTHORS file at the top-level directory of this distribution | ||||
# License: GNU General Public License version 3, or any later version | # License: GNU General Public License version 3, or any later version | ||||
# See top-level LICENSE file for more information | # See top-level LICENSE file for more information | ||||
import aiohttp | import aiohttp | ||||
import click | import click | ||||
import logging | |||||
import sys | import sys | ||||
from pathlib import Path | from pathlib import Path | ||||
from typing import Any, Dict, Tuple | from typing import Any, Dict, Tuple | ||||
import swh.model.exceptions | import swh.model.exceptions | ||||
from swh.core import config | from swh.core import config | ||||
▲ Show 20 Lines • Show All 155 Lines • ▼ Show 20 Lines | with open(filename, 'wb') as f: | ||||
NodeToPidMap.write_record(f, pid) | NodeToPidMap.write_record(f, pid) | ||||
else: | else: | ||||
raise ValueError('invalid map type: ' + map_type) | raise ValueError('invalid map type: ' + map_type) | ||||
@map.command('lookup') | @map.command('lookup') | ||||
@click.option('--graph', '-g', required=True, metavar='GRAPH', | @click.option('--graph', '-g', required=True, metavar='GRAPH', | ||||
help='compressed graph basename') | help='compressed graph basename') | ||||
@click.argument('identifier', required=True) | @click.argument('identifiers', nargs=-1) | ||||
def map_lookup(graph, identifier): | def map_lookup(graph, identifiers): | ||||
"""Lookup an identifier using on-disk maps. | """Lookup identifiers using on-disk maps. | ||||
Depending on the identifier type lookup either a PID into a PID->node (and | Depending on the identifier type lookup either a PID into a PID->node (and | ||||
return the node integer identifier) or, vice-versa, lookup a node integer | return the node integer identifier) or, vice-versa, lookup a node integer | ||||
identifier into a node->PID (and return the PID). The desired behavior is | identifier into a node->PID (and return the PID). The desired behavior is | ||||
chosen depending on the syntax of the given identifier. | chosen depending on the syntax of each given identifier. | ||||
Identifiers can be passed either directly on the command line or on | |||||
standard input, separated by blanks. Logical lines (as returned by | |||||
readline()) in stdin will be preserved in stdout. | |||||
""" | """ | ||||
success = True # no identifiers failed to be looked up | |||||
def lookup(identifier): | |||||
nonlocal success | |||||
is_pid = None | is_pid = None | ||||
try: | try: | ||||
int(identifier) | int(identifier) | ||||
is_pid = False | is_pid = False | ||||
except ValueError: | except ValueError: | ||||
try: | try: | ||||
parse_persistent_identifier(identifier) | parse_persistent_identifier(identifier) | ||||
is_pid = True | is_pid = True | ||||
except swh.model.exceptions.ValidationError: | except swh.model.exceptions.ValidationError: | ||||
raise ValueError(f'invalid identifier: {identifier}') | success = False | ||||
logging.error(f'invalid identifier: {identifier}') | |||||
raise ValueError | |||||
try: | |||||
if is_pid: | if is_pid: | ||||
seirl: Isn't there an overhead to the `mmap` call here? Couldn't the mappings be created in the closure instead maybe? | |||||
print(PidToNodeMap(f'{graph}.{PID2NODE_EXT}')[identifier]) | return PidToNodeMap(f'{graph}.{PID2NODE_EXT}')[identifier] | ||||
else: | else: | ||||
print(NodeToPidMap(f'{graph}.{NODE2PID_EXT}')[int(identifier)]) | return NodeToPidMap(f'{graph}.{NODE2PID_EXT}')[int(identifier)] | ||||
except KeyError: | |||||
success = False | |||||
logging.error(f'identifier not found: {identifier}') | |||||
raise ValueError | |||||
if identifiers: # lookup identifiers passed via CLI | |||||
for identifier in identifiers: | |||||
try: | |||||
print(lookup(identifier)) | |||||
except ValueError: | |||||
pass | |||||
else: # lookup identifiers passed via stdin, preserving logical lines | |||||
line = sys.stdin.readline() # walrus, we miss you ! | |||||
Done • Inline Actions • seirl: `for line in sys.stdin:` | |||||
Done • Inline Actions • zack: (WTH was I thinking?) | |||||
while (line): | |||||
for identifier in line.rstrip().split(): | |||||
try: | |||||
Done • Inline Actions • seirl: You're going to get a trailing space on all lines here; it might be significant at some point. It should be possible to use `str.join` instead? | |||||
Done • Inline Actions • zack: Done. Note that, as a consequence of this change, logical lines that contain non-resolvable IDs will become ambiguous in the output feed (e.g., if you pass 4 IDs and one fails to resolve, you cannot determine from the output which one failed). IMO that's fine, as inputs with non-resolvable IDs are doomed to fail anyway. | |||||
print('{} '.format(lookup(identifier)), end='') | |||||
except ValueError: | |||||
pass | |||||
print() | |||||
line = sys.stdin.readline() | |||||
sys.exit(0 if success else 1) | |||||
@cli.command(name='rpc-serve') | @cli.command(name='rpc-serve') | ||||
@click.option('--host', '-h', default='0.0.0.0', | @click.option('--host', '-h', default='0.0.0.0', | ||||
metavar='IP', show_default=True, | metavar='IP', show_default=True, | ||||
help='host IP address to bind the server on') | help='host IP address to bind the server on') | ||||
@click.option('--port', '-p', default=5009, type=click.INT, | @click.option('--port', '-p', default=5009, type=click.INT, | ||||
metavar='PORT', show_default=True, | metavar='PORT', show_default=True, | ||||
▲ Show 20 Lines • Show All 53 Lines • Show Last 20 Lines |
Isn't there an overhead to the mmap call here? Couldn't the mappings be created in the closure instead maybe?