Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9345701
D2288.id7869.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
9 KB
Subscribers
None
D2288.id7869.diff
View Options
diff --git a/swh/graph/backend.py b/swh/graph/backend.py
--- a/swh/graph/backend.py
+++ b/swh/graph/backend.py
@@ -6,9 +6,7 @@
import asyncio
import contextlib
import io
-import logging
import os
-import pathlib
import struct
import subprocess
import sys
@@ -16,6 +14,7 @@
from py4j.java_gateway import JavaGateway
+from swh.graph.config import check_config
from swh.graph.pid import NodeToPidMap, PidToNodeMap
from swh.model.identifiers import PID_TYPES
@@ -26,30 +25,6 @@
PID2NODE_EXT = 'pid2node.bin'
-def find_graph_jar():
- """find swh-graph.jar, containing the Java part of swh-graph
-
- look both in development directories and installed data (for in-production
- deployments who fecthed the JAR from pypi)
-
- """
- swh_graph_root = pathlib.Path(__file__).parents[2]
- try_paths = [
- swh_graph_root / 'java/target/',
- pathlib.Path(sys.prefix) / 'share/swh-graph/',
- pathlib.Path(sys.prefix) / 'local/share/swh-graph/',
- ]
- for path in try_paths:
- glob = list(path.glob('swh-graph-*.jar'))
- if glob:
- if len(glob) > 1:
- logging.warn('found multiple swh-graph JARs, '
- 'arbitrarily picking one')
- logging.info('using swh-graph JAR: {0}'.format(glob[0]))
- return str(glob[0])
- raise RuntimeError('swh-graph JAR not found. Have you run `make java`?')
-
-
def _get_pipe_stderr():
# Get stderr if possible, or pipe to stdout if running with Jupyter.
try:
@@ -61,28 +36,17 @@
class Backend:
- def __init__(self, graph_path):
+ def __init__(self, graph_path, config=None):
self.gateway = None
self.entry = None
self.graph_path = graph_path
+ self.config = check_config(config or {})
def __enter__(self):
- # TODO: make all of that configurable with sane defaults
- java_opts = [
- '-Xmx200G',
- '-server',
- '-XX:PretenureSizeThreshold=512M',
- '-XX:MaxNewSize=4G',
- '-XX:+UseLargePages',
- '-XX:+UseTransparentHugePages',
- '-XX:+UseNUMA',
- '-XX:+UseTLAB',
- '-XX:+ResizeTLAB',
- ]
self.gateway = JavaGateway.launch_gateway(
java_path=None,
- javaopts=java_opts,
- classpath=find_graph_jar(),
+ javaopts=self.config['java_tool_options'].split(),
+ classpath=self.config['classpath'],
die_on_exit=True,
redirect_stdout=sys.stdout,
redirect_stderr=_get_pipe_stderr(),
diff --git a/swh/graph/cli.py b/swh/graph/cli.py
--- a/swh/graph/cli.py
+++ b/swh/graph/cli.py
@@ -215,7 +215,7 @@
@click.pass_context
def serve(ctx, host, port, graph):
"""run the graph REST service"""
- backend = Backend(graph_path=graph)
+ backend = Backend(graph_path=graph, config=ctx.obj['config'])
app = make_app(backend=backend)
with backend:
diff --git a/swh/graph/config.py b/swh/graph/config.py
new file mode 100644
--- /dev/null
+++ b/swh/graph/config.py
@@ -0,0 +1,104 @@
+# Copyright (C) 2019 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import logging
+import psutil
+import sys
+from pathlib import Path
+
+
+def find_graph_jar():
+    """find swh-graph.jar, containing the Java part of swh-graph
+
+    look both in development directories and installed data (for in-production
+    deployments that fetched the JAR from pypi); return the path of the first
+    JAR found as a string, raise RuntimeError if none is found
+    """
+    swh_graph_root = Path(__file__).parents[2]
+    try_paths = [
+        swh_graph_root / 'java/target/',
+        Path(sys.prefix) / 'share/swh-graph/',
+        Path(sys.prefix) / 'local/share/swh-graph/',
+    ]
+    for path in try_paths:
+        glob = list(path.glob('swh-graph-*.jar'))
+        if glob:
+            if len(glob) > 1:
+                logging.warning('found multiple swh-graph JARs, '
+                                'arbitrarily picking one')
+            logging.info('using swh-graph JAR: %s', glob[0])
+            return str(glob[0])
+    raise RuntimeError('swh-graph JAR not found. Have you run `make java`?')
+
+
+def check_config(conf):
+    """check configuration and propagate defaults
+
+    return a copy of conf where missing batch_size, max_ram, java_tool_options,
+    java and classpath get defaults and {max_ram} is substituted in the options
+    """
+    conf = conf.copy()
+    if 'batch_size' not in conf:
+        conf['batch_size'] = '1000000000'  # 1 billion
+    if 'max_ram' not in conf:
+        conf['max_ram'] = str(psutil.virtual_memory().total)
+    if 'java_tool_options' not in conf:
+        conf['java_tool_options'] = ' '.join([
+            '-Xmx{max_ram}', '-XX:PretenureSizeThreshold=512M',
+            '-XX:MaxNewSize=4G', '-XX:+UseLargePages',
+            '-XX:+UseTransparentHugePages', '-XX:+UseNUMA', '-XX:+UseTLAB',
+            '-XX:+ResizeTLAB',
+        ])
+    # plain substitution rather than str.format, so that other placeholders
+    # (e.g. {logback}) in user-provided options do not raise KeyError here
+    conf['java_tool_options'] = conf['java_tool_options'].replace(
+        '{max_ram}', str(conf['max_ram']))
+    if 'java' not in conf:
+        conf['java'] = 'java'
+    if 'classpath' not in conf:
+        conf['classpath'] = find_graph_jar()
+    return conf
+
+
+def check_config_compress(config, graph_name, in_dir, out_dir):
+    """check compression-specific configuration and initialize its execution
+    environment: create out_dir and tmp_dir, write a default logback.xml into
+    tmp_dir unless a 'logback' entry is configured, and append the logback
+    option to java_tool_options. Return the resulting configuration dict.
+    """
+    conf = check_config(config)
+
+    conf['graph_name'] = graph_name
+    conf['in_dir'] = str(in_dir)
+    conf['out_dir'] = str(out_dir)
+    out_dir.mkdir(parents=True, exist_ok=True)
+    if 'tmp_dir' not in conf:
+        tmp_dir = out_dir / 'tmp'
+        conf['tmp_dir'] = str(tmp_dir)
+    else:
+        tmp_dir = Path(conf['tmp_dir'])
+    tmp_dir.mkdir(parents=True, exist_ok=True)
+
+    if 'logback' not in conf:
+        logback_confpath = tmp_dir / 'logback.xml'
+        with open(logback_confpath, 'w') as conffile:
+            conffile.write("""
+<configuration>
+ <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
+ <encoder>
+ <pattern>%d %r %p [%t] %logger{1} - %m%n</pattern>
+ </encoder>
+ </appender>
+ <root level="INFO">
+ <appender-ref ref="STDOUT"/>
+ </root>
+</configuration>
+""")
+        conf['logback'] = str(logback_confpath)
+
+    conf['java_tool_options'] += ' -Dlogback.configurationFile={logback}'
+    conf['java_tool_options'] = conf['java_tool_options'].format(
+        logback=conf['logback'])
+    return conf
diff --git a/swh/graph/tests/test_cli.py b/swh/graph/tests/test_cli.py
--- a/swh/graph/tests/test_cli.py
+++ b/swh/graph/tests/test_cli.py
@@ -43,7 +43,6 @@
graph:
compress:
batch_size: 1000
- java_tool_options: -Dlogback.configurationFile={logback}
""")
tmpconf.close()
self.conffile = Path(tmpconf.name)
diff --git a/swh/graph/webgraph.py b/swh/graph/webgraph.py
--- a/swh/graph/webgraph.py
+++ b/swh/graph/webgraph.py
@@ -15,12 +15,9 @@
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Set
-
-import psutil
-
from click import ParamType
-from swh.graph.backend import find_graph_jar
+from swh.graph.config import check_config_compress
class CompressionStep(Enum):
@@ -162,60 +159,6 @@
return rc
-def check_config(conf, graph_name, in_dir, out_dir):
- """check compression configuration, propagate defaults, and initialize
- execution environment
-
- """
- conf = conf.copy()
- conf['graph_name'] = graph_name
- conf['in_dir'] = str(in_dir)
- conf['out_dir'] = str(out_dir)
- out_dir.mkdir(parents=True, exist_ok=True)
- if 'tmp_dir' not in conf:
- tmp_dir = out_dir / 'tmp'
- conf['tmp_dir'] = str(tmp_dir)
- else:
- tmp_dir = Path(conf['tmp_dir'])
- tmp_dir.mkdir(parents=True, exist_ok=True)
- if 'batch_size' not in conf:
- conf['batch_size'] = '1000000000' # 1 billion
- if 'logback' not in conf:
- logback_confpath = tmp_dir / 'logback.xml'
- with open(logback_confpath, 'w') as conffile:
- conffile.write("""
-<configuration>
- <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
- <encoder>
- <pattern>%d %r %p [%t] %logger{1} - %m%n</pattern>
- </encoder>
- </appender>
- <root level="INFO">
- <appender-ref ref="STDOUT"/>
- </root>
-</configuration>
-""")
- conf['logback'] = str(logback_confpath)
- if 'max_ram' not in conf:
- conf['max_ram'] = str(psutil.virtual_memory().total)
- if 'java_tool_options' not in conf:
- assert 'logback' in conf
- conf['java_tool_options'] = ' '.join([
- '-Xmx{max_ram}', '-XX:PretenureSizeThreshold=512M',
- '-XX:MaxNewSize=4G', '-XX:+UseLargePages',
- '-XX:+UseTransparentHugePages', '-XX:+UseNUMA', '-XX:+UseTLAB',
- '-XX:+ResizeTLAB', '-Dlogback.configurationFile={logback}'
- ])
- conf['java_tool_options'] = conf['java_tool_options'].format(
- max_ram=conf['max_ram'], logback=conf['logback'])
- if 'java' not in conf:
- conf['java'] = 'java'
- if 'classpath' not in conf:
- conf['classpath'] = find_graph_jar()
-
- return conf
-
-
def compress(graph_name: str, in_dir: Path, out_dir: Path,
steps: Set[CompressionStep] = set(COMP_SEQ),
conf: Dict[str, str] = {}):
@@ -248,7 +191,7 @@
if not steps:
steps = set(COMP_SEQ)
- conf = check_config(conf, graph_name, in_dir, out_dir)
+ conf = check_config_compress(conf, graph_name, in_dir, out_dir)
compression_start_time = datetime.now()
logging.info(f'starting compression at {compression_start_time}')
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Thu, Jul 3, 3:28 PM (1 w, 2 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3218546
Attached To
D2288: graph backend: use global config
Event Timeline
Log In to Comment