Page MenuHomeSoftware Heritage

D8561.id30894.diff
No OneTemporary

D8561.id30894.diff

diff --git a/swh/graph/cli.py b/swh/graph/cli.py
--- a/swh/graph/cli.py
+++ b/swh/graph/cli.py
@@ -1,9 +1,11 @@
-# Copyright (C) 2019-2020 The Software Heritage developers
+# Copyright (C) 2019-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
+import logging
from pathlib import Path
+import shlex
from typing import TYPE_CHECKING, Any, Dict, Set, Tuple
# WARNING: do not import unnecessary things here to keep cli startup time under
@@ -16,6 +18,8 @@
if TYPE_CHECKING:
from swh.graph.webgraph import CompressionStep # noqa
+logger = logging.getLogger(__name__)
+
class StepOption(click.ParamType):
"""click type for specifying a compression step on the CLI
@@ -171,13 +175,17 @@
config = ctx.obj["config"]
config.setdefault("graph", {})
config["graph"]["path"] = graph
+
+ logger.debug("Building gPRC server command line")
cmd, port = build_grpc_server_cmdline(**config["graph"])
java_bin = cmd[0]
if java_home is not None:
java_bin = str(Path(java_home) / "bin" / java_bin)
- print(f"Starting the GRPC server on 0.0.0.0:{port}")
+ # XXX: shlex.join() is in 3.8
+ # logger.info("Starting gRPC server: %s", shlex.join(cmd))
+ logger.info("Starting gRPC server: %s", " ".join(shlex.quote(x) for x in cmd))
os.execvp(java_bin, cmd)
diff --git a/swh/graph/config.py b/swh/graph/config.py
--- a/swh/graph/config.py
+++ b/swh/graph/config.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019 The Software Heritage developers
+# Copyright (C) 2019-2022 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -9,6 +9,8 @@
import psutil
+logger = logging.getLogger(__name__)
+
def find_graph_jar():
"""find swh-graph.jar, containing the Java part of swh-graph
@@ -17,6 +19,7 @@
deployments who fecthed the JAR from pypi)
"""
+ logger.debug("Looking for swh-graph JAR")
swh_graph_root = Path(__file__).parents[2]
try_paths = [
swh_graph_root / "java/target/",
@@ -24,13 +27,14 @@
Path(sys.prefix) / "local/share/swh-graph/",
]
for path in try_paths:
+ logger.debug("Looking for swh-graph JAR in %s", path)
glob = list(path.glob("swh-graph-*.jar"))
if glob:
if len(glob) > 1:
- logging.warning(
+ logger.warning(
"found multiple swh-graph JARs, " "arbitrarily picking one"
)
- logging.info("using swh-graph JAR: {0}".format(glob[0]))
+ logger.info("using swh-graph JAR: {0}".format(glob[0]))
return str(glob[0])
raise RuntimeError("swh-graph JAR not found. Have you run `make java`?")
@@ -42,10 +46,13 @@
# Use 0.1% of the RAM as a batch size:
# ~1 billion for big servers, ~10 million for small desktop machines
conf["batch_size"] = min(int(psutil.virtual_memory().total / 1000), 2**30 - 1)
+ logger.debug("batch_size not configured, defaulting to %s", conf["batch_size"])
if "llp_gammas" not in conf:
conf["llp_gammas"] = "-0,-1,-2,-3,-4"
+ logger.debug("llp_gammas not configured, defaulting to %s", conf["llp_gammas"])
if "max_ram" not in conf:
conf["max_ram"] = str(int(psutil.virtual_memory().total * 0.9))
+ logger.debug("max_ram not configured, defaulting to %s", conf["max_ram"])
if "java_tool_options" not in conf:
conf["java_tool_options"] = " ".join(
[
@@ -59,6 +66,10 @@
"-XX:+ResizeTLAB",
]
)
+ logger.debug(
+ "java_tool_options not providing, defaulting to %s",
+ conf["java_tool_options"],
+ )
conf["java_tool_options"] = conf["java_tool_options"].format(
max_ram=conf["max_ram"]
)
diff --git a/swh/graph/grpc_server.py b/swh/graph/grpc_server.py
--- a/swh/graph/grpc_server.py
+++ b/swh/graph/grpc_server.py
@@ -16,12 +16,17 @@
from swh.graph.config import check_config
+logger = logging.getLogger(__name__)
+
def build_grpc_server_cmdline(**config):
port = config.pop("port", None)
if port is None:
port = aiohttp.test_utils.unused_port()
+ logger.debug("Port not configured, using random port %s", port)
+ logger.debug("Checking configuration and populating default values")
config = check_config(config)
+ logger.debug("Configuration: %r", config)
cmd = [
"java",
"--class-path",
@@ -39,8 +44,8 @@
cmd, port = build_grpc_server_cmdline(**config)
print(cmd)
# XXX: shlex.join() is in 3.8
- # logging.info("Starting RPC server: %s", shlex.join(cmd))
- logging.info("Starting GRPC server: %s", " ".join(shlex.quote(x) for x in cmd))
+ # logger.info("Starting gRPC server: %s", shlex.join(cmd))
+ logger.info("Starting gRPC server: %s", " ".join(shlex.quote(x) for x in cmd))
server = subprocess.Popen(cmd)
return server, port
@@ -50,5 +55,5 @@
try:
server.wait(timeout=timeout)
except subprocess.TimeoutExpired:
- logging.warning("Server did not terminate, sending kill signal...")
+ logger.warning("Server did not terminate, sending kill signal...")
server.kill()

File Metadata

Mime Type
text/plain
Expires
Thu, Dec 19, 2:47 PM (20 h, 50 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3219925

Event Timeline