diff --git a/docker/README.rst b/docker/README.rst --- a/docker/README.rst +++ b/docker/README.rst @@ -558,6 +558,8 @@ (swh) ~/swh-environment$ swh scheduler task respawn 1 +.. _docker-persistence: + Data persistence for a development setting ------------------------------------------ @@ -695,6 +697,38 @@ ~/swh-environment/docker$ docker-compose -f docker-compose.yml -f docker-compose.counters.yml up -d [...] + +Efficient graph traversals +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:ref:`swh-graph <swh-graph>` is a work-in-progress alternative to swh-storage +to perform large graph traversals/queries on the Merkle DAG. + +For example, it can be used by the vault, as it needs to query all objects +in the sub-DAG of a given node. + +You can use it with:: + + ~/swh-environment/docker$ docker-compose -f docker-compose.yml -f docker-compose.graph.yml up -d + +On the first start, it will run some precomputation based on all objects already +in your local SWH instance; so it may take a long time if you loaded many +repositories. (Expect 5 to 10s per repository.) + +It **does not update automatically** when you load new repositories. +You need to restart it every time you want to update it. + +You can :ref:`mount a docker volume <docker-persistence>` on +:file:`/srv/softwareheritage/graph` to avoid recomputing this graph +on every start. +Then, you need to explicitly request recomputing the graph before restarts +if you want to update it:: + + ~/swh-environment/docker$ docker-compose -f docker-compose.yml -f docker-compose.graph.yml run swh-graph update + ~/swh-environment/docker$ docker-compose -f docker-compose.yml -f docker-compose.graph.yml stop swh-graph + ~/swh-environment/docker$ docker-compose -f docker-compose.yml -f docker-compose.graph.yml up -d swh-graph + + Keycloak ^^^^^^^^ @@ -713,6 +747,7 @@ All emails sent by Keycloak can be easily read from the MailHog Web UI located at http://localhost:8025/. 
+ Using Sentry ------------ diff --git a/docker/services/swh-graph/entrypoint.sh b/docker/services/swh-graph/entrypoint.sh --- a/docker/services/swh-graph/entrypoint.sh +++ b/docker/services/swh-graph/entrypoint.sh @@ -7,20 +7,30 @@ DATADIR=/srv/softwareheritage/graph +update_graph() { # wipe $DATADIR, then export, sort and compress the graph from scratch + mkdir -p "$DATADIR"/ + rm -rf "${DATADIR:?}"/* # cleanup results from previous runs; :? aborts instead of expanding to /* if DATADIR is empty + mkdir "$DATADIR"/g/ + echo "Exporting edges and nodes" + swh dataset -C "$SWH_CONFIG_FILENAME" graph export "$DATADIR"/g --processes=4 + echo "Sorting edges and nodes" + swh dataset graph sort "$DATADIR"/g/edges + echo "Compressing graph" + swh graph compress --graph "$DATADIR"/g/edges/graph --outdir "$DATADIR"/compressed +} + case "$1" in "shell") exec bash -i ;; + "update") + update_graph + ;; *) - mkdir -p $DATADIR/ - rm -rf $DATADIR/* # cleanup results from previous runs - mkdir $DATADIR/g/ - echo "Exporting edges and nodes" - swh dataset -C $SWH_CONFIG_FILENAME graph export $DATADIR/g --processes=4 - echo "Sorting edges and nodes" - swh dataset graph sort $DATADIR/g/edges - echo "Compressing graph" - swh graph compress --graph $DATADIR/g/edges/graph --outdir $DATADIR/compressed + if [[ ! -d "$DATADIR"/compressed ]]; then + # Generate the graph only if no compressed output directory exists yet + update_graph + fi echo "Starting the swh-graph API server" exec gunicorn --bind 0.0.0.0:5009 \ --worker-class aiohttp.worker.GunicornWebWorker \