diff --git a/vlorentz/cassandra_stream_graph.sh b/vlorentz/cassandra_stream_graph.sh index dbe6637..c80371b 100644 --- a/vlorentz/cassandra_stream_graph.sh +++ b/vlorentz/cassandra_stream_graph.sh @@ -1,22 +1,34 @@ #!/bin/bash set -e +PYTHON=python3 +NB_PARTITIONS=16 +TABLES="content directory directory_entry revision release snapshot snapshot_branch origin_visit origin" +TABLES="revision" + tmp_dir=$(mktemp -td swh-graph-export.XXXXXXXXXX) trap "rm -rf ${tmp_dir}; pkill -P $$" EXIT nodes_fifo="${tmp_dir}/nodes.csv.fifo" edges_fifo="${tmp_dir}/edges.csv.fifo" mkfifo "${nodes_fifo}" mkfifo "${edges_fifo}" -NB_PARTITIONS=16 -for ((partition_id=0;partition_id nodes.csv.gz & +cat "${edges_fifo}" | pigz -c > edges.csv.gz & -cat "${nodes_fifo}" > nodes.csv & -cat "${edges_fifo}" > edges.csv & +for table in ${TABLES}; do + echo "Exporting ${table}" + pids="" + for ((partition_id=0;partition_id