diff --git a/api/server/src/test/java/org/softwareheritage/graph/datasets/.gitignore b/api/server/src/test/java/org/softwareheritage/graph/datasets/.gitignore
new file mode 100644
index 0000000..a1bf9ba
--- /dev/null
+++ b/api/server/src/test/java/org/softwareheritage/graph/datasets/.gitignore
@@ -0,0 +1,12 @@
+dockerfiles/
+*.nodes.csv
+*.csv.gz
+
+dir_to_dir/
+dir_to_file/
+dir_to_rev/
+origin_to_snapshot/
+release_to_obj/
+rev_to_dir/
+rev_to_rev/
+snapshot_to_obj/
diff --git a/api/server/src/test/java/org/softwareheritage/graph/datasets/generate_datasets.sh b/api/server/src/test/java/org/softwareheritage/graph/datasets/generate_datasets.sh
new file mode 100755
index 0000000..8d08972
--- /dev/null
+++ b/api/server/src/test/java/org/softwareheritage/graph/datasets/generate_datasets.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+# Build Docker work environment
+toplevel_dir=`git rev-parse --show-toplevel`
+mkdir -p dockerfiles
+cp $toplevel_dir/compression/{compress_graph.sh,Dockerfile} dockerfiles/
+docker build --tag swh-graph-test dockerfiles
+
+# Compress each existing dataset
+for dataset in dir_to_dir dir_to_file dir_to_rev origin_to_snapshot \
+               release_to_obj rev_to_dir rev_to_rev snapshot_to_obj; do
+    if [ -f "$dataset.edges.csv" ] ; then
+        # Setup input for compression script
+        tr ' ' '\n' < $dataset.edges.csv | sort -u > $dataset.nodes.csv
+        gzip --force --keep $dataset.edges.csv
+        gzip --force --keep $dataset.nodes.csv
+
+        echo "Compressing $dataset..."
+        mkdir -p $dataset
+        docker run \
+            --name swh-graph-test --rm --tty --interactive \
+            --volume $(pwd):/data swh-graph-test:latest \
+            ./compress_graph.sh /data/$dataset /data/$dataset > /dev/null
+    fi
+done
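
For reference, a hedged sketch of how the new script is expected to be invoked, based on what the patch shows (Docker must be available, and the per-dataset `*.edges.csv` fixtures must already sit next to the script); the exact working-directory path is taken from the diff headers:

```bash
# Run from the datasets directory so the generated files land next to the fixtures.
cd api/server/src/test/java/org/softwareheritage/graph/datasets

# Builds the swh-graph-test Docker image from compression/{compress_graph.sh,Dockerfile},
# then, for every dataset whose <name>.edges.csv exists, derives <name>.nodes.csv,
# gzips both inputs, and writes the compressed graph into a directory of the same name
# (e.g. rev_to_rev/).
./generate_datasets.sh
```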