diff --git a/api/server/src/test/java/org/softwareheritage/graph/dataset/.gitignore b/api/server/src/test/java/org/softwareheritage/graph/dataset/.gitignore new file mode 100644 index 0000000..d41a8bc --- /dev/null +++ b/api/server/src/test/java/org/softwareheritage/graph/dataset/.gitignore @@ -0,0 +1,16 @@ +dockerfiles/ + +# Generated input files +*.csv.gz +*.nodes.csv + +# Generated WebGraph files +*.graph +*.indegree +*.mph +*.obl +*.offsets +*.order +*.outdegree +*.properties +*.stats diff --git a/api/server/src/test/java/org/softwareheritage/graph/dataset/generate_graph.sh b/api/server/src/test/java/org/softwareheritage/graph/dataset/generate_graph.sh new file mode 100755 index 0000000..23120d0 --- /dev/null +++ b/api/server/src/test/java/org/softwareheritage/graph/dataset/generate_graph.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +# Build Docker work environment +toplevel_dir=`git rev-parse --show-toplevel` +mkdir -p dockerfiles +cp $toplevel_dir/compression/{compress_graph.sh,Dockerfile} dockerfiles/ +docker build --tag swh-graph-test dockerfiles + +# Setup input for compression script +tr ' ' '\n' < graph.edges.csv | sort -u > graph.nodes.csv +gzip --force --keep graph.edges.csv +gzip --force --keep graph.nodes.csv + +docker run \ + --name swh-graph-test --rm --tty --interactive \ + --volume $(pwd):/data swh-graph-test:latest \ + ./compress_graph.sh /data/graph /data/ diff --git a/api/server/src/test/java/org/softwareheritage/graph/dataset/graph.edges.csv b/api/server/src/test/java/org/softwareheritage/graph/dataset/graph.edges.csv new file mode 100644 index 0000000..654d462 --- /dev/null +++ b/api/server/src/test/java/org/softwareheritage/graph/dataset/graph.edges.csv @@ -0,0 +1,9 @@ +swh:1:dir:0000000000000000000000000000000000000001 swh:1:dir:0000000000000000000000000000000000000002 +swh:1:dir:0000000000000000000000000000000000000002 swh:1:dir:0000000000000000000000000000000000000003 +swh:1:dir:0000000000000000000000000000000000000002 swh:1:dir:0000000000000000000000000000000000000005 +swh:1:dir:0000000000000000000000000000000000000001 swh:1:dir:0000000000000000000000000000000000000008 +swh:1:dir:0000000000000000000000000000000000000003 swh:1:cnt:0000000000000000000000000000000000000004 +swh:1:dir:0000000000000000000000000000000000000005 swh:1:cnt:0000000000000000000000000000000000000006 +swh:1:dir:0000000000000000000000000000000000000002 swh:1:cnt:0000000000000000000000000000000000000007 +swh:1:dir:0000000000000000000000000000000000000008 swh:1:cnt:0000000000000000000000000000000000000009 +swh:1:dir:0000000000000000000000000000000000000001 swh:1:cnt:0000000000000000000000000000000000000010 diff --git a/api/server/src/test/java/org/softwareheritage/graph/datasets/.gitignore b/api/server/src/test/java/org/softwareheritage/graph/datasets/.gitignore deleted file mode 100644 index a1bf9ba..0000000 --- a/api/server/src/test/java/org/softwareheritage/graph/datasets/.gitignore +++ /dev/null @@ -1,12 +0,0 @@ -dockerfiles/ -*.nodes.csv -*.csv.gz - -dir_to_dir/ -dir_to_file/ -dir_to_rev/ -origin_to_snapshot/ -release_to_obj/ -rev_to_dir/ -rev_to_rev/ -snapshot_to_obj/ diff --git a/api/server/src/test/java/org/softwareheritage/graph/datasets/dir_to_dir.edges.csv b/api/server/src/test/java/org/softwareheritage/graph/datasets/dir_to_dir.edges.csv deleted file mode 100644 index 9273caf..0000000 --- a/api/server/src/test/java/org/softwareheritage/graph/datasets/dir_to_dir.edges.csv +++ /dev/null @@ -1,4 +0,0 @@ -0000000000000000000000000000000000000001 0000000000000000000000000000000000000002 -0000000000000000000000000000000000000002 0000000000000000000000000000000000000003 -0000000000000000000000000000000000000002 0000000000000000000000000000000000000005 -0000000000000000000000000000000000000001 0000000000000000000000000000000000000008 diff --git a/api/server/src/test/java/org/softwareheritage/graph/datasets/dir_to_file.edges.csv b/api/server/src/test/java/org/softwareheritage/graph/datasets/dir_to_file.edges.csv deleted file mode 100644 index 5c8cc27..0000000 --- a/api/server/src/test/java/org/softwareheritage/graph/datasets/dir_to_file.edges.csv +++ /dev/null @@ -1,5 +0,0 @@ -0000000000000000000000000000000000000003 0000000000000000000000000000000000000004 -0000000000000000000000000000000000000005 0000000000000000000000000000000000000006 -0000000000000000000000000000000000000002 0000000000000000000000000000000000000007 -0000000000000000000000000000000000000008 0000000000000000000000000000000000000009 -0000000000000000000000000000000000000001 0000000000000000000000000000000000000010 diff --git a/api/server/src/test/java/org/softwareheritage/graph/datasets/generate_datasets.sh b/api/server/src/test/java/org/softwareheritage/graph/datasets/generate_datasets.sh deleted file mode 100755 index 8d08972..0000000 --- a/api/server/src/test/java/org/softwareheritage/graph/datasets/generate_datasets.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash - -# Build Docker work environment -toplevel_dir=`git rev-parse --show-toplevel` -mkdir -p dockerfiles -cp $toplevel_dir/compression/{compress_graph.sh,Dockerfile} dockerfiles/ -docker build --tag swh-graph-test dockerfiles - -# Compress each existing dataset -for dataset in dir_to_dir dir_to_file dir_to_rev origin_to_snapshot \ - release_to_obj rev_to_dir rev_to_rev snapshot_to_obj; do - if [ -f "$dataset.edges.csv" ] ; then - # Setup input for compression script - tr ' ' '\n' < $dataset.edges.csv | sort -u > $dataset.nodes.csv - gzip --force --keep $dataset.edges.csv - gzip --force --keep $dataset.nodes.csv - - echo "Compressing $dataset..." - mkdir -p $dataset - docker run \ - --name swh-graph-test --rm --tty --interactive \ - --volume $(pwd):/data swh-graph-test:latest \ - ./compress_graph.sh /data/$dataset /data/$dataset > /dev/null - fi -done