NOTE(review): this patch was re-laid out from a copy whose newlines had been
collapsed. Indentation and the position of blank context lines in
get_all_trees.sh are inferred (chosen to satisfy the original -1,35 hunk
count) and must be confirmed against the real file; apply with whitespace
tolerance if needed. The "index" post-image ids are those of the original
patch and no longer describe the amended "+" lines.

diff --git a/seirl/ase-vault-test/fetch_vault.sh b/seirl/ase-vault-test/fetch_vault.sh
new file mode 100755
index 0000000..590e942
--- /dev/null
+++ b/seirl/ase-vault-test/fetch_vault.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+# Download the raw vault bundle of directory $1 into ./$1.tar.gz.
+cook() {
+    dir_id="$1"
+    # Talk to the internal host while presenting the public Host header;
+    # --insecure disables TLS certificate verification for this request.
+    curl -v --insecure -H 'Host: archive.softwareheritage.org' \
+        -XGET "https://moma.internal.softwareheritage.org/api/1/vault/directory/$dir_id/raw/" \
+        > "$dir_id.tar.gz"
+}
+
+cook "${1:?usage: $0 DIRECTORY_ID}"
diff --git a/seirl/ase-vault-test/get_all_trees.sh b/seirl/ase-vault-test/get_all_trees.sh
index 0990c19..290bf16 100755
--- a/seirl/ase-vault-test/get_all_trees.sh
+++ b/seirl/ase-vault-test/get_all_trees.sh
@@ -1,35 +1,38 @@
 #!/bin/bash
 
 github_query() {
     query=$( cat )
     jq -n --arg v "$query" '{"query": $v}' \
         | curl -s -H "Authorization: bearer $( cat ~/.github_access_token )" \
             -X POST -d @- https://api.github.com/graphql
 }
 
 extract_trees () {
-    jq --raw-output '.data.search.edges[].node.ref.target.tree.oid // empty'
+    # Print the distinct tree oids found in the response on stdin, sorted.
+    # Called once per page, so duplicates across pages are not removed here.
+    jq --raw-output '.data.search.edges[].node.ref.target.tree.oid // empty' \
+        | sort -u
 }
 
 cd "$( dirname $0 )"
 
 tmpf=$( mktemp --suffix github-graphql-search )
 cursor=""
 
-for i in $( seq 1 5 ); do
+for i in $( seq 1 10 ); do
     echo >&2 "Requesting page $i..."
     cat github_search.graphql \
         | sed 's/\(search(.\+\))/\1'"$cursor)/" \
         | github_query \
         > "$tmpf"
 
     extract_trees < "$tmpf"
 
     hasNext=$( jq --raw-output '.data.search.pageInfo.hasNextPage' "$tmpf" )
     if [ "$hasNext" != "true" ]; then break; fi
 
-    cursor=$( jq --raw-output '.data.search.pageInfo.startCursor' "$tmpf" )
+    # Resume after the end of the page just fetched so the next request
+    # advances to the following page.
+    cursor=$( jq --raw-output '.data.search.pageInfo.endCursor' "$tmpf" )
     cursor=", after:\"$cursor\""
 done
 
 rm "$tmpf"