diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,8 +27,7 @@ - id: mypy name: mypy entry: mypy - args: [swh] - pass_filenames: false + pass_filenames: true language: system types: [python] diff --git a/docker/tests/conftest.py b/docker/tests/conftest.py --- a/docker/tests/conftest.py +++ b/docker/tests/conftest.py @@ -43,7 +43,7 @@ # start the whole cluster subprocess.check_output(["docker-compose", "up", "-d"]) yield - # and strop it + # and stop it subprocess.check_call(["docker-compose", "down", "-v"]) diff --git a/docker/tests/run_tests.sh b/docker/tests/run_tests.sh deleted file mode 100755 --- a/docker/tests/run_tests.sh +++ /dev/null @@ -1,185 +0,0 @@ -#!/bin/bash - -# Main script to run high level tests on the Software Heritage stack - -# Use a temporary directory as working directory -WORKDIR=/tmp/swh-docker-dev_tests -# Create it if it does not exist -mkdir $WORKDIR 2>/dev/null -# Ensure it is empty before running the tests -rm -rf $WORKDIR/* - -# We want the script to exit at the first encountered error -set -e - -# Get test scripts directory -TEST_SCRIPTS_DIR=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd) - -# Set the docker-compose.yml file to use -export COMPOSE_FILE=$TEST_SCRIPTS_DIR/../docker-compose.yml - -# Useful global variables -SWH_WEB_API_BASEURL="http://localhost:5080/api/1" -CURRENT_TEST_SCRIPT="" - -# Colored output related variables and functions (only if stdout is a terminal) -if test -t 1; then - GREEN='\033[0;32m' - RED='\033[0;31m' - NC='\033[0m' -else - DOCO_OPTIONS='--ansi never' -fi - -# Remove previously dumped service logs file if any -rm -f $TEST_SCRIPTS_DIR/swh-docker-compose.logs - -function colored_output { - local msg="$2" - if [ "$CURRENT_TEST_SCRIPT" != "" ]; then - msg="[$CURRENT_TEST_SCRIPT] $msg" - fi - echo -e "${1}${msg}${NC}" -} - -function status_message { - colored_output ${GREEN} "$1" -} - -function error_message { - colored_output ${RED} "$1" -} - -function dump_docker_logs { - error_message "Dumping logs for all services in file $TEST_SCRIPTS_DIR/swh-docker-compose.logs" - docker-compose logs > $TEST_SCRIPTS_DIR/swh-docker-compose.logs -} - -# Exit handler that will get called when this script terminates -function finish { - if [ $? -ne 0 ] && [ "$CURRENT_TEST_SCRIPT" != "" ]; then - local SCRIPT_NAME=$CURRENT_TEST_SCRIPT - CURRENT_TEST_SCRIPT="" - error_message "An error occurred when running test script ${SCRIPT_NAME}" - dump_docker_logs - fi - docker-compose $DOCO_OPTIONS down -v - rm -rf $WORKDIR -} -trap finish EXIT - -# Docker-compose events listener that will be executed in background -# Parameters: -# $1: PID of parent process -function listen_docker_events { - docker-compose $DOCO_OPTIONS events | while read event - do - service=$(echo $event | cut -d " " -f7 | sed 's/^name=swh-docker-dev_\(.*\)_1)/\1/') - event_type=$(echo $event | cut -d ' ' -f4) - # "docker-compose down" has been called, exiting this child process - if [ "$event_type" = "kill" ] ; then - exit - # a swh service crashed, sending signal to parent process to exit with error - elif [ "$event_type" = "die" ]; then - if [[ "$service" =~ ^swh.* ]]; then - exit_code=$(docker-compose ps | grep $service | awk '{print $4}') - if [ "$exit_code" != "0" ]; then - error_message "Service $service died unexpectedly, exiting" - dump_docker_logs - kill -s SIGUSR1 $1; exit - fi - fi - fi - done -} -trap "exit 1" SIGUSR1 - -declare -A SERVICE_LOGS_NB_LINES_READ - -# Function to wait for a specific string to be outputted in a specific -# docker-compose service logs. -# When called multiple times on the same service, only the newly outputted -# logs since the last call will be processed. -# Parameters: -# $1: a timeout value in seconds to stop waiting and exit with error -# $2: docker-compose service name -# $3: the string to look for in the produced logs -function wait_for_service_output { - local nb_lines_to_skip=0 - if [[ -v "SERVICE_LOGS_NB_LINES_READ[$2]" ]]; then - let nb_lines_to_skip=${SERVICE_LOGS_NB_LINES_READ[$2]}+1 - fi - SECONDS=0 - local service_logs=$(docker-compose $DOCO_OPTIONS logs $2 | tail -n +$nb_lines_to_skip) - until echo -ne "$service_logs" | grep -m 1 "$3" >/dev/null ; do - sleep 1; - if (( $SECONDS > $1 )); then - error_message "Could not find pattern \"$3\" in $2 service logs after $1 seconds" - exit 1 - fi - let nb_lines_to_skip+=$(echo -ne "$service_logs" | wc -l) - service_logs=$(docker-compose $DOCO_OPTIONS logs $2 | tail -n +$nb_lines_to_skip) - done - let nb_lines_to_skip+=$(echo -ne "$service_logs" | wc -l) - SERVICE_LOGS_NB_LINES_READ[$2]=$nb_lines_to_skip -} - -# Function to make an HTTP request and gets its response. -# It should be used the following way: -# response=$(http_request ) -# Parameters: -# $1: http method name (GET, POST, ...) -# $2: request url -function http_request { - local response=$(curl -sS -X $1 $2) - echo $response -} - -# Function to check that an HTTP request ends up with no errors. -# If the HTTP response code is different from 200, an error will -# be raised and the main script will terminate -# Parameters: -# $1: http method name (GET, POST, ...) -# $2: request url -function http_request_check { - curl -sSf -X $1 $2 > /dev/null -} - -# Function to run the content of a script dedicated to test a specific -# part of the Software Heritage stack. -function run_test_script { - local SCRIPT_NAME=$(basename $1) - status_message "Executing test script $SCRIPT_NAME" - CURRENT_TEST_SCRIPT=$SCRIPT_NAME - source $1 -} - -# Move to work directory -cd $WORKDIR - -# Start the docker-compose event handler as a background process -status_message "Starting docker-compose events listener" -listen_docker_events $$ & - -# Start the docker-compose environment including the full Software Heritage stack -status_message "Starting swh docker-compose environment" -docker-compose $DOCO_OPTIONS up -d - -# Print logs to stdout -docker-compose $DOCO_OPTIONS logs -f & - -# Ensure all swh services are up before running tests -status_message "Waiting for swh services to be up" -docker-compose $DOCO_OPTIONS exec -T swh-storage wait-for-it localhost:5002 -s --timeout=0 -docker-compose $DOCO_OPTIONS exec -T swh-objstorage wait-for-it localhost:5003 -s --timeout=0 -docker-compose $DOCO_OPTIONS exec -T swh-web wait-for-it localhost:5004 -s --timeout=0 -docker-compose $DOCO_OPTIONS exec -T swh-vault wait-for-it localhost:5005 -s --timeout=0 -docker-compose $DOCO_OPTIONS exec -T swh-deposit wait-for-it localhost:5006 -s --timeout=0 -docker-compose $DOCO_OPTIONS exec -T swh-idx-storage wait-for-it localhost:5007 -s --timeout=0 -docker-compose $DOCO_OPTIONS exec -T swh-scheduler wait-for-it localhost:5008 -s --timeout=0 - -# Execute test scripts -for test_script in $TEST_SCRIPTS_DIR/test_*.sh; do - run_test_script ${test_script} - CURRENT_TEST_SCRIPT="" -done diff --git a/docker/tests/test_01_loader_git.sh b/docker/tests/test_01_loader_git.sh deleted file mode 100755 --- a/docker/tests/test_01_loader_git.sh +++ /dev/null @@ -1,70 +0,0 @@ -#!/bin/bash -shopt -s nullglob extglob - -TEST_GIT_REPO_NAME="swh-loader-core" -TEST_GIT_REPO_URL="https://forge.softwareheritage.org/source/${TEST_GIT_REPO_NAME}.git" - -status_message "Scheduling the loading of the git repository located at ${TEST_GIT_REPO_URL}" - -docker-compose $DOCO_OPTIONS exec -T swh-scheduler swh scheduler task add load-git url=$TEST_GIT_REPO_URL - -status_message "Waiting for the git loading task to complete" - -wait_for_service_output 300 swh-loader "swh.loader.git.tasks.UpdateGitRepository.*succeeded\|Traceback" - -status_message "The loading task has been successfully executed" - -status_message "Getting all git objects contained in the repository" -git clone $TEST_GIT_REPO_URL -cd $TEST_GIT_REPO_NAME -cd "$(git rev-parse --git-path objects)" -for p in pack/pack-*([0-9a-f]).idx ; do - git show-index < $p | cut -f 2 -d ' ' > $WORKDIR/git_objects -done -for o in [0-9a-f][0-9a-f]/*([0-9a-f]) ; do - echo ${o/\/} >> $WORKDIR/git_objects -done - -declare -ga CONTENTS -declare -ga DIRECTORIES -declare -ga REVISIONS -declare -ga RELEASES - -while IFS='' read -r object || [[ -n "$object" ]]; do - object_type=$(git cat-file -t $object) - if [ "$object_type" = "blob" ]; then - CONTENTS+=($object) - elif [ "$object_type" = "tree" ]; then - DIRECTORIES+=($object) - elif [ "$object_type" = "commit" ]; then - REVISIONS+=($object) - elif [ "$object_type" = "tag" ]; then - RELEASES+=($object) - fi -done < $WORKDIR/git_objects - -status_message "Checking all git objects have been successfully loaded into the archive" - -status_message "Checking contents" -for content in "${CONTENTS[@]}"; do - http_request_check GET ${SWH_WEB_API_BASEURL}/content/sha1_git:$content/ -done -status_message "All contents have been successfully loaded into the archive" - -status_message "Checking directories" -for directory in "${DIRECTORIES[@]}"; do - http_request_check GET ${SWH_WEB_API_BASEURL}/directory/$directory/ -done -status_message "All directories have been successfully loaded into the archive" - -status_message "Checking revisions" -for revision in "${REVISIONS[@]}"; do - http_request_check GET ${SWH_WEB_API_BASEURL}/revision/$revision/ -done -status_message "All revisions have been successfully loaded into the archive" - -status_message "Checking releases" -for release in "${RELEASES[@]}"; do - http_request_check GET ${SWH_WEB_API_BASEURL}/release/$release/ -done -status_message "All releases have been successfully loaded into the archive" diff --git a/docker/tests/test_02_vault.sh b/docker/tests/test_02_vault.sh deleted file mode 100644 --- a/docker/tests/test_02_vault.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash - -directory=${DIRECTORIES[$RANDOM % ${#DIRECTORIES[@]}]} -revision=${REVISIONS[$RANDOM % ${#REVISIONS[@]}]} - -status_message "Requesting the vault to cook a random directory stored into the archive" -http_request_check POST ${SWH_WEB_API_BASEURL}/vault/directory/$directory/ - -status_message "Waiting for the directory cooking task to complete" -wait_for_service_output 300 swh-vault-worker "swh.vault.cooking_tasks.SWHCookingTask.*succeeded" -status_message "The directory cooking task has been successfully executed" - -status_message "Checking that the cooked directory tarball can be downloaded" -http_request_check GET ${SWH_WEB_API_BASEURL}/vault/directory/$directory/raw/ -status_message "The cooked directory tarball is available for download" - -status_message "Requesting the vault to cook a random revision stored into the archive" -http_request_check POST ${SWH_WEB_API_BASEURL}/vault/revision/$revision/gitfast/ - -status_message "Waiting for the revision cooking task to complete" -wait_for_service_output 300 swh-vault-worker "swh.vault.cooking_tasks.SWHCookingTask.*succeeded" -status_message "The revision cooking task has been successfully executed" - -status_message "Checking that the cooked revision tarball can be downloaded" -http_request_check GET ${SWH_WEB_API_BASEURL}/vault/revision/$revision/gitfast/raw/ -status_message "The cooked revision tarball is available for download" diff --git a/docker/tests/test_git_loader.py b/docker/tests/test_git_loader.py --- a/docker/tests/test_git_loader.py +++ b/docker/tests/test_git_loader.py @@ -5,6 +5,9 @@ from urllib.parse import quote_plus +from dulwich import porcelain +from dulwich.repo import MemoryRepo + from .conftest import apiget @@ -12,8 +15,8 @@ url = git_origin print(f"Retrieve references available at {url}") - gitrefs = scheduler_host.check_output(f"git ls-remote {url}") - gitrefs = [x.split() for x in gitrefs.splitlines()] + repo = MemoryRepo() + gitrefs = porcelain.fetch(repo, url).refs print(f"Look for origin {url}") # use quote_plus to prevent urljoin from messing with the 'http://' part of @@ -30,22 +33,24 @@ print(f'snapshot has {len(snapshot["branches"])} branches') branches = snapshot["branches"] - # check every branch reported by git ls-remote is present in the snapshot - for rev, branch_name in gitrefs: + # check every fetched branch is present in the snapshot + for branch_name, rev in gitrefs.items(): # for tags, only check for final revision id - if branch_name.startswith("refs/tags/") and not branch_name.endswith("^{}"): + if branch_name.startswith(b"refs/tags/") and not branch_name.endswith(b"^{}"): continue - rev_desc = apiget(f"revision/{rev}") + rev_desc = apiget(f"revision/{rev.decode()}") assert rev_desc["type"] == "git" tag_revision = {} tag_release = {} - for rev, tag in gitrefs: - if tag.startswith("refs/tags/"): - if tag.endswith("^{}"): - tag_revision[tag[:-3]] = rev + for tag, rev in gitrefs.items(): + if tag.startswith(b"refs/tags/"): + tag_str = tag.decode() + rev_str = rev.decode() + if tag.endswith(b"^{}"): + tag_revision[tag_str[:-3]] = rev_str else: - tag_release[tag] = rev + tag_release[tag_str] = rev_str for tag, revision in tag_revision.items(): # check that every release tag listed in the snapshot is known by the @@ -59,3 +64,16 @@ tag_desc = branches[tag] assert tag_desc["target_type"] == "release" assert tag_desc["target"] == release_id + + print("Check every git object stored in the repository has been loaded") + for sha1 in repo.object_store: + obj = repo.get_object(sha1) + sha1_str = sha1.decode() + if obj.type_name == b"blob": + apiget(f"content/sha1_git:{sha1_str}") + elif obj.type_name == b"commit": + apiget(f"revision/{sha1_str}") + elif obj.type_name == b"tree": + apiget(f"directory/{sha1_str}") + elif obj.type_name == b"tag": + apiget(f"release/{sha1_str}") diff --git a/docker/tox.ini b/docker/tox.ini --- a/docker/tox.ini +++ b/docker/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist=flake8,py3,shell_tests +envlist=flake8,py3 [testenv] skip_install = true @@ -9,11 +9,8 @@ pytest-testinfra docker-compose pdbpp + dulwich commands = - ## this 'build' step is disabled for now because it fails with a somewhat - ## cryptic traceback about the secretservice library. So it must be managed - ## by hand from outside the tox invocation. - # docker-compose build swh-storage pytest {posargs} [testenv:flake8] @@ -22,14 +19,6 @@ commands = {envpython} -m flake8 tests -[testenv:shell_tests] -deps = - docker-compose -whitelist_externals = - /bin/bash -commands = - /bin/bash tests/run_tests.sh - [flake8] # E203: whitespaces before ':' # E231: missing whitespace after ',' diff --git a/mypy.ini b/mypy.ini new file mode 100644 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,6 @@ +[mypy] +namespace_packages = True +warn_unused_ignores = True + +[mypy-testinfra.*] +ignore_missing_imports = True