Page Menu | Home | Software Heritage

D8222.id29662.diff
No One | Temporary

D8222.id29662.diff

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -27,8 +27,7 @@
- id: mypy
name: mypy
entry: mypy
- args: [swh]
- pass_filenames: false
+ pass_filenames: true
language: system
types: [python]
diff --git a/docker/tests/conftest.py b/docker/tests/conftest.py
--- a/docker/tests/conftest.py
+++ b/docker/tests/conftest.py
@@ -43,7 +43,7 @@
# start the whole cluster
subprocess.check_output(["docker-compose", "up", "-d"])
yield
- # and strop it
+ # and stop it
subprocess.check_call(["docker-compose", "down", "-v"])
diff --git a/docker/tests/run_tests.sh b/docker/tests/run_tests.sh
deleted file mode 100755
--- a/docker/tests/run_tests.sh
+++ /dev/null
@@ -1,185 +0,0 @@
-#!/bin/bash
-
-# Main script to run high level tests on the Software Heritage stack
-
-# Use a temporary directory as working directory
-WORKDIR=/tmp/swh-docker-dev_tests
-# Create it if it does not exist
-mkdir $WORKDIR 2>/dev/null
-# Ensure it is empty before running the tests
-rm -rf $WORKDIR/*
-
-# We want the script to exit at the first encountered error
-set -e
-
-# Get test scripts directory
-TEST_SCRIPTS_DIR=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd)
-
-# Set the docker-compose.yml file to use
-export COMPOSE_FILE=$TEST_SCRIPTS_DIR/../docker-compose.yml
-
-# Useful global variables
-SWH_WEB_API_BASEURL="http://localhost:5080/api/1"
-CURRENT_TEST_SCRIPT=""
-
-# Colored output related variables and functions (only if stdout is a terminal)
-if test -t 1; then
- GREEN='\033[0;32m'
- RED='\033[0;31m'
- NC='\033[0m'
-else
- DOCO_OPTIONS='--ansi never'
-fi
-
-# Remove previously dumped service logs file if any
-rm -f $TEST_SCRIPTS_DIR/swh-docker-compose.logs
-
-function colored_output {
- local msg="$2"
- if [ "$CURRENT_TEST_SCRIPT" != "" ]; then
- msg="[$CURRENT_TEST_SCRIPT] $msg"
- fi
- echo -e "${1}${msg}${NC}"
-}
-
-function status_message {
- colored_output ${GREEN} "$1"
-}
-
-function error_message {
- colored_output ${RED} "$1"
-}
-
-function dump_docker_logs {
- error_message "Dumping logs for all services in file $TEST_SCRIPTS_DIR/swh-docker-compose.logs"
- docker-compose logs > $TEST_SCRIPTS_DIR/swh-docker-compose.logs
-}
-
-# Exit handler that will get called when this script terminates
-function finish {
- if [ $? -ne 0 ] && [ "$CURRENT_TEST_SCRIPT" != "" ]; then
- local SCRIPT_NAME=$CURRENT_TEST_SCRIPT
- CURRENT_TEST_SCRIPT=""
- error_message "An error occurred when running test script ${SCRIPT_NAME}"
- dump_docker_logs
- fi
- docker-compose $DOCO_OPTIONS down -v
- rm -rf $WORKDIR
-}
-trap finish EXIT
-
-# Docker-compose events listener that will be executed in background
-# Parameters:
-# $1: PID of parent process
-function listen_docker_events {
- docker-compose $DOCO_OPTIONS events | while read event
- do
- service=$(echo $event | cut -d " " -f7 | sed 's/^name=swh-docker-dev_\(.*\)_1)/\1/')
- event_type=$(echo $event | cut -d ' ' -f4)
- # "docker-compose down" has been called, exiting this child process
- if [ "$event_type" = "kill" ] ; then
- exit
- # a swh service crashed, sending signal to parent process to exit with error
- elif [ "$event_type" = "die" ]; then
- if [[ "$service" =~ ^swh.* ]]; then
- exit_code=$(docker-compose ps | grep $service | awk '{print $4}')
- if [ "$exit_code" != "0" ]; then
- error_message "Service $service died unexpectedly, exiting"
- dump_docker_logs
- kill -s SIGUSR1 $1; exit
- fi
- fi
- fi
- done
-}
-trap "exit 1" SIGUSR1
-
-declare -A SERVICE_LOGS_NB_LINES_READ
-
-# Function to wait for a specific string to be outputted in a specific
-# docker-compose service logs.
-# When called multiple times on the same service, only the newly outputted
-# logs since the last call will be processed.
-# Parameters:
-# $1: a timeout value in seconds to stop waiting and exit with error
-# $2: docker-compose service name
-# $3: the string to look for in the produced logs
-function wait_for_service_output {
- local nb_lines_to_skip=0
- if [[ -v "SERVICE_LOGS_NB_LINES_READ[$2]" ]]; then
- let nb_lines_to_skip=${SERVICE_LOGS_NB_LINES_READ[$2]}+1
- fi
- SECONDS=0
- local service_logs=$(docker-compose $DOCO_OPTIONS logs $2 | tail -n +$nb_lines_to_skip)
- until echo -ne "$service_logs" | grep -m 1 "$3" >/dev/null ; do
- sleep 1;
- if (( $SECONDS > $1 )); then
- error_message "Could not find pattern \"$3\" in $2 service logs after $1 seconds"
- exit 1
- fi
- let nb_lines_to_skip+=$(echo -ne "$service_logs" | wc -l)
- service_logs=$(docker-compose $DOCO_OPTIONS logs $2 | tail -n +$nb_lines_to_skip)
- done
- let nb_lines_to_skip+=$(echo -ne "$service_logs" | wc -l)
- SERVICE_LOGS_NB_LINES_READ[$2]=$nb_lines_to_skip
-}
-
-# Function to make an HTTP request and gets its response.
-# It should be used the following way:
-# response=$(http_request <method> <url>)
-# Parameters:
-# $1: http method name (GET, POST, ...)
-# $2: request url
-function http_request {
- local response=$(curl -sS -X $1 $2)
- echo $response
-}
-
-# Function to check that an HTTP request ends up with no errors.
-# If the HTTP response code is different from 200, an error will
-# be raised and the main script will terminate
-# Parameters:
-# $1: http method name (GET, POST, ...)
-# $2: request url
-function http_request_check {
- curl -sSf -X $1 $2 > /dev/null
-}
-
-# Function to run the content of a script dedicated to test a specific
-# part of the Software Heritage stack.
-function run_test_script {
- local SCRIPT_NAME=$(basename $1)
- status_message "Executing test script $SCRIPT_NAME"
- CURRENT_TEST_SCRIPT=$SCRIPT_NAME
- source $1
-}
-
-# Move to work directory
-cd $WORKDIR
-
-# Start the docker-compose event handler as a background process
-status_message "Starting docker-compose events listener"
-listen_docker_events $$ &
-
-# Start the docker-compose environment including the full Software Heritage stack
-status_message "Starting swh docker-compose environment"
-docker-compose $DOCO_OPTIONS up -d
-
-# Print logs to stdout
-docker-compose $DOCO_OPTIONS logs -f &
-
-# Ensure all swh services are up before running tests
-status_message "Waiting for swh services to be up"
-docker-compose $DOCO_OPTIONS exec -T swh-storage wait-for-it localhost:5002 -s --timeout=0
-docker-compose $DOCO_OPTIONS exec -T swh-objstorage wait-for-it localhost:5003 -s --timeout=0
-docker-compose $DOCO_OPTIONS exec -T swh-web wait-for-it localhost:5004 -s --timeout=0
-docker-compose $DOCO_OPTIONS exec -T swh-vault wait-for-it localhost:5005 -s --timeout=0
-docker-compose $DOCO_OPTIONS exec -T swh-deposit wait-for-it localhost:5006 -s --timeout=0
-docker-compose $DOCO_OPTIONS exec -T swh-idx-storage wait-for-it localhost:5007 -s --timeout=0
-docker-compose $DOCO_OPTIONS exec -T swh-scheduler wait-for-it localhost:5008 -s --timeout=0
-
-# Execute test scripts
-for test_script in $TEST_SCRIPTS_DIR/test_*.sh; do
- run_test_script ${test_script}
- CURRENT_TEST_SCRIPT=""
-done
diff --git a/docker/tests/test_01_loader_git.sh b/docker/tests/test_01_loader_git.sh
deleted file mode 100755
--- a/docker/tests/test_01_loader_git.sh
+++ /dev/null
@@ -1,70 +0,0 @@
-#!/bin/bash
-shopt -s nullglob extglob
-
-TEST_GIT_REPO_NAME="swh-loader-core"
-TEST_GIT_REPO_URL="https://forge.softwareheritage.org/source/${TEST_GIT_REPO_NAME}.git"
-
-status_message "Scheduling the loading of the git repository located at ${TEST_GIT_REPO_URL}"
-
-docker-compose $DOCO_OPTIONS exec -T swh-scheduler swh scheduler task add load-git url=$TEST_GIT_REPO_URL
-
-status_message "Waiting for the git loading task to complete"
-
-wait_for_service_output 300 swh-loader "swh.loader.git.tasks.UpdateGitRepository.*succeeded\|Traceback"
-
-status_message "The loading task has been successfully executed"
-
-status_message "Getting all git objects contained in the repository"
-git clone $TEST_GIT_REPO_URL
-cd $TEST_GIT_REPO_NAME
-cd "$(git rev-parse --git-path objects)"
-for p in pack/pack-*([0-9a-f]).idx ; do
- git show-index < $p | cut -f 2 -d ' ' > $WORKDIR/git_objects
-done
-for o in [0-9a-f][0-9a-f]/*([0-9a-f]) ; do
- echo ${o/\/} >> $WORKDIR/git_objects
-done
-
-declare -ga CONTENTS
-declare -ga DIRECTORIES
-declare -ga REVISIONS
-declare -ga RELEASES
-
-while IFS='' read -r object || [[ -n "$object" ]]; do
- object_type=$(git cat-file -t $object)
- if [ "$object_type" = "blob" ]; then
- CONTENTS+=($object)
- elif [ "$object_type" = "tree" ]; then
- DIRECTORIES+=($object)
- elif [ "$object_type" = "commit" ]; then
- REVISIONS+=($object)
- elif [ "$object_type" = "tag" ]; then
- RELEASES+=($object)
- fi
-done < $WORKDIR/git_objects
-
-status_message "Checking all git objects have been successfully loaded into the archive"
-
-status_message "Checking contents"
-for content in "${CONTENTS[@]}"; do
- http_request_check GET ${SWH_WEB_API_BASEURL}/content/sha1_git:$content/
-done
-status_message "All contents have been successfully loaded into the archive"
-
-status_message "Checking directories"
-for directory in "${DIRECTORIES[@]}"; do
- http_request_check GET ${SWH_WEB_API_BASEURL}/directory/$directory/
-done
-status_message "All directories have been successfully loaded into the archive"
-
-status_message "Checking revisions"
-for revision in "${REVISIONS[@]}"; do
- http_request_check GET ${SWH_WEB_API_BASEURL}/revision/$revision/
-done
-status_message "All revisions have been successfully loaded into the archive"
-
-status_message "Checking releases"
-for release in "${RELEASES[@]}"; do
- http_request_check GET ${SWH_WEB_API_BASEURL}/release/$release/
-done
-status_message "All releases have been successfully loaded into the archive"
diff --git a/docker/tests/test_02_vault.sh b/docker/tests/test_02_vault.sh
deleted file mode 100644
--- a/docker/tests/test_02_vault.sh
+++ /dev/null
@@ -1,26 +0,0 @@
-#!/bin/bash
-
-directory=${DIRECTORIES[$RANDOM % ${#DIRECTORIES[@]}]}
-revision=${REVISIONS[$RANDOM % ${#REVISIONS[@]}]}
-
-status_message "Requesting the vault to cook a random directory stored into the archive"
-http_request_check POST ${SWH_WEB_API_BASEURL}/vault/directory/$directory/
-
-status_message "Waiting for the directory cooking task to complete"
-wait_for_service_output 300 swh-vault-worker "swh.vault.cooking_tasks.SWHCookingTask.*succeeded"
-status_message "The directory cooking task has been successfully executed"
-
-status_message "Checking that the cooked directory tarball can be downloaded"
-http_request_check GET ${SWH_WEB_API_BASEURL}/vault/directory/$directory/raw/
-status_message "The cooked directory tarball is available for download"
-
-status_message "Requesting the vault to cook a random revision stored into the archive"
-http_request_check POST ${SWH_WEB_API_BASEURL}/vault/revision/$revision/gitfast/
-
-status_message "Waiting for the revision cooking task to complete"
-wait_for_service_output 300 swh-vault-worker "swh.vault.cooking_tasks.SWHCookingTask.*succeeded"
-status_message "The revision cooking task has been successfully executed"
-
-status_message "Checking that the cooked revision tarball can be downloaded"
-http_request_check GET ${SWH_WEB_API_BASEURL}/vault/revision/$revision/gitfast/raw/
-status_message "The cooked revision tarball is available for download"
diff --git a/docker/tests/test_git_loader.py b/docker/tests/test_git_loader.py
--- a/docker/tests/test_git_loader.py
+++ b/docker/tests/test_git_loader.py
@@ -5,6 +5,9 @@
from urllib.parse import quote_plus
+from dulwich import porcelain
+from dulwich.repo import MemoryRepo
+
from .conftest import apiget
@@ -12,8 +15,8 @@
url = git_origin
print(f"Retrieve references available at {url}")
- gitrefs = scheduler_host.check_output(f"git ls-remote {url}")
- gitrefs = [x.split() for x in gitrefs.splitlines()]
+ repo = MemoryRepo()
+ gitrefs = porcelain.fetch(repo, url).refs
print(f"Look for origin {url}")
# use quote_plus to prevent urljoin from messing with the 'http://' part of
@@ -30,22 +33,24 @@
print(f'snapshot has {len(snapshot["branches"])} branches')
branches = snapshot["branches"]
- # check every branch reported by git ls-remote is present in the snapshot
- for rev, branch_name in gitrefs:
+ # check every fetched branch is present in the snapshot
+ for branch_name, rev in gitrefs.items():
# for tags, only check for final revision id
- if branch_name.startswith("refs/tags/") and not branch_name.endswith("^{}"):
+ if branch_name.startswith(b"refs/tags/") and not branch_name.endswith(b"^{}"):
continue
- rev_desc = apiget(f"revision/{rev}")
+ rev_desc = apiget(f"revision/{rev.decode()}")
assert rev_desc["type"] == "git"
tag_revision = {}
tag_release = {}
- for rev, tag in gitrefs:
- if tag.startswith("refs/tags/"):
- if tag.endswith("^{}"):
- tag_revision[tag[:-3]] = rev
+ for tag, rev in gitrefs.items():
+ if tag.startswith(b"refs/tags/"):
+ tag_str = tag.decode()
+ rev_str = rev.decode()
+ if tag.endswith(b"^{}"):
+ tag_revision[tag_str[:-3]] = rev_str
else:
- tag_release[tag] = rev
+ tag_release[tag_str] = rev_str
for tag, revision in tag_revision.items():
# check that every release tag listed in the snapshot is known by the
@@ -59,3 +64,16 @@
tag_desc = branches[tag]
assert tag_desc["target_type"] == "release"
assert tag_desc["target"] == release_id
+
+ print("Check every git object stored in the repository has been loaded")
+ for sha1 in repo.object_store:
+ obj = repo.get_object(sha1)
+ sha1_str = sha1.decode()
+ if obj.type_name == b"blob":
+ apiget(f"content/sha1_git:{sha1_str}")
+ elif obj.type_name == b"commit":
+ apiget(f"revision/{sha1_str}")
+ elif obj.type_name == b"tree":
+ apiget(f"directory/{sha1_str}")
+ elif obj.type_name == b"tag":
+ apiget(f"release/{sha1_str}")
diff --git a/docker/tox.ini b/docker/tox.ini
--- a/docker/tox.ini
+++ b/docker/tox.ini
@@ -1,5 +1,5 @@
[tox]
-envlist=flake8,py3,shell_tests
+envlist=flake8,py3
[testenv]
skip_install = true
@@ -9,11 +9,8 @@
pytest-testinfra
docker-compose
pdbpp
+ dulwich
commands =
- ## this 'build' step is disabled for now because it fails with a somewhat
- ## cryptic traceback about the secretservice library. So it must be managed
- ## by hand from outside the tox invocation.
- # docker-compose build swh-storage
pytest {posargs}
[testenv:flake8]
@@ -22,14 +19,6 @@
commands =
{envpython} -m flake8 tests
-[testenv:shell_tests]
-deps =
- docker-compose
-whitelist_externals =
- /bin/bash
-commands =
- /bin/bash tests/run_tests.sh
-
[flake8]
# E203: whitespaces before ':' <https://github.com/psf/black/issues/315>
# E231: missing whitespace after ','
diff --git a/mypy.ini b/mypy.ini
new file mode 100644
--- /dev/null
+++ b/mypy.ini
@@ -0,0 +1,6 @@
+[mypy]
+namespace_packages = True
+warn_unused_ignores = True
+
+[mypy-testinfra.*]
+ignore_missing_imports = True

File Metadata

Mime Type
text/plain
Expires
Jul 3 2025, 7:50 AM (10 w, 5 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3254192

Event Timeline