diff --git a/docker/services/swh-scheduler/entrypoint.sh b/docker/services/swh-scheduler/entrypoint.sh
--- a/docker/services/swh-scheduler/entrypoint.sh
+++ b/docker/services/swh-scheduler/entrypoint.sh
@@ -3,33 +3,36 @@
 set -e
 
 source /srv/softwareheritage/utils/pyutils.sh
-setup_pip
-
 source /srv/softwareheritage/utils/pgsql.sh
+
 setup_pgsql
+setup_pip
 
-case "$1" in
-    "shell")
-        exec bash -i
-        ;;
-    "swh-scheduler")
-        exec $@
-        ;;
-    *)
-        wait_pgsql
 
-        echo Setup the swh-scheduler API database
-        PGPASSWORD=${POSTGRES_PASSWORD} swh db-init \
-            --db-name ${POSTGRES_DB} scheduler
+# "shell" alone opens an interactive bash; "shell CMD ARGS..." runs CMD.
+# Neither of them waits for the database nor starts the API server.
+if [ "$1" = 'shell' ] ; then
+    shift
+    if (( $# == 0)); then
+        exec bash -i
+    else
+        "$@"
+    fi
+else
+    wait_pgsql
 
-        echo Starting the swh-scheduler API server
-        exec gunicorn --bind 0.0.0.0:5008 \
-            --log-level DEBUG \
-            --threads 2 \
-            --workers 2 \
-            --reload \
-            --timeout 3600 \
-            --config 'python:swh.core.api.gunicorn_config' \
-            'swh.scheduler.api.server:make_app_from_configfile()'
+    echo Setup the swh-scheduler API database
+    # quoted so that the database name is never word-split nor globbed
+    PGPASSWORD="${POSTGRES_PASSWORD}" swh db-init \
+        --db-name "${POSTGRES_DB}" scheduler
 
-esac
+    echo Starting the swh-scheduler API server
+    exec gunicorn --bind 0.0.0.0:5008 \
+        --log-level DEBUG \
+        --threads 2 \
+        --workers 2 \
+        --reload \
+        --timeout 3600 \
+        --config 'python:swh.core.api.gunicorn_config' \
+        'swh.scheduler.api.server:make_app_from_configfile()'
+fi
diff --git a/docker/tests/__init__.py b/docker/tests/__init__.py
new file mode 100644
diff --git a/docker/tests/conftest.py b/docker/tests/conftest.py
new file mode 100644
--- /dev/null
+++ b/docker/tests/conftest.py
@@ -0,0 +1,126 @@
+# Copyright (C) 2019-2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import re
+import subprocess
+import time
+
+import requests
+
+from urllib.parse import urljoin
+
+import pytest
+import testinfra
+
+
+APIURL = 'http://127.0.0.1:5080/api/1/'
+
+SAMPLE_METADATA = '''\
+
+
+  Test Software
+  swh
+  test-software
+
+    No One
+
+
+'''
+
+
+@pytest.fixture(scope='session')
+def docker_compose(request):
+    """Start the docker-compose cluster once per session; always stop it."""
+    try:
+        subprocess.check_output(['docker-compose', 'up', '-d'])
+        yield
+    finally:
+        subprocess.check_call(['docker-compose', 'down'])
+
+
+@pytest.fixture
+def scheduler_host(request, docker_compose):
+    """Testinfra host bound to a throw-away swh-scheduler container."""
+    docker_id = subprocess.check_output(
+        ['docker-compose', 'run', '-d',
+         'swh-scheduler', 'shell', 'sleep', '1h']).decode().strip()
+    try:
+        scheduler_host = testinfra.get_host("docker://" + docker_id)
+        scheduler_host.check_output('wait-for-it swh-scheduler:5008 -t 30')
+        scheduler_host.check_output('wait-for-it swh-storage:5002 -t 30')
+        # hand the testinfra connection to the container over to the test
+        yield scheduler_host
+    finally:
+        # destroy the container, even when the setup above failed
+        subprocess.check_call(['docker', 'rm', '-f', docker_id])
+
+
+# function-scoped: each test gets a new container with fresh sample files
+@pytest.fixture
+def deposit_host(request, docker_compose):
+    """Testinfra host bound to a throw-away swh-deposit container."""
+    docker_id = subprocess.check_output(
+        ['docker-compose', 'run', '-d',
+         'swh-deposit', 'shell', 'sleep', '1h']).decode().strip()
+    try:
+        deposit_host = testinfra.get_host("docker://" + docker_id)
+        deposit_host.check_output(
+            'echo \'print("Hello World!")\n\' > /tmp/hello.py')
+        deposit_host.check_output(
+            'tar -C /tmp -czf /tmp/archive.tgz /tmp/hello.py')
+        deposit_host.check_output(
+            f'echo \'{SAMPLE_METADATA}\' > /tmp/metadata.xml')
+        deposit_host.check_output('wait-for-it swh-deposit:5006 -t 30')
+        yield deposit_host
+    finally:
+        # destroy the container, even when the setup above failed
+        subprocess.check_call(['docker', 'rm', '-f', docker_id])
+
+
+@pytest.fixture
+def git_url():
+    return 'https://forge.softwareheritage.org/source/swh-core'
+
+
+@pytest.fixture
+def git_origin(scheduler_host, git_url):
+    """Load git_url through the scheduler and return it once loaded."""
+    task = scheduler_host.check_output(
+        f'swh scheduler task add load-git url={git_url}')
+    taskid = re.search(r'^Task (?P<id>\d+)$', task,
+                       flags=re.MULTILINE).group('id')
+    assert int(taskid) > 0
+    for _ in range(60):
+        status = scheduler_host.check_output(
+            f'swh scheduler task list --list-runs --task-id {taskid}')
+        if 'Executions:' in status:
+            if '[eventful]' in status:
+                break
+            if '[failed]' in status and '[started]' not in status:
+                loader_logs = subprocess.check_output(
+                    ['docker-compose', 'logs', 'swh-loader'])
+                assert False, ('Loading execution failed\n'
+                               f'status: {status}\n'
+                               f'loader logs: {loader_logs}')
+            assert '[started]' in status, (
+                f'Loading execution failed, task status is {status}')
+        # not run yet, or still running: poll again in a second
+        time.sleep(1)
+    else:
+        assert False, f'Loading of {git_url} timed out, status: {status}'
+    return git_url
+
+
+# Utility functions
+
+def apiget(path: str, verb: str = 'GET', **kwargs):
+    """Query the API at path and return the json result or raise an
+    AssertionError"""
+    kwargs.setdefault('timeout', 60)  # never hang on an unresponsive API
+    url = urljoin(APIURL, path)
+    resp = requests.request(verb, url, **kwargs)
+    assert resp.status_code == 200, f'failed to retrieve {url} ({resp})'
+    return resp.json()
diff --git a/docker/tests/test_deposit.py b/docker/tests/test_deposit.py
--- a/docker/tests/test_deposit.py
+++ b/docker/tests/test_deposit.py
@@ -4,52 +4,8 @@
 # See top-level LICENSE file for more information
 
 import json
-import subprocess
 import time
 
-import pytest
-import testinfra
-
-
-SAMPLE_METADATA = '''\
-
-
-  Test Software
-  swh
-  test-software
-
-    No One
-
-
-'''
-
-
-# scope='session' so we use the same container for all the tests;
-@pytest.fixture(scope='session')
-def deposit_host(request):
-    # start the whole cluster
-    subprocess.check_output(['docker-compose', 'up', '-d'])
-    # run a container in which test commands are executed
-    docker_id = subprocess.check_output(
-        ['docker-compose', 'run', '-d',
-         'swh-deposit', 'shell',
'sleep', '1h']).decode().strip()
-    deposit_host = testinfra.get_host("docker://" + docker_id)
-    deposit_host.check_output(
-        'echo \'print("Hello World!")\n\' > /tmp/hello.py')
-    deposit_host.check_output(
-        'tar -C /tmp -czf /tmp/archive.tgz /tmp/hello.py')
-    deposit_host.check_output(
-        f'echo \'{SAMPLE_METADATA}\' > /tmp/metadata.xml')
-    deposit_host.check_output('wait-for-it swh-deposit:5006 -t 30')
-    # return a testinfra connection to the container
-    yield deposit_host
-
-    # at the end of the test suite, destroy the container
-    subprocess.check_call(['docker', 'rm', '-f', docker_id])
-    # and the wole cluster
-    subprocess.check_call(['docker-compose', 'down'])
-
 
 def test_admin_collection(deposit_host):
     # 'deposit_host' binds to the container
diff --git a/docker/tests/test_git_loader.py b/docker/tests/test_git_loader.py
new file mode 100644
--- /dev/null
+++ b/docker/tests/test_git_loader.py
@@ -0,0 +1,70 @@
+# Copyright (C) 2019-2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from urllib.parse import quote_plus
+
+from .conftest import apiget
+
+
+def test_git_loader(scheduler_host, git_origin):
+    """Compare what the git loader archived with `git ls-remote`.
+
+    The revision behind every branch and every peeled tag must be known by
+    the archive. Every annotated tag must be archived as a release which
+    targets the peeled revision and is referenced by the snapshot of the
+    latest visit.
+    """
+    url = git_origin
+
+    print(f'Retrieve references available at {url}')
+    lsremote = scheduler_host.check_output(f'git ls-remote {url}')
+    gitrefs = [line.split() for line in lsremote.splitlines()]
+
+    print(f'Look for origin {url}')
+    # use quote_plus to prevent urljoin from messing with the 'http://' part of
+    # the url
+    origin = apiget(f'origin/{quote_plus(url)}/get')
+    assert origin['url'] == url
+
+    visit = apiget(f'origin/{quote_plus(url)}/visit/latest')
+    assert visit['status'] == 'full'
+
+    print('Check every identified git ref has been loaded')
+    snapshot = apiget(f'snapshot/{visit["snapshot"]}')
+
+    print(f'snapshot has {len(snapshot["branches"])} branches')
+    branches = snapshot["branches"]
+
+    # check the revision behind every ref reported by git ls-remote is known
+    # by the archive
+    for rev, branch_name in gitrefs:
+        # for tags, only check for final revision id
+        if (branch_name.startswith('refs/tags/')
+                and not branch_name.endswith('^{}')):
+            continue
+        rev_desc = apiget(f'revision/{rev}')
+        assert rev_desc['type'] == 'git'
+
+    tag_revision = {}
+    tag_release = {}
+    for rev, tag in gitrefs:
+        if tag.startswith('refs/tags/'):
+            if tag.endswith('^{}'):
+                tag_revision[tag[:-3]] = rev
+            else:
+                tag_release[tag] = rev
+
+    for tag, revision in tag_revision.items():
+        # check that every annotated tag reported by git ls-remote is known
+        # by the archive as a consistent release
+        release_id = tag_release[tag]
+        release = apiget(f'release/{release_id}')
+        assert release['id'] == release_id
+        assert release['target_type'] == 'revision'
+        assert release['target'] == revision
+        # and that the snapshot references this release for the tag
+        tag_desc = branches[tag]
+        assert tag_desc['target_type'] == 'release'
+        assert tag_desc['target'] == release_id