D2552.diff

diff --git a/docker/services/swh-scheduler/entrypoint.sh b/docker/services/swh-scheduler/entrypoint.sh
--- a/docker/services/swh-scheduler/entrypoint.sh
+++ b/docker/services/swh-scheduler/entrypoint.sh
@@ -3,33 +3,33 @@
set -e
source /srv/softwareheritage/utils/pyutils.sh
-setup_pip
-
source /srv/softwareheritage/utils/pgsql.sh
+
setup_pgsql
+setup_pip
-case "$1" in
-    "shell")
-        exec bash -i
-        ;;
-    "swh-scheduler")
-        exec $@
-        ;;
-    *)
-        wait_pgsql
-        echo Setup the swh-scheduler API database
-        PGPASSWORD=${POSTGRES_PASSWORD} swh db-init \
-            --db-name ${POSTGRES_DB} scheduler
+if [ "$1" = 'shell' ] ; then
+    shift
+    if (( $# == 0)); then
+        exec bash -i
+    else
+        "$@"
+    fi
+else
+    wait_pgsql
-        echo Starting the swh-scheduler API server
-        exec gunicorn --bind 0.0.0.0:5008 \
-            --log-level DEBUG \
-            --threads 2 \
-            --workers 2 \
-            --reload \
-            --timeout 3600 \
-            --config 'python:swh.core.api.gunicorn_config' \
-            'swh.scheduler.api.server:make_app_from_configfile()'
+    echo Setup the swh-scheduler API database
+    PGPASSWORD=${POSTGRES_PASSWORD} swh db-init \
+        --db-name ${POSTGRES_DB} scheduler
-esac
+    echo Starting the swh-scheduler API server
+    exec gunicorn --bind 0.0.0.0:5008 \
+        --log-level DEBUG \
+        --threads 2 \
+        --workers 2 \
+        --reload \
+        --timeout 3600 \
+        --config 'python:swh.core.api.gunicorn_config' \
+        'swh.scheduler.api.server:make_app_from_configfile()'
+fi
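
With this rewrite, the "shell" entrypoint argument either opens an interactive bash (when no further arguments are given) or execs the remaining arguments as-is. The test fixtures below rely on this to keep a throwaway container alive; a minimal sketch of that pattern (mirroring the docker-compose call made in conftest.py in this diff) could look like:

    import subprocess

    # start a detached swh-scheduler container whose entrypoint receives
    # "shell sleep 1h", so it just sleeps and test commands can be run in it
    docker_id = subprocess.check_output(
        ['docker-compose', 'run', '-d',
         'swh-scheduler', 'shell', 'sleep', '1h']).decode().strip()

    # tear the container down once the tests are done
    subprocess.check_call(['docker', 'rm', '-f', docker_id])
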
diff --git a/docker/tests/__init__.py b/docker/tests/__init__.py
new file mode 100644
diff --git a/docker/tests/conftest.py b/docker/tests/conftest.py
new file mode 100644
--- /dev/null
+++ b/docker/tests/conftest.py
@@ -0,0 +1,126 @@
+# Copyright (C) 2019-2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+import re
+import subprocess
+import time
+
+import requests
+
+from urllib.parse import urljoin
+
+import pytest
+import testinfra
+
+
+APIURL = 'http://127.0.0.1:5080/api/1/'
+
+SAMPLE_METADATA = '''\
+<?xml version="1.0" encoding="utf-8"?>
+<entry xmlns="http://www.w3.org/2005/Atom"
+       xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0">
+  <title>Test Software</title>
+  <client>swh</client>
+  <external_identifier>test-software</external_identifier>
+  <codemeta:author>
+    <codemeta:name>No One</codemeta:name>
+  </codemeta:author>
+</entry>
+'''
+
+
+# scope='session' so we use the same container for all the tests;
+@pytest.fixture(scope='session')
+def docker_compose(request):
+    # start the whole cluster
+    subprocess.check_output(['docker-compose', 'up', '-d'])
+    yield
+    # and stop it
+    subprocess.check_call(['docker-compose', 'down'])
+
+
+@pytest.fixture
+def scheduler_host(request, docker_compose):
+    # run a container in which test commands are executed
+    docker_id = subprocess.check_output(
+        ['docker-compose', 'run', '-d',
+         'swh-scheduler', 'shell', 'sleep', '1h']).decode().strip()
+    scheduler_host = testinfra.get_host("docker://" + docker_id)
+    scheduler_host.check_output('wait-for-it swh-scheduler:5008 -t 30')
+    scheduler_host.check_output('wait-for-it swh-storage:5002 -t 30')
+
+    # return a testinfra connection to the container
+    yield scheduler_host
+
+    # at the end of the test suite, destroy the container
+    subprocess.check_call(['docker', 'rm', '-f', docker_id])
+
+
+# note: this fixture is function-scoped, so each test gets a fresh container
+@pytest.fixture
+def deposit_host(request, docker_compose):
+    # run a container in which test commands are executed
+    docker_id = subprocess.check_output(
+        ['docker-compose', 'run', '-d',
+         'swh-deposit', 'shell', 'sleep', '1h']).decode().strip()
+    deposit_host = testinfra.get_host("docker://" + docker_id)
+    deposit_host.check_output(
+        'echo \'print("Hello World!")\n\' > /tmp/hello.py')
+    deposit_host.check_output(
+        'tar -C /tmp -czf /tmp/archive.tgz /tmp/hello.py')
+    deposit_host.check_output(
+        f'echo \'{SAMPLE_METADATA}\' > /tmp/metadata.xml')
+    deposit_host.check_output('wait-for-it swh-deposit:5006 -t 30')
+    # return a testinfra connection to the container
+    yield deposit_host
+
+    # at the end of the test suite, destroy the container
+    subprocess.check_call(['docker', 'rm', '-f', docker_id])
+
+
+@pytest.fixture
+def git_url():
+    return 'https://forge.softwareheritage.org/source/swh-core'
+
+
+@pytest.fixture
+def git_origin(scheduler_host, git_url):
+    task = scheduler_host.check_output(
+        'swh scheduler task add load-git '
+        f'url={git_url}'
+    )
+    taskid = re.search(r'^Task (?P<id>\d+)$', task,
+                       flags=re.MULTILINE).group('id')
+    assert int(taskid) > 0
+
+    for i in range(60):
+        status = scheduler_host.check_output(
+            f'swh scheduler task list --list-runs --task-id {taskid}')
+        if 'Executions:' in status:
+            if '[eventful]' in status:
+                break
+            if '[started]' in status:
+                time.sleep(1)
+                continue
+            if '[failed]' in status:
+                loader_logs = subprocess.check_output(
+                    ['docker-compose', 'logs', 'swh-loader'])
+                assert False, ('Loading execution failed\n'
+                               f'status: {status}\n'
+                               f'loader logs: {loader_logs}')
+            assert False, f'Loading execution failed, task status is {status}'
+    return git_url
+
+
+# Utility functions
+
+def apiget(path: str, verb: str = 'GET', **kwargs):
+ """Query the API at path and return the json result or raise an
+ AssertionError"""
+
+ url = urljoin(APIURL, path)
+ resp = requests.request(verb, url, **kwargs)
+ assert resp.status_code == 200, f'failed to retrieve {url} ({resp})'
+ return resp.json()
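
Together, these fixtures give a test a testinfra connection into a scheduler container (scheduler_host), a git URL whose loading task has already completed (git_origin), and the apiget helper for the public API. A minimal sketch of a test built on them (hypothetical test name; it mirrors the pattern used by test_git_loader.py below) might be:

    from urllib.parse import quote_plus

    from .conftest import apiget


    def test_origin_is_archived(git_origin):
        # once the git_origin fixture has returned, the load-git task has
        # completed, so the origin should be visible through the public API
        origin = apiget(f'origin/{quote_plus(git_origin)}/get')
        assert origin['url'] == git_origin
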
diff --git a/docker/tests/test_deposit.py b/docker/tests/test_deposit.py
--- a/docker/tests/test_deposit.py
+++ b/docker/tests/test_deposit.py
@@ -4,52 +4,8 @@
# See top-level LICENSE file for more information
import json
-import subprocess
import time
-import pytest
-import testinfra
-
-
-SAMPLE_METADATA = '''\
-<?xml version="1.0" encoding="utf-8"?>
-<entry xmlns="http://www.w3.org/2005/Atom"
-       xmlns:codemeta="https://doi.org/10.5063/SCHEMA/CODEMETA-2.0">
-  <title>Test Software</title>
-  <client>swh</client>
-  <external_identifier>test-software</external_identifier>
-  <codemeta:author>
-    <codemeta:name>No One</codemeta:name>
-  </codemeta:author>
-</entry>
-'''
-
-
-# scope='session' so we use the same container for all the tests;
-@pytest.fixture(scope='session')
-def deposit_host(request):
-    # start the whole cluster
-    subprocess.check_output(['docker-compose', 'up', '-d'])
-    # run a container in which test commands are executed
-    docker_id = subprocess.check_output(
-        ['docker-compose', 'run', '-d',
-         'swh-deposit', 'shell', 'sleep', '1h']).decode().strip()
-    deposit_host = testinfra.get_host("docker://" + docker_id)
-    deposit_host.check_output(
-        'echo \'print("Hello World!")\n\' > /tmp/hello.py')
-    deposit_host.check_output(
-        'tar -C /tmp -czf /tmp/archive.tgz /tmp/hello.py')
-    deposit_host.check_output(
-        f'echo \'{SAMPLE_METADATA}\' > /tmp/metadata.xml')
-    deposit_host.check_output('wait-for-it swh-deposit:5006 -t 30')
-    # return a testinfra connection to the container
-    yield deposit_host
-
-    # at the end of the test suite, destroy the container
-    subprocess.check_call(['docker', 'rm', '-f', docker_id])
-    # and the wole cluster
-    subprocess.check_call(['docker-compose', 'down'])
-
def test_admin_collection(deposit_host):
    # 'deposit_host' binds to the container
diff --git a/docker/tests/test_git_loader.py b/docker/tests/test_git_loader.py
new file mode 100644
--- /dev/null
+++ b/docker/tests/test_git_loader.py
@@ -0,0 +1,62 @@
+# Copyright (C) 2019-2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+
+from urllib.parse import quote_plus
+
+from .conftest import apiget
+
+
+def test_git_loader(scheduler_host, git_origin):
+    url = git_origin
+
+    print(f'Retrieve references available at {url}')
+    gitrefs = scheduler_host.check_output(f'git ls-remote {url}')
+    gitrefs = [x.split() for x in gitrefs.splitlines()]
+
+    print(f'Look for origin {url}')
+    # use quote_plus to prevent urljoin from messing with the 'http://' part of
+    # the url
+    origin = apiget(f'origin/{quote_plus(url)}/get')
+    assert origin['url'] == url
+
+    visit = apiget(f'origin/{quote_plus(url)}/visit/latest')
+    assert visit['status'] == 'full'
+
+    print(f'Check every identified git ref has been loaded')
+    snapshot = apiget(f'snapshot/{visit["snapshot"]}')
+
+    print(f'snapshot has {len(snapshot["branches"])} branches')
+    branches = snapshot["branches"]
+
+    # check every branch reported by git ls-remote is present in the snapshot
+    for rev, branch_name in gitrefs:
+        # for tags, only check for final revision id
+        if branch_name.startswith('refs/tags/') \
+           and not branch_name.endswith('^{}'):
+            continue
+        rev_desc = apiget(f'revision/{rev}')
+        assert rev_desc['type'] == 'git'
+
+    tag_revision = {}
+    tag_release = {}
+    for rev, tag in gitrefs:
+        if tag.startswith('refs/tags/'):
+            if tag.endswith('^{}'):
+                tag_revision[tag[:-3]] = rev
+            else:
+                tag_release[tag] = rev
+
+    for tag, revision in tag_revision.items():
+        # check that every release tag listed in the snapshot is known by the
+        # archive and consistent
+        release_id = tag_release[tag]
+        release = apiget(f'release/{release_id}')
+        assert release['id'] == release_id
+        assert release['target_type'] == 'revision'
+        assert release['target'] == revision
+        # and compare this with what git ls-remote reported
+        tag_desc = branches[tag]
+        assert tag_desc['target_type'] == 'release'
+        assert tag_desc['target'] == release_id
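
The test above resolves every non-tag ref to a revision and cross-checks tag refs against their releases. A complementary check could compare branch heads with what git ls-remote reported; the sketch below is hypothetical (not part of this diff) and assumes non-tag refs appear in the snapshot keyed by their full ref name with target_type 'revision', with HEAD skipped since it is an alias:

    def check_branch_heads(branches, gitrefs):
        # hypothetical helper, not part of this diff
        for rev, refname in gitrefs:
            if refname == 'HEAD' or refname.startswith('refs/tags/'):
                continue  # HEAD is an alias; tags are covered above
            branch = branches[refname]
            assert branch['target_type'] == 'revision'
            assert branch['target'] == rev
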
