Page MenuHomeSoftware Heritage

No OneTemporary


diff --git a/docker/services/swh-scheduler/ b/docker/services/swh-scheduler/
--- a/docker/services/swh-scheduler/
+++ b/docker/services/swh-scheduler/
@@ -3,33 +3,33 @@
set -e
source /srv/softwareheritage/utils/
source /srv/softwareheritage/utils/
-case "$1" in
- "shell")
- exec bash -i
- ;;
- "swh-scheduler")
- exec $@
- ;;
- *)
- wait_pgsql
- echo Setup the swh-scheduler API database
- --db-name ${POSTGRES_DB} scheduler
+if [ "$1" = 'shell' ] ; then
+ shift
+ if (( $# == 0)); then
+ exec bash -i
+ else
+ "$@"
+ fi
+ wait_pgsql
- echo Starting the swh-scheduler API server
- exec gunicorn --bind \
- --log-level DEBUG \
- --threads 2 \
- --workers 2 \
- --reload \
- --timeout 3600 \
- --config 'python:swh.core.api.gunicorn_config' \
- 'swh.scheduler.api.server:make_app_from_configfile()'
+ echo Setup the swh-scheduler API database
+ --db-name ${POSTGRES_DB} scheduler
+ echo Starting the swh-scheduler API server
+ exec gunicorn --bind \
+ --log-level DEBUG \
+ --threads 2 \
+ --workers 2 \
+ --reload \
+ --timeout 3600 \
+ --config 'python:swh.core.api.gunicorn_config' \
+ 'swh.scheduler.api.server:make_app_from_configfile()'
diff --git a/docker/tests/ b/docker/tests/
new file mode 100644
diff --git a/docker/tests/ b/docker/tests/
new file mode 100644
--- /dev/null
+++ b/docker/tests/
@@ -0,0 +1,126 @@
+# Copyright (C) 2019-2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+import re
+import subprocess
+import time
+import requests
+from urllib.parse import urljoin
+import pytest
+import testinfra
+APIURL = ''
+<?xml version="1.0" encoding="utf-8"?>
+<entry xmlns=""
+ xmlns:codemeta="">
+ <title>Test Software</title>
+ <client>swh</client>
+ <external_identifier>test-software</external_identifier>
+ <codemeta:author>
+ <codemeta:name>No One</codemeta:name>
+ </codemeta:author>
+# scope='session' so we use the same container for all the tests;
+def docker_compose(request):
+ # start the whole cluster
+ subprocess.check_output(['docker-compose', 'up', '-d'])
+ yield
+ # and stop it
+ subprocess.check_call(['docker-compose', 'down'])
+def scheduler_host(request, docker_compose):
+ # run a container in which test commands are executed
+ docker_id = subprocess.check_output(
+ ['docker-compose', 'run', '-d',
+ 'swh-scheduler', 'shell', 'sleep', '1h']).decode().strip()
+ scheduler_host = testinfra.get_host("docker://" + docker_id)
+ scheduler_host.check_output('wait-for-it swh-scheduler:5008 -t 30')
+ scheduler_host.check_output('wait-for-it swh-storage:5002 -t 30')
+ # return a testinfra connection to the container
+ yield scheduler_host
+ # at the end of the test suite, destroy the container
+ subprocess.check_call(['docker', 'rm', '-f', docker_id])
+# scope='session' so we use the same container for all the tests;
+def deposit_host(request, docker_compose):
+ # run a container in which test commands are executed
+ docker_id = subprocess.check_output(
+ ['docker-compose', 'run', '-d',
+ 'swh-deposit', 'shell', 'sleep', '1h']).decode().strip()
+ deposit_host = testinfra.get_host("docker://" + docker_id)
+ deposit_host.check_output(
+ 'echo \'print("Hello World!")\n\' > /tmp/')
+ deposit_host.check_output(
+ 'tar -C /tmp -czf /tmp/archive.tgz /tmp/')
+ deposit_host.check_output(
+ f'echo \'{SAMPLE_METADATA}\' > /tmp/metadata.xml')
+ deposit_host.check_output('wait-for-it swh-deposit:5006 -t 30')
+ # return a testinfra connection to the container
+ yield deposit_host
+ # at the end of the test suite, destroy the container
+ subprocess.check_call(['docker', 'rm', '-f', docker_id])
+def git_url():
+ return ''
+def git_origin(scheduler_host, git_url):
+ task = scheduler_host.check_output(
+ 'swh scheduler task add load-git '
+ f'url={git_url}'
+ )
+ taskid ='^Task (?P<id>\d+)$', task,
+ flags=re.MULTILINE).group('id')
+ assert int(taskid) > 0
+ for i in range(60):
+ status = scheduler_host.check_output(
+ f'swh scheduler task list --list-runs --task-id {taskid}')
+ if 'Executions:' in status:
+ if '[eventful]' in status:
+ break
+ if '[started]' in status:
+ time.sleep(1)
+ continue
+ if '[failed]' in status:
+ loader_logs = subprocess.check_output(
+ ['docker-compose', 'logs', 'swh-loader'])
+ assert False, ('Loading execution failed\n'
+ f'status: {status}\n'
+ f'loader logs: {loader_logs}')
+ assert False, f'Loading execution failed, task status is {status}'
+ return git_url
+# Utility functions
+def apiget(path: str, verb: str = 'GET', **kwargs):
+ """Query the API at path and return the json result or raise an
+ AssertionError"""
+ url = urljoin(APIURL, path)
+ resp = requests.request(verb, url, **kwargs)
+ assert resp.status_code == 200, f'failed to retrieve {url} ({resp})'
+ return resp.json()
diff --git a/docker/tests/ b/docker/tests/
--- a/docker/tests/
+++ b/docker/tests/
@@ -4,52 +4,8 @@
# See top-level LICENSE file for more information
import json
-import subprocess
import time
-import pytest
-import testinfra
-<?xml version="1.0" encoding="utf-8"?>
-<entry xmlns=""
- xmlns:codemeta="">
- <title>Test Software</title>
- <client>swh</client>
- <external_identifier>test-software</external_identifier>
- <codemeta:author>
- <codemeta:name>No One</codemeta:name>
- </codemeta:author>
-# scope='session' so we use the same container for all the tests;
-def deposit_host(request):
- # start the whole cluster
- subprocess.check_output(['docker-compose', 'up', '-d'])
- # run a container in which test commands are executed
- docker_id = subprocess.check_output(
- ['docker-compose', 'run', '-d',
- 'swh-deposit', 'shell', 'sleep', '1h']).decode().strip()
- deposit_host = testinfra.get_host("docker://" + docker_id)
- deposit_host.check_output(
- 'echo \'print("Hello World!")\n\' > /tmp/')
- deposit_host.check_output(
- 'tar -C /tmp -czf /tmp/archive.tgz /tmp/')
- deposit_host.check_output(
- f'echo \'{SAMPLE_METADATA}\' > /tmp/metadata.xml')
- deposit_host.check_output('wait-for-it swh-deposit:5006 -t 30')
- # return a testinfra connection to the container
- yield deposit_host
- # at the end of the test suite, destroy the container
- subprocess.check_call(['docker', 'rm', '-f', docker_id])
- # and the wole cluster
- subprocess.check_call(['docker-compose', 'down'])
def test_admin_collection(deposit_host):
# 'deposit_host' binds to the container
diff --git a/docker/tests/ b/docker/tests/
new file mode 100644
--- /dev/null
+++ b/docker/tests/
@@ -0,0 +1,62 @@
+# Copyright (C) 2019-2020 The Software Heritage developers
+# See the AUTHORS file at the top-level directory of this distribution
+# License: GNU General Public License version 3, or any later version
+# See top-level LICENSE file for more information
+from urllib.parse import quote_plus
+from .conftest import apiget
+def test_git_loader(scheduler_host, git_origin):
+ url = git_origin
+ print(f'Retrieve references available at {url}')
+ gitrefs = scheduler_host.check_output(f'git ls-remote {url}')
+ gitrefs = [x.split() for x in gitrefs.splitlines()]
+ print(f'Look for origin {url}')
+ # use quote_plus to prevent urljoin from messing with the 'http://' part of
+ # the url
+ origin = apiget(f'origin/{quote_plus(url)}/get')
+ assert origin['url'] == url
+ visit = apiget(f'origin/{quote_plus(url)}/visit/latest')
+ assert visit['status'] == 'full'
+ print(f'Check every identified git ref has been loaded')
+ snapshot = apiget(f'snapshot/{visit["snapshot"]}')
+ print(f'snapshot has {len(snapshot["branches"])} branches')
+ branches = snapshot["branches"]
+ # check every branch reported by git ls-remote is present in the snapshot
+ for rev, branch_name in gitrefs:
+ # for tags, only check for final revision id
+ if branch_name.startswith('refs/tags/') \
+ and not branch_name.endswith('^{}'):
+ continue
+ rev_desc = apiget(f'revision/{rev}')
+ assert rev_desc['type'] == 'git'
+ tag_revision = {}
+ tag_release = {}
+ for rev, tag in gitrefs:
+ if tag.startswith('refs/tags/'):
+ if tag.endswith('^{}'):
+ tag_revision[tag[:-3]] = rev
+ else:
+ tag_release[tag] = rev
+ for tag, revision in tag_revision.items():
+ # check that every release tag listed in the snapshot is known by the
+ # archive and consistent
+ release_id = tag_release[tag]
+ release = apiget(f'release/{release_id}')
+ assert release['id'] == release_id
+ assert release['target_type'] == 'revision'
+ assert release['target'] == revision
+ # and compare this with what git ls-remote reported
+ tag_desc = branches[tag]
+ assert tag_desc['target_type'] == 'release'
+ assert tag_desc['target'] == release_id

File Metadata

Mime Type
Nov 5 2024, 4:14 PM (12 w, 4 d ago)
Storage Engine
Storage Format
Raw Data
Storage Handle

Event Timeline