diff --git a/docker/tests/conftest.py b/docker/tests/conftest.py
--- a/docker/tests/conftest.py
+++ b/docker/tests/conftest.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2020 The Software Heritage developers
+# Copyright (C) 2019-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -17,9 +17,9 @@
import testinfra
-APIURL = 'http://127.0.0.1:5080/api/1/'
+APIURL = "http://127.0.0.1:5080/api/1/"
-SAMPLE_METADATA = '''\
+SAMPLE_METADATA = """\
@@ -30,20 +30,20 @@
No One
-'''
+"""
# scope='session' so we use the same container for all the tests;
-@pytest.fixture(scope='session')
+@pytest.fixture(scope="session")
def docker_compose(request):
# start the whole cluster
- subprocess.check_output(['docker-compose', 'up', '-d'])
+ subprocess.check_output(["docker-compose", "up", "-d"])
yield
# and strop it
- subprocess.check_call(['docker-compose', 'down'])
+ subprocess.check_call(["docker-compose", "down"])
-@pytest.fixture(scope='session')
+@pytest.fixture(scope="session")
def wfi_timeout():
"""
wait-for-it timeout in seconds
@@ -51,95 +51,98 @@
return 60
-@pytest.fixture(scope='session')
+@pytest.fixture(scope="session")
def scheduler_host(request, docker_compose, wfi_timeout):
# run a container in which test commands are executed
- docker_id = subprocess.check_output(
- ['docker-compose', 'run', '-d',
- 'swh-scheduler', 'shell', 'sleep', '1h']).decode().strip()
+ docker_id = (
+ subprocess.check_output(
+ ["docker-compose", "run", "-d", "swh-scheduler", "shell", "sleep", "1h"]
+ )
+ .decode()
+ .strip()
+ )
scheduler_host = testinfra.get_host("docker://" + docker_id)
- scheduler_host.check_output(
- f'wait-for-it swh-scheduler:5008 -t {wfi_timeout}')
- scheduler_host.check_output(
- f'wait-for-it swh-storage:5002 -t {wfi_timeout}')
+ scheduler_host.check_output(f"wait-for-it swh-scheduler:5008 -t {wfi_timeout}")
+ scheduler_host.check_output(f"wait-for-it swh-storage:5002 -t {wfi_timeout}")
# return a testinfra connection to the container
yield scheduler_host
# at the end of the test suite, destroy the container
- subprocess.check_call(['docker', 'rm', '-f', docker_id])
+ subprocess.check_call(["docker", "rm", "-f", docker_id])
# scope='session' so we use the same container for all the tests;
-@pytest.fixture(scope='session')
+@pytest.fixture(scope="session")
def deposit_host(request, docker_compose, wfi_timeout):
# run a container in which test commands are executed
- docker_id = subprocess.check_output(
- ['docker-compose', 'run', '-d',
- 'swh-deposit', 'shell', 'sleep', '1h']).decode().strip()
+ docker_id = (
+ subprocess.check_output(
+ ["docker-compose", "run", "-d", "swh-deposit", "shell", "sleep", "1h"]
+ )
+ .decode()
+ .strip()
+ )
deposit_host = testinfra.get_host("docker://" + docker_id)
- deposit_host.check_output(
- 'echo \'print("Hello World!")\n\' > /tmp/hello.py')
- deposit_host.check_output(
- 'tar -C /tmp -czf /tmp/archive.tgz /tmp/hello.py')
- deposit_host.check_output(
- f'echo \'{SAMPLE_METADATA}\' > /tmp/metadata.xml')
- deposit_host.check_output(
- f'wait-for-it swh-deposit:5006 -t {wfi_timeout}')
+ deposit_host.check_output("echo 'print(\"Hello World!\")\n' > /tmp/hello.py")
+ deposit_host.check_output("tar -C /tmp -czf /tmp/archive.tgz /tmp/hello.py")
+ deposit_host.check_output(f"echo '{SAMPLE_METADATA}' > /tmp/metadata.xml")
+ deposit_host.check_output(f"wait-for-it swh-deposit:5006 -t {wfi_timeout}")
# return a testinfra connection to the container
yield deposit_host
# at the end of the test suite, destroy the container
- subprocess.check_call(['docker', 'rm', '-f', docker_id])
+ subprocess.check_call(["docker", "rm", "-f", docker_id])
-@pytest.fixture(scope='session')
+@pytest.fixture(scope="session")
def git_url():
- return 'https://forge.softwareheritage.org/source/swh-core'
+ return "https://forge.softwareheritage.org/source/swh-core"
-@pytest.fixture(scope='session')
+@pytest.fixture(scope="session")
def git_origin(scheduler_host, git_url):
- task = scheduler_host.check_output(
- f'swh scheduler task add load-git url={git_url}'
- )
-    taskid = re.search(r'^Task (?P<id>\d+)$', task,
-                       flags=re.MULTILINE).group('id')
+ task = scheduler_host.check_output(f"swh scheduler task add load-git url={git_url}")
+    taskid = re.search(r"^Task (?P<id>\d+)$", task, flags=re.MULTILINE).group("id")
assert int(taskid) > 0
for i in range(60):
status = scheduler_host.check_output(
- f'swh scheduler task list --list-runs --task-id {taskid}')
- if 'Executions:' in status:
- if '[eventful]' in status:
+ f"swh scheduler task list --list-runs --task-id {taskid}"
+ )
+ if "Executions:" in status:
+ if "[eventful]" in status:
break
- if '[started]' in status or '[scheduled]' in status:
+ if "[started]" in status or "[scheduled]" in status:
time.sleep(1)
continue
- if '[failed]' in status:
+ if "[failed]" in status:
loader_logs = subprocess.check_output(
- ['docker-compose', 'logs', 'swh-loader'])
- assert False, ('Loading execution failed\n'
- f'status: {status}\n'
- f'loader logs: '
- + loader_logs.decode(errors='replace'))
- assert False, f'Loading execution failed, task status is {status}'
+ ["docker-compose", "logs", "swh-loader"]
+ )
+ assert False, (
+ "Loading execution failed\n"
+ f"status: {status}\n"
+ f"loader logs: " + loader_logs.decode(errors="replace")
+ )
+ assert False, f"Loading execution failed, task status is {status}"
return git_url
# Utility functions
-def apiget(path: str, verb: str = 'GET', **kwargs):
+
+def apiget(path: str, verb: str = "GET", **kwargs):
"""Query the API at path and return the json result or raise an
AssertionError"""
url = urljoin(APIURL, path)
resp = requests.request(verb, url, **kwargs)
- assert resp.status_code == 200, f'failed to retrieve {url}: {resp.text}'
+ assert resp.status_code == 200, f"failed to retrieve {url}: {resp.text}"
return resp.json()
-def pollapi(path: str, verb: str = 'GET', **kwargs):
+def pollapi(path: str, verb: str = "GET", **kwargs):
"""Poll the API at path until it returns an OK result"""
url = urljoin(APIURL, path)
for i in range(60):
@@ -152,13 +155,14 @@
return resp
-def getdirectory(dirid: str, currentpath: str = '') \
- -> Generator[Tuple[str, Mapping], None, None]:
+def getdirectory(
+ dirid: str, currentpath: str = ""
+) -> Generator[Tuple[str, Mapping], None, None]:
"""Recursively retrieve directory description from the archive"""
- directory = apiget(f'directory/{dirid}')
+ directory = apiget(f"directory/{dirid}")
for direntry in directory:
- path = join(currentpath, direntry['name'])
- if direntry['type'] != 'dir':
+ path = join(currentpath, direntry["name"])
+ if direntry["type"] != "dir":
yield (path, direntry)
else:
- yield from getdirectory(direntry['target'], path)
+ yield from getdirectory(direntry["target"], path)
diff --git a/docker/tests/test_deposit.py b/docker/tests/test_deposit.py
--- a/docker/tests/test_deposit.py
+++ b/docker/tests/test_deposit.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2020 The Software Heritage developers
+# Copyright (C) 2019-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -9,32 +9,39 @@
def test_admin_collection(deposit_host):
# 'deposit_host' binds to the container
- assert deposit_host.check_output(
- 'swh deposit admin collection list') == 'test'
+ assert deposit_host.check_output("swh deposit admin collection list") == "test"
def test_admin_user(deposit_host):
- assert deposit_host.check_output('swh deposit admin user list') == 'test'
+ assert deposit_host.check_output("swh deposit admin user list") == "test"
def test_create_deposit_simple(deposit_host):
deposit = deposit_host.check_output(
- 'swh deposit upload --format json --username test --password test '
- '--url http://nginx:5080/deposit/1 '
- '--archive /tmp/archive.tgz '
- '--name test_deposit --author somebody')
+ "swh deposit upload --format json --username test --password test "
+ "--url http://nginx:5080/deposit/1 "
+ "--archive /tmp/archive.tgz "
+ "--name test_deposit --author somebody"
+ )
deposit = json.loads(deposit)
- assert set(deposit.keys()) == {'deposit_id', 'deposit_status',
- 'deposit_status_detail', 'deposit_date'}
- assert deposit['deposit_status'] == 'deposited'
- deposit_id = deposit['deposit_id']
+ assert set(deposit.keys()) == {
+ "deposit_id",
+ "deposit_status",
+ "deposit_status_detail",
+ "deposit_date",
+ }
+ assert deposit["deposit_status"] == "deposited"
+ deposit_id = deposit["deposit_id"]
for i in range(60):
- status = json.loads(deposit_host.check_output(
- 'swh deposit status --format json --username test --password test '
- '--url http://nginx:5080/deposit/1 --deposit-id %s' % deposit_id))
- if status['deposit_status'] == 'done':
+ status = json.loads(
+ deposit_host.check_output(
+ "swh deposit status --format json --username test --password test "
+ "--url http://nginx:5080/deposit/1 --deposit-id %s" % deposit_id
+ )
+ )
+ if status["deposit_status"] == "done":
break
time.sleep(1)
else:
@@ -43,22 +50,30 @@
def test_create_deposit_with_metadata(deposit_host):
deposit = deposit_host.check_output(
- 'swh deposit upload --format json --username test --password test '
- '--url http://nginx:5080/deposit/1 '
- '--archive /tmp/archive.tgz '
- '--metadata /tmp/metadata.xml')
+ "swh deposit upload --format json --username test --password test "
+ "--url http://nginx:5080/deposit/1 "
+ "--archive /tmp/archive.tgz "
+ "--metadata /tmp/metadata.xml"
+ )
deposit = json.loads(deposit)
- assert set(deposit.keys()) == {'deposit_id', 'deposit_status',
- 'deposit_status_detail', 'deposit_date'}
- assert deposit['deposit_status'] == 'deposited'
- deposit_id = deposit['deposit_id']
+ assert set(deposit.keys()) == {
+ "deposit_id",
+ "deposit_status",
+ "deposit_status_detail",
+ "deposit_date",
+ }
+ assert deposit["deposit_status"] == "deposited"
+ deposit_id = deposit["deposit_id"]
for i in range(60):
- status = json.loads(deposit_host.check_output(
- 'swh deposit status --format json --username test --password test '
- '--url http://nginx:5080/deposit/1 --deposit-id %s' % deposit_id))
- if status['deposit_status'] == 'done':
+ status = json.loads(
+ deposit_host.check_output(
+ "swh deposit status --format json --username test --password test "
+ "--url http://nginx:5080/deposit/1 --deposit-id %s" % deposit_id
+ )
+ )
+ if status["deposit_status"] == "done":
break
time.sleep(1)
else:
@@ -67,32 +82,40 @@
def test_create_deposit_multipart(deposit_host):
deposit = deposit_host.check_output(
- 'swh deposit upload --format json --username test --password test '
- '--url http://nginx:5080/deposit/1 '
- '--archive /tmp/archive.tgz '
- '--partial')
+ "swh deposit upload --format json --username test --password test "
+ "--url http://nginx:5080/deposit/1 "
+ "--archive /tmp/archive.tgz "
+ "--partial"
+ )
deposit = json.loads(deposit)
- assert set(deposit.keys()) == {'deposit_id', 'deposit_status',
- 'deposit_status_detail', 'deposit_date'}
- assert deposit['deposit_status'] == 'partial'
- deposit_id = deposit['deposit_id']
+ assert set(deposit.keys()) == {
+ "deposit_id",
+ "deposit_status",
+ "deposit_status_detail",
+ "deposit_date",
+ }
+ assert deposit["deposit_status"] == "partial"
+ deposit_id = deposit["deposit_id"]
deposit = deposit_host.check_output(
- 'swh deposit upload --format json --username test --password test '
- '--url http://nginx:5080/deposit/1 '
- '--metadata /tmp/metadata.xml '
- '--deposit-id %s'
- % deposit_id)
+ "swh deposit upload --format json --username test --password test "
+ "--url http://nginx:5080/deposit/1 "
+ "--metadata /tmp/metadata.xml "
+ "--deposit-id %s" % deposit_id
+ )
deposit = json.loads(deposit)
- assert deposit['deposit_status'] == 'deposited'
- assert deposit['deposit_id'] == deposit_id
+ assert deposit["deposit_status"] == "deposited"
+ assert deposit["deposit_id"] == deposit_id
for i in range(60):
- status = json.loads(deposit_host.check_output(
- 'swh deposit status --format json --username test --password test '
- '--url http://nginx:5080/deposit/1 --deposit-id %s' % deposit_id))
- if status['deposit_status'] == 'done':
+ status = json.loads(
+ deposit_host.check_output(
+ "swh deposit status --format json --username test --password test "
+ "--url http://nginx:5080/deposit/1 --deposit-id %s" % deposit_id
+ )
+ )
+ if status["deposit_status"] == "done":
break
time.sleep(1)
else:
diff --git a/docker/tests/test_git_loader.py b/docker/tests/test_git_loader.py
--- a/docker/tests/test_git_loader.py
+++ b/docker/tests/test_git_loader.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2020 The Software Heritage developers
+# Copyright (C) 2019-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -11,20 +11,20 @@
def test_git_loader(scheduler_host, git_origin):
url = git_origin
- print(f'Retrieve references available at {url}')
- gitrefs = scheduler_host.check_output(f'git ls-remote {url}')
+ print(f"Retrieve references available at {url}")
+ gitrefs = scheduler_host.check_output(f"git ls-remote {url}")
gitrefs = [x.split() for x in gitrefs.splitlines()]
- print(f'Look for origin {url}')
+ print(f"Look for origin {url}")
# use quote_plus to prevent urljoin from messing with the 'http://' part of
# the url
- origin = apiget(f'origin/{quote_plus(url)}/get')
- assert origin['url'] == url
+ origin = apiget(f"origin/{quote_plus(url)}/get")
+ assert origin["url"] == url
- visit = apiget(f'origin/{quote_plus(url)}/visit/latest')
- assert visit['status'] == 'full'
+ visit = apiget(f"origin/{quote_plus(url)}/visit/latest")
+ assert visit["status"] == "full"
- print('Check every identified git ref has been loaded')
+ print("Check every identified git ref has been loaded")
snapshot = apiget(f'snapshot/{visit["snapshot"]}')
print(f'snapshot has {len(snapshot["branches"])} branches')
@@ -33,17 +33,16 @@
# check every branch reported by git ls-remote is present in the snapshot
for rev, branch_name in gitrefs:
# for tags, only check for final revision id
- if branch_name.startswith('refs/tags/') \
- and not branch_name.endswith('^{}'):
+ if branch_name.startswith("refs/tags/") and not branch_name.endswith("^{}"):
continue
- rev_desc = apiget(f'revision/{rev}')
- assert rev_desc['type'] == 'git'
+ rev_desc = apiget(f"revision/{rev}")
+ assert rev_desc["type"] == "git"
tag_revision = {}
tag_release = {}
for rev, tag in gitrefs:
- if tag.startswith('refs/tags/'):
- if tag.endswith('^{}'):
+ if tag.startswith("refs/tags/"):
+ if tag.endswith("^{}"):
tag_revision[tag[:-3]] = rev
else:
tag_release[tag] = rev
@@ -52,11 +51,11 @@
# check that every release tag listed in the snapshot is known by the
# archive and consistant
release_id = tag_release[tag]
- release = apiget(f'release/{release_id}')
- assert release['id'] == release_id
- assert release['target_type'] == 'revision'
- assert release['target'] == revision
+ release = apiget(f"release/{release_id}")
+ assert release["id"] == release_id
+ assert release["target_type"] == "revision"
+ assert release["target"] == revision
# and compare this with what git ls-remote reported
tag_desc = branches[tag]
- assert tag_desc['target_type'] == 'release'
- assert tag_desc['target'] == release_id
+ assert tag_desc["target_type"] == "release"
+ assert tag_desc["target"] == release_id
diff --git a/docker/tests/test_vault.py b/docker/tests/test_vault.py
--- a/docker/tests/test_vault.py
+++ b/docker/tests/test_vault.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2020 The Software Heritage developers
+# Copyright (C) 2019-2021 The Software Heritage developers
# See the AUTHORS file at the top-level directory of this distribution
# License: GNU General Public License version 3, or any later version
# See top-level LICENSE file for more information
@@ -16,23 +16,23 @@
def test_vault_directory(scheduler_host, git_origin):
# retrieve the root directory of the master branch of the ingested git
# repository (by the git_origin fixture)
- visit = apiget(f'origin/{quote_plus(git_origin)}/visit/latest')
+ visit = apiget(f"origin/{quote_plus(git_origin)}/visit/latest")
snapshot = apiget(f'snapshot/{visit["snapshot"]}')
rev_id = snapshot["branches"]["refs/heads/master"]["target"]
- revision = apiget(f'revision/{rev_id}')
- dir_id = revision['directory']
+ revision = apiget(f"revision/{rev_id}")
+ dir_id = revision["directory"]
# now cook it
- cook = apiget(f'vault/directory/{dir_id}/', 'POST')
- assert cook['obj_type'] == 'directory'
- assert cook['obj_id'] == dir_id
- assert cook['fetch_url'].endswith(f'vault/directory/{dir_id}/raw/')
+ cook = apiget(f"vault/directory/{dir_id}/", "POST")
+ assert cook["obj_type"] == "directory"
+ assert cook["obj_id"] == dir_id
+ assert cook["fetch_url"].endswith(f"vault/directory/{dir_id}/raw/")
# while it's cooking, get the directory tree from the archive
directory = getdirectory(dir_id)
# retrieve the cooked tar file
- resp = pollapi(f'vault/directory/{dir_id}/raw')
+ resp = pollapi(f"vault/directory/{dir_id}/raw")
tarf = tarfile.open(fileobj=io.BytesIO(resp.content))
# and check the tarfile seems ok wrt. 'directory'
@@ -42,22 +42,22 @@
for fname, fdesc in directory:
tfinfo = tarfiles.get(join(dir_id, fname))
assert tfinfo, f"Missing path {fname} in retrieved tarfile"
- if fdesc['type'] == 'file':
- assert fdesc['length'] == tfinfo.size, \
- f"File {fname}: length mismatch"
+ if fdesc["type"] == "file":
+ assert fdesc["length"] == tfinfo.size, f"File {fname}: length mismatch"
fdata = tarf.extractfile(tfinfo).read()
- for algo in fdesc['checksums']:
+ for algo in fdesc["checksums"]:
if algo not in hashlib.algorithms_available:
continue
hash = hashlib.new(algo, fdata).hexdigest()
- assert hash == fdesc['checksums'][algo], \
- f"File {fname}: {algo} mismatch"
+ assert (
+ hash == fdesc["checksums"][algo]
+ ), f"File {fname}: {algo} mismatch"
# XXX what to check for dir? symlink? (other?)
# check that if we ask a second time this directory, it returns the same
# and does not cook it again
- recook = apiget(f'vault/directory/{dir_id}/', 'POST')
- assert recook['obj_type'] == 'directory'
- assert recook['obj_id'] == dir_id
- assert recook['id'] == cook['id']
- assert recook['status'] == 'done' # no need to wait for this to be true
+ recook = apiget(f"vault/directory/{dir_id}/", "POST")
+ assert recook["obj_type"] == "directory"
+ assert recook["obj_id"] == dir_id
+ assert recook["id"] == cook["id"]
+ assert recook["status"] == "done" # no need to wait for this to be true