diff --git a/docker/tests/conftest.py b/docker/tests/conftest.py --- a/docker/tests/conftest.py +++ b/docker/tests/conftest.py @@ -1,4 +1,4 @@ -# Copyright (C) 2019-2020 The Software Heritage developers +# Copyright (C) 2019-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -17,9 +17,9 @@ import testinfra -APIURL = 'http://127.0.0.1:5080/api/1/' +APIURL = "http://127.0.0.1:5080/api/1/" -SAMPLE_METADATA = '''\ +SAMPLE_METADATA = """\ @@ -30,20 +30,20 @@ No One -''' +""" # scope='session' so we use the same container for all the tests; -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def docker_compose(request): # start the whole cluster - subprocess.check_output(['docker-compose', 'up', '-d']) + subprocess.check_output(["docker-compose", "up", "-d"]) yield # and strop it - subprocess.check_call(['docker-compose', 'down']) + subprocess.check_call(["docker-compose", "down"]) -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def wfi_timeout(): """ wait-for-it timeout in seconds @@ -51,95 +51,98 @@ return 60 -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def scheduler_host(request, docker_compose, wfi_timeout): # run a container in which test commands are executed - docker_id = subprocess.check_output( - ['docker-compose', 'run', '-d', - 'swh-scheduler', 'shell', 'sleep', '1h']).decode().strip() + docker_id = ( + subprocess.check_output( + ["docker-compose", "run", "-d", "swh-scheduler", "shell", "sleep", "1h"] + ) + .decode() + .strip() + ) scheduler_host = testinfra.get_host("docker://" + docker_id) - scheduler_host.check_output( - f'wait-for-it swh-scheduler:5008 -t {wfi_timeout}') - scheduler_host.check_output( - f'wait-for-it swh-storage:5002 -t {wfi_timeout}') + scheduler_host.check_output(f"wait-for-it swh-scheduler:5008 -t {wfi_timeout}") + scheduler_host.check_output(f"wait-for-it swh-storage:5002 -t {wfi_timeout}") # return a testinfra connection to the container yield scheduler_host # at the end of the test suite, destroy the container - subprocess.check_call(['docker', 'rm', '-f', docker_id]) + subprocess.check_call(["docker", "rm", "-f", docker_id]) # scope='session' so we use the same container for all the tests; -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def deposit_host(request, docker_compose, wfi_timeout): # run a container in which test commands are executed - docker_id = subprocess.check_output( - ['docker-compose', 'run', '-d', - 'swh-deposit', 'shell', 'sleep', '1h']).decode().strip() + docker_id = ( + subprocess.check_output( + ["docker-compose", "run", "-d", "swh-deposit", "shell", "sleep", "1h"] + ) + .decode() + .strip() + ) deposit_host = testinfra.get_host("docker://" + docker_id) - deposit_host.check_output( - 'echo \'print("Hello World!")\n\' > /tmp/hello.py') - deposit_host.check_output( - 'tar -C /tmp -czf /tmp/archive.tgz /tmp/hello.py') - deposit_host.check_output( - f'echo \'{SAMPLE_METADATA}\' > /tmp/metadata.xml') - deposit_host.check_output( - f'wait-for-it swh-deposit:5006 -t {wfi_timeout}') + deposit_host.check_output("echo 'print(\"Hello World!\")\n' > /tmp/hello.py") + deposit_host.check_output("tar -C /tmp -czf /tmp/archive.tgz /tmp/hello.py") + deposit_host.check_output(f"echo '{SAMPLE_METADATA}' > /tmp/metadata.xml") + deposit_host.check_output(f"wait-for-it swh-deposit:5006 -t {wfi_timeout}") # return a testinfra connection to the container yield deposit_host # at the end of the test suite, destroy the container - subprocess.check_call(['docker', 'rm', '-f', docker_id]) + subprocess.check_call(["docker", "rm", "-f", docker_id]) -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def git_url(): - return 'https://forge.softwareheritage.org/source/swh-core' + return "https://forge.softwareheritage.org/source/swh-core" -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def git_origin(scheduler_host, git_url): - task = scheduler_host.check_output( - f'swh scheduler task add load-git url={git_url}' - ) - taskid = re.search(r'^Task (?P\d+)$', task, - flags=re.MULTILINE).group('id') + task = scheduler_host.check_output(f"swh scheduler task add load-git url={git_url}") + taskid = re.search(r"^Task (?P\d+)$", task, flags=re.MULTILINE).group("id") assert int(taskid) > 0 for i in range(60): status = scheduler_host.check_output( - f'swh scheduler task list --list-runs --task-id {taskid}') - if 'Executions:' in status: - if '[eventful]' in status: + f"swh scheduler task list --list-runs --task-id {taskid}" + ) + if "Executions:" in status: + if "[eventful]" in status: break - if '[started]' in status or '[scheduled]' in status: + if "[started]" in status or "[scheduled]" in status: time.sleep(1) continue - if '[failed]' in status: + if "[failed]" in status: loader_logs = subprocess.check_output( - ['docker-compose', 'logs', 'swh-loader']) - assert False, ('Loading execution failed\n' - f'status: {status}\n' - f'loader logs: ' - + loader_logs.decode(errors='replace')) - assert False, f'Loading execution failed, task status is {status}' + ["docker-compose", "logs", "swh-loader"] + ) + assert False, ( + "Loading execution failed\n" + f"status: {status}\n" + f"loader logs: " + loader_logs.decode(errors="replace") + ) + assert False, f"Loading execution failed, task status is {status}" return git_url # Utility functions -def apiget(path: str, verb: str = 'GET', **kwargs): + +def apiget(path: str, verb: str = "GET", **kwargs): """Query the API at path and return the json result or raise an AssertionError""" url = urljoin(APIURL, path) resp = requests.request(verb, url, **kwargs) - assert resp.status_code == 200, f'failed to retrieve {url}: {resp.text}' + assert resp.status_code == 200, f"failed to retrieve {url}: {resp.text}" return resp.json() -def pollapi(path: str, verb: str = 'GET', **kwargs): +def pollapi(path: str, verb: str = "GET", **kwargs): """Poll the API at path until it returns an OK result""" url = urljoin(APIURL, path) for i in range(60): @@ -152,13 +155,14 @@ return resp -def getdirectory(dirid: str, currentpath: str = '') \ - -> Generator[Tuple[str, Mapping], None, None]: +def getdirectory( + dirid: str, currentpath: str = "" +) -> Generator[Tuple[str, Mapping], None, None]: """Recursively retrieve directory description from the archive""" - directory = apiget(f'directory/{dirid}') + directory = apiget(f"directory/{dirid}") for direntry in directory: - path = join(currentpath, direntry['name']) - if direntry['type'] != 'dir': + path = join(currentpath, direntry["name"]) + if direntry["type"] != "dir": yield (path, direntry) else: - yield from getdirectory(direntry['target'], path) + yield from getdirectory(direntry["target"], path) diff --git a/docker/tests/test_deposit.py b/docker/tests/test_deposit.py --- a/docker/tests/test_deposit.py +++ b/docker/tests/test_deposit.py @@ -1,4 +1,4 @@ -# Copyright (C) 2019-2020 The Software Heritage developers +# Copyright (C) 2019-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -9,32 +9,39 @@ def test_admin_collection(deposit_host): # 'deposit_host' binds to the container - assert deposit_host.check_output( - 'swh deposit admin collection list') == 'test' + assert deposit_host.check_output("swh deposit admin collection list") == "test" def test_admin_user(deposit_host): - assert deposit_host.check_output('swh deposit admin user list') == 'test' + assert deposit_host.check_output("swh deposit admin user list") == "test" def test_create_deposit_simple(deposit_host): deposit = deposit_host.check_output( - 'swh deposit upload --format json --username test --password test ' - '--url http://nginx:5080/deposit/1 ' - '--archive /tmp/archive.tgz ' - '--name test_deposit --author somebody') + "swh deposit upload --format json --username test --password test " + "--url http://nginx:5080/deposit/1 " + "--archive /tmp/archive.tgz " + "--name test_deposit --author somebody" + ) deposit = json.loads(deposit) - assert set(deposit.keys()) == {'deposit_id', 'deposit_status', - 'deposit_status_detail', 'deposit_date'} - assert deposit['deposit_status'] == 'deposited' - deposit_id = deposit['deposit_id'] + assert set(deposit.keys()) == { + "deposit_id", + "deposit_status", + "deposit_status_detail", + "deposit_date", + } + assert deposit["deposit_status"] == "deposited" + deposit_id = deposit["deposit_id"] for i in range(60): - status = json.loads(deposit_host.check_output( - 'swh deposit status --format json --username test --password test ' - '--url http://nginx:5080/deposit/1 --deposit-id %s' % deposit_id)) - if status['deposit_status'] == 'done': + status = json.loads( + deposit_host.check_output( + "swh deposit status --format json --username test --password test " + "--url http://nginx:5080/deposit/1 --deposit-id %s" % deposit_id + ) + ) + if status["deposit_status"] == "done": break time.sleep(1) else: @@ -43,22 +50,30 @@ def test_create_deposit_with_metadata(deposit_host): deposit = deposit_host.check_output( - 'swh deposit upload --format json --username test --password test ' - '--url http://nginx:5080/deposit/1 ' - '--archive /tmp/archive.tgz ' - '--metadata /tmp/metadata.xml') + "swh deposit upload --format json --username test --password test " + "--url http://nginx:5080/deposit/1 " + "--archive /tmp/archive.tgz " + "--metadata /tmp/metadata.xml" + ) deposit = json.loads(deposit) - assert set(deposit.keys()) == {'deposit_id', 'deposit_status', - 'deposit_status_detail', 'deposit_date'} - assert deposit['deposit_status'] == 'deposited' - deposit_id = deposit['deposit_id'] + assert set(deposit.keys()) == { + "deposit_id", + "deposit_status", + "deposit_status_detail", + "deposit_date", + } + assert deposit["deposit_status"] == "deposited" + deposit_id = deposit["deposit_id"] for i in range(60): - status = json.loads(deposit_host.check_output( - 'swh deposit status --format json --username test --password test ' - '--url http://nginx:5080/deposit/1 --deposit-id %s' % deposit_id)) - if status['deposit_status'] == 'done': + status = json.loads( + deposit_host.check_output( + "swh deposit status --format json --username test --password test " + "--url http://nginx:5080/deposit/1 --deposit-id %s" % deposit_id + ) + ) + if status["deposit_status"] == "done": break time.sleep(1) else: @@ -67,32 +82,40 @@ def test_create_deposit_multipart(deposit_host): deposit = deposit_host.check_output( - 'swh deposit upload --format json --username test --password test ' - '--url http://nginx:5080/deposit/1 ' - '--archive /tmp/archive.tgz ' - '--partial') + "swh deposit upload --format json --username test --password test " + "--url http://nginx:5080/deposit/1 " + "--archive /tmp/archive.tgz " + "--partial" + ) deposit = json.loads(deposit) - assert set(deposit.keys()) == {'deposit_id', 'deposit_status', - 'deposit_status_detail', 'deposit_date'} - assert deposit['deposit_status'] == 'partial' - deposit_id = deposit['deposit_id'] + assert set(deposit.keys()) == { + "deposit_id", + "deposit_status", + "deposit_status_detail", + "deposit_date", + } + assert deposit["deposit_status"] == "partial" + deposit_id = deposit["deposit_id"] deposit = deposit_host.check_output( - 'swh deposit upload --format json --username test --password test ' - '--url http://nginx:5080/deposit/1 ' - '--metadata /tmp/metadata.xml ' - '--deposit-id %s' - % deposit_id) + "swh deposit upload --format json --username test --password test " + "--url http://nginx:5080/deposit/1 " + "--metadata /tmp/metadata.xml " + "--deposit-id %s" % deposit_id + ) deposit = json.loads(deposit) - assert deposit['deposit_status'] == 'deposited' - assert deposit['deposit_id'] == deposit_id + assert deposit["deposit_status"] == "deposited" + assert deposit["deposit_id"] == deposit_id for i in range(60): - status = json.loads(deposit_host.check_output( - 'swh deposit status --format json --username test --password test ' - '--url http://nginx:5080/deposit/1 --deposit-id %s' % deposit_id)) - if status['deposit_status'] == 'done': + status = json.loads( + deposit_host.check_output( + "swh deposit status --format json --username test --password test " + "--url http://nginx:5080/deposit/1 --deposit-id %s" % deposit_id + ) + ) + if status["deposit_status"] == "done": break time.sleep(1) else: diff --git a/docker/tests/test_git_loader.py b/docker/tests/test_git_loader.py --- a/docker/tests/test_git_loader.py +++ b/docker/tests/test_git_loader.py @@ -1,4 +1,4 @@ -# Copyright (C) 2019-2020 The Software Heritage developers +# Copyright (C) 2019-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -11,20 +11,20 @@ def test_git_loader(scheduler_host, git_origin): url = git_origin - print(f'Retrieve references available at {url}') - gitrefs = scheduler_host.check_output(f'git ls-remote {url}') + print(f"Retrieve references available at {url}") + gitrefs = scheduler_host.check_output(f"git ls-remote {url}") gitrefs = [x.split() for x in gitrefs.splitlines()] - print(f'Look for origin {url}') + print(f"Look for origin {url}") # use quote_plus to prevent urljoin from messing with the 'http://' part of # the url - origin = apiget(f'origin/{quote_plus(url)}/get') - assert origin['url'] == url + origin = apiget(f"origin/{quote_plus(url)}/get") + assert origin["url"] == url - visit = apiget(f'origin/{quote_plus(url)}/visit/latest') - assert visit['status'] == 'full' + visit = apiget(f"origin/{quote_plus(url)}/visit/latest") + assert visit["status"] == "full" - print('Check every identified git ref has been loaded') + print("Check every identified git ref has been loaded") snapshot = apiget(f'snapshot/{visit["snapshot"]}') print(f'snapshot has {len(snapshot["branches"])} branches') @@ -33,17 +33,16 @@ # check every branch reported by git ls-remote is present in the snapshot for rev, branch_name in gitrefs: # for tags, only check for final revision id - if branch_name.startswith('refs/tags/') \ - and not branch_name.endswith('^{}'): + if branch_name.startswith("refs/tags/") and not branch_name.endswith("^{}"): continue - rev_desc = apiget(f'revision/{rev}') - assert rev_desc['type'] == 'git' + rev_desc = apiget(f"revision/{rev}") + assert rev_desc["type"] == "git" tag_revision = {} tag_release = {} for rev, tag in gitrefs: - if tag.startswith('refs/tags/'): - if tag.endswith('^{}'): + if tag.startswith("refs/tags/"): + if tag.endswith("^{}"): tag_revision[tag[:-3]] = rev else: tag_release[tag] = rev @@ -52,11 +51,11 @@ # check that every release tag listed in the snapshot is known by the # archive and consistant release_id = tag_release[tag] - release = apiget(f'release/{release_id}') - assert release['id'] == release_id - assert release['target_type'] == 'revision' - assert release['target'] == revision + release = apiget(f"release/{release_id}") + assert release["id"] == release_id + assert release["target_type"] == "revision" + assert release["target"] == revision # and compare this with what git ls-remote reported tag_desc = branches[tag] - assert tag_desc['target_type'] == 'release' - assert tag_desc['target'] == release_id + assert tag_desc["target_type"] == "release" + assert tag_desc["target"] == release_id diff --git a/docker/tests/test_vault.py b/docker/tests/test_vault.py --- a/docker/tests/test_vault.py +++ b/docker/tests/test_vault.py @@ -1,4 +1,4 @@ -# Copyright (C) 2019-2020 The Software Heritage developers +# Copyright (C) 2019-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -16,23 +16,23 @@ def test_vault_directory(scheduler_host, git_origin): # retrieve the root directory of the master branch of the ingested git # repository (by the git_origin fixture) - visit = apiget(f'origin/{quote_plus(git_origin)}/visit/latest') + visit = apiget(f"origin/{quote_plus(git_origin)}/visit/latest") snapshot = apiget(f'snapshot/{visit["snapshot"]}') rev_id = snapshot["branches"]["refs/heads/master"]["target"] - revision = apiget(f'revision/{rev_id}') - dir_id = revision['directory'] + revision = apiget(f"revision/{rev_id}") + dir_id = revision["directory"] # now cook it - cook = apiget(f'vault/directory/{dir_id}/', 'POST') - assert cook['obj_type'] == 'directory' - assert cook['obj_id'] == dir_id - assert cook['fetch_url'].endswith(f'vault/directory/{dir_id}/raw/') + cook = apiget(f"vault/directory/{dir_id}/", "POST") + assert cook["obj_type"] == "directory" + assert cook["obj_id"] == dir_id + assert cook["fetch_url"].endswith(f"vault/directory/{dir_id}/raw/") # while it's cooking, get the directory tree from the archive directory = getdirectory(dir_id) # retrieve the cooked tar file - resp = pollapi(f'vault/directory/{dir_id}/raw') + resp = pollapi(f"vault/directory/{dir_id}/raw") tarf = tarfile.open(fileobj=io.BytesIO(resp.content)) # and check the tarfile seems ok wrt. 'directory' @@ -42,22 +42,22 @@ for fname, fdesc in directory: tfinfo = tarfiles.get(join(dir_id, fname)) assert tfinfo, f"Missing path {fname} in retrieved tarfile" - if fdesc['type'] == 'file': - assert fdesc['length'] == tfinfo.size, \ - f"File {fname}: length mismatch" + if fdesc["type"] == "file": + assert fdesc["length"] == tfinfo.size, f"File {fname}: length mismatch" fdata = tarf.extractfile(tfinfo).read() - for algo in fdesc['checksums']: + for algo in fdesc["checksums"]: if algo not in hashlib.algorithms_available: continue hash = hashlib.new(algo, fdata).hexdigest() - assert hash == fdesc['checksums'][algo], \ - f"File {fname}: {algo} mismatch" + assert ( + hash == fdesc["checksums"][algo] + ), f"File {fname}: {algo} mismatch" # XXX what to check for dir? symlink? (other?) # check that if we ask a second time this directory, it returns the same # and does not cook it again - recook = apiget(f'vault/directory/{dir_id}/', 'POST') - assert recook['obj_type'] == 'directory' - assert recook['obj_id'] == dir_id - assert recook['id'] == cook['id'] - assert recook['status'] == 'done' # no need to wait for this to be true + recook = apiget(f"vault/directory/{dir_id}/", "POST") + assert recook["obj_type"] == "directory" + assert recook["obj_id"] == dir_id + assert recook["id"] == cook["id"] + assert recook["status"] == "done" # no need to wait for this to be true