diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,28 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v2.4.0 + hooks: + - id: trailing-whitespace + - id: check-json + - id: check-yaml + + - repo: https://gitlab.com/pycqa/flake8 + rev: 3.8.4 + hooks: + - id: flake8 + + - repo: https://github.com/codespell-project/codespell + rev: v1.16.0 + hooks: + - id: codespell + args: [-L cas] + + - repo: https://github.com/PyCQA/isort + rev: 5.5.2 + hooks: + - id: isort + + - repo: https://github.com/python/black + rev: 19.10b0 + hooks: + - id: black diff --git a/bin/change-all-repos b/bin/change-all-repos --- a/bin/change-all-repos +++ b/bin/change-all-repos @@ -5,7 +5,6 @@ import sys from typing import List - BIN_DIR = os.path.dirname(__file__) SWH_ENV_DIR = os.path.join(BIN_DIR, "..") @@ -19,7 +18,7 @@ path = os.path.join(SWH_ENV_DIR, repo) print() - print("="*50) + print("=" * 50) print(f"In {repo}") # Check the repo doesn't have any uncommitted changes to tracked files @@ -63,7 +62,6 @@ else: exit(1) - # Show the changes proc = subprocess.run( ["git", "-C", path, "diff", "--color=always"], diff --git a/bin/debpkg-update-metadata b/bin/debpkg-update-metadata --- a/bin/debpkg-update-metadata +++ b/bin/debpkg-update-metadata @@ -4,9 +4,9 @@ import os import sys -sys.path.append('/usr/share/dh-python') +sys.path.append("/usr/share/dh-python") -from dhpython.pydist import guess_dependency +from dhpython.pydist import guess_dependency # noqa def parse_requirements(filename): @@ -17,7 +17,7 @@ with open(filename) as f: for line in f.readlines(): line = line.strip() - if not line or line.startswith('#'): + if not line or line.startswith("#"): continue requirements.append(line) return requirements @@ -25,22 +25,22 @@ def dependencies_from_requirements(directory): requirements = [] - for filename in glob.glob(os.path.join(directory, 'requirements*.txt')): + for filename in glob.glob(os.path.join(directory, "requirements*.txt")): requirements.extend(parse_requirements(filename)) for req in set(requirements): - yield guess_dependency('cpython3', req, accept_upstream_versions=True) + yield guess_dependency("cpython3", req, accept_upstream_versions=True) def get_all_dependencies(directory): - dependencies = ['debhelper (>= 11)', 'python3-all', 'dh-python (>= 3)'] + dependencies = ["debhelper (>= 11)", "python3-all", "dh-python (>= 3)"] yield from dependencies yield from dependencies_from_requirements(directory) -if __name__ == '__main__': +if __name__ == "__main__": if len(sys.argv) != 2: - dir = '.' + dir = "." else: dir = sys.argv[1] for dep in get_all_dependencies(dir): diff --git a/docker/README.rst b/docker/README.rst --- a/docker/README.rst +++ b/docker/README.rst @@ -583,7 +583,7 @@ This repo comes with an optional ``docker-compose.storage-mirror.yml`` docker compose file that can be used to test the kafka-powered mirror -mecanism for the main storage. +mechanism for the main storage. 
This can be used like:: @@ -593,7 +593,7 @@ Compared to the original compose file, this will: - overrides the swh-storage service to activate the kafka direct writer - on swh.journal.objects prefixed topics using thw swh.storage.master + on swh.journal.objects prefixed topics using the swh.storage.master ID, - overrides the swh-web service to make it use the mirror instead of the master storage, @@ -633,7 +633,7 @@ ------------ All entrypoints to SWH code (CLI, gunicorn, celery, …) are, or should -be, intrumented using Sentry. By default this is disabled, but if you +be, instrumented using Sentry. By default this is disabled, but if you run your own Sentry instance, you can use it. To do so, you must get a DSN from your Sentry instance, and set it as @@ -651,7 +651,7 @@ Also, a few containers (``swh-storage``, ``swh-xxx-db``) use a volume for storing the blobs or the database files. With the default configuration provided in the ``docker-compose.yml`` file, these volumes -are not persistant. So removing the containers will delete the volumes! +are not persistent. So removing the containers will delete the volumes! Also note that for the ``swh-objstorage``, since the volume can be pretty big, the remove operation can be quite long (several minutes is diff --git a/docker/conf/cassandra.yaml b/docker/conf/cassandra.yaml --- a/docker/conf/cassandra.yaml +++ b/docker/conf/cassandra.yaml @@ -20,7 +20,7 @@ # Specifying initial_token will override this setting on the node's initial start, # on subsequent starts, this setting will apply even if initial token is set. # -# If you already have a cluster with 1 token per node, and wish to migrate to +# If you already have a cluster with 1 token per node, and wish to migrate to # multiple tokens per node, see http://wiki.apache.org/cassandra/Operations num_tokens: 256 @@ -36,8 +36,8 @@ # allocate_tokens_for_keyspace: KEYSPACE # initial_token allows you to specify tokens manually. While you can use it with -# vnodes (num_tokens > 1, above) -- in which case you should provide a -# comma-separated list -- it's primarily used when adding nodes to legacy clusters +# vnodes (num_tokens > 1, above) -- in which case you should provide a +# comma-separated list -- it's primarily used when adding nodes to legacy clusters # that do not have vnodes enabled. # initial_token: @@ -251,7 +251,7 @@ # # Valid values are either "auto" (omitting the value) or a value greater 0. # -# Note that specifying a too large value will result in long running GCs and possbily +# Note that specifying a too large value will result in long running GCs and possibly # out-of-memory errors. Keep the value at a small fraction of the heap. # # If you constantly see "prepared statements discarded in the last minute because @@ -260,7 +260,7 @@ # i.e. use bind markers for variable parts. # # Do only change the default value, if you really have more prepared statements than -# fit in the cache. In most cases it is not neccessary to change this value. +# fit in the cache. In most cases it is not necessary to change this value. # Constantly re-preparing statements is a performance penalty. # # Default value ("auto") is 1/256th of the heap or 10MB, whichever is greater @@ -369,8 +369,8 @@ saved_caches_directory: /var/lib/cassandra/saved_caches -# commitlog_sync may be either "periodic" or "batch." -# +# commitlog_sync may be either "periodic" or "batch." +# # When in batch mode, Cassandra won't ack writes until the commit log # has been fsynced to disk. 
It will wait # commitlog_sync_batch_window_in_ms milliseconds between fsyncs. @@ -419,7 +419,7 @@ # any class that implements the SeedProvider interface and has a # constructor that takes a Map of parameters will do. seed_provider: - # Addresses of hosts that are deemed contact points. + # Addresses of hosts that are deemed contact points. # Cassandra nodes use this list of hosts to find each other and learn # the topology of the ring. You must change this if you are running # multiple nodes! @@ -773,7 +773,7 @@ snapshot_before_compaction: false # Whether or not a snapshot is taken of the data before keyspace truncation -# or dropping of column families. The STRONGLY advised default of true +# or dropping of column families. The STRONGLY advised default of true # should be used to provide data safety. If you set this flag to false, you will # lose data on truncation or drop. auto_snapshot: true @@ -783,7 +783,7 @@ # number of rows per partition. The competing goals are these: # # - a smaller granularity means more index entries are generated -# and looking up rows withing the partition by collation column +# and looking up rows within the partition by collation column # is faster # - but, Cassandra will keep the collation index in memory for hot # rows (as part of the key cache), so a larger granularity means @@ -810,7 +810,7 @@ # # concurrent_compactors defaults to the smaller of (number of disks, # number of cores), with a minimum of 2 and a maximum of 8. -# +# # If your data directories are backed by SSD, you should increase this # to the number of cores. #concurrent_compactors: 1 @@ -825,7 +825,7 @@ # When compacting, the replacement sstable(s) can be opened before they # are completely written, and used in place of the prior sstables for -# any range that has been written. This helps to smoothly transfer reads +# any range that has been written. This helps to smoothly transfer reads # between the sstables, reducing page cache churn and keeping hot rows hot sstable_preemptive_open_interval_in_mb: 50 @@ -869,7 +869,7 @@ # Enable operation timeout information exchange between nodes to accurately # measure request timeouts. If disabled, replicas will assume that requests # were forwarded to them instantly by the coordinator, which means that -# under overload conditions we will waste that much extra time processing +# under overload conditions we will waste that much extra time processing # already-timed-out requests. # # Warning: before enabling this property make sure to ntp is installed @@ -955,7 +955,7 @@ # controls how often to perform the more expensive part of host score # calculation -dynamic_snitch_update_interval_in_ms: 100 +dynamic_snitch_update_interval_in_ms: 100 # controls how often to reset all host scores, allowing a bad host to # possibly recover dynamic_snitch_reset_interval_in_ms: 600000 @@ -989,7 +989,7 @@ # RoundRobin # throttle_limit # The throttle_limit is the number of in-flight -# requests per client. Requests beyond +# requests per client. Requests beyond # that limit are queued up until # running requests can complete. # The value of 80 here is twice the number of @@ -1118,7 +1118,7 @@ # Enables encrypting data at-rest (on disk). Different key providers can be plugged in, but the default reads from # a JCE-style keystore. 
A single keystore can hold multiple keys, but the one referenced by -# the "key_alias" is the only key that will be used for encrypt opertaions; previously used keys +# the "key_alias" is the only key that will be used for encrypt operations; previously used keys # can still (and should!) be in the keystore and will be used on decrypt operations # (to handle the case of key rotation). # @@ -1135,9 +1135,9 @@ key_alias: testing:1 # CBC IV length for AES needs to be 16 bytes (which is also the default size) # iv_length: 16 - key_provider: + key_provider: - class_name: org.apache.cassandra.security.JKSKeyProvider - parameters: + parameters: - keystore: conf/.keystore keystore_password: cassandra store_type: JCEKS diff --git a/docker/conf/prometheus-jmx-exporter-cassandra.yml b/docker/conf/prometheus-jmx-exporter-cassandra.yml --- a/docker/conf/prometheus-jmx-exporter-cassandra.yml +++ b/docker/conf/prometheus-jmx-exporter-cassandra.yml @@ -4,8 +4,8 @@ startDelaySeconds: 0 hostPort: cassandra-seed:7199 -username: -password: +username: +password: #jmxUrl: service:jmx:rmi:///jndi/rmi://127.0.0.1:1234/jmxrmi ssl: false lowercaseOutputName: false diff --git a/docker/services/keycloak/keycloak_swh_setup.py b/docker/services/keycloak/keycloak_swh_setup.py --- a/docker/services/keycloak/keycloak_swh_setup.py +++ b/docker/services/keycloak/keycloak_swh_setup.py @@ -6,8 +6,8 @@ # See top-level LICENSE file for more information import logging -from keycloak import KeycloakAdmin +from keycloak import KeycloakAdmin SERVER_URL = "http://localhost:8080/keycloak/auth/" REALM_NAME = "SoftwareHeritage" diff --git a/docker/tests/conftest.py b/docker/tests/conftest.py --- a/docker/tests/conftest.py +++ b/docker/tests/conftest.py @@ -1,25 +1,22 @@ -# Copyright (C) 2019-2020 The Software Heritage developers +# Copyright (C) 2019-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information +from os.path import join import re import subprocess import time - -import requests - -from os.path import join from typing import Generator, Mapping, Tuple from urllib.parse import urljoin import pytest +import requests import testinfra +APIURL = "http://127.0.0.1:5080/api/1/" -APIURL = 'http://127.0.0.1:5080/api/1/' - -SAMPLE_METADATA = '''\ +SAMPLE_METADATA = """\ @@ -30,20 +27,20 @@ No One -''' +""" # scope='session' so we use the same container for all the tests; -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def docker_compose(request): # start the whole cluster - subprocess.check_output(['docker-compose', 'up', '-d']) + subprocess.check_output(["docker-compose", "up", "-d"]) yield # and strop it - subprocess.check_call(['docker-compose', 'down']) + subprocess.check_call(["docker-compose", "down"]) -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def wfi_timeout(): """ wait-for-it timeout in seconds @@ -51,95 +48,98 @@ return 60 -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def scheduler_host(request, docker_compose, wfi_timeout): # run a container in which test commands are executed - docker_id = subprocess.check_output( - ['docker-compose', 'run', '-d', - 'swh-scheduler', 'shell', 'sleep', '1h']).decode().strip() + docker_id = ( + subprocess.check_output( + ["docker-compose", "run", "-d", "swh-scheduler", "shell", "sleep", "1h"] + ) + .decode() + .strip() + ) scheduler_host = 
testinfra.get_host("docker://" + docker_id) - scheduler_host.check_output( - f'wait-for-it swh-scheduler:5008 -t {wfi_timeout}') - scheduler_host.check_output( - f'wait-for-it swh-storage:5002 -t {wfi_timeout}') + scheduler_host.check_output(f"wait-for-it swh-scheduler:5008 -t {wfi_timeout}") + scheduler_host.check_output(f"wait-for-it swh-storage:5002 -t {wfi_timeout}") # return a testinfra connection to the container yield scheduler_host # at the end of the test suite, destroy the container - subprocess.check_call(['docker', 'rm', '-f', docker_id]) + subprocess.check_call(["docker", "rm", "-f", docker_id]) # scope='session' so we use the same container for all the tests; -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def deposit_host(request, docker_compose, wfi_timeout): # run a container in which test commands are executed - docker_id = subprocess.check_output( - ['docker-compose', 'run', '-d', - 'swh-deposit', 'shell', 'sleep', '1h']).decode().strip() + docker_id = ( + subprocess.check_output( + ["docker-compose", "run", "-d", "swh-deposit", "shell", "sleep", "1h"] + ) + .decode() + .strip() + ) deposit_host = testinfra.get_host("docker://" + docker_id) - deposit_host.check_output( - 'echo \'print("Hello World!")\n\' > /tmp/hello.py') - deposit_host.check_output( - 'tar -C /tmp -czf /tmp/archive.tgz /tmp/hello.py') - deposit_host.check_output( - f'echo \'{SAMPLE_METADATA}\' > /tmp/metadata.xml') - deposit_host.check_output( - f'wait-for-it swh-deposit:5006 -t {wfi_timeout}') + deposit_host.check_output("echo 'print(\"Hello World!\")\n' > /tmp/hello.py") + deposit_host.check_output("tar -C /tmp -czf /tmp/archive.tgz /tmp/hello.py") + deposit_host.check_output(f"echo '{SAMPLE_METADATA}' > /tmp/metadata.xml") + deposit_host.check_output(f"wait-for-it swh-deposit:5006 -t {wfi_timeout}") # return a testinfra connection to the container yield deposit_host # at the end of the test suite, destroy the container - subprocess.check_call(['docker', 'rm', '-f', docker_id]) + subprocess.check_call(["docker", "rm", "-f", docker_id]) -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def git_url(): - return 'https://forge.softwareheritage.org/source/swh-core' + return "https://forge.softwareheritage.org/source/swh-core" -@pytest.fixture(scope='session') +@pytest.fixture(scope="session") def git_origin(scheduler_host, git_url): - task = scheduler_host.check_output( - f'swh scheduler task add load-git url={git_url}' - ) - taskid = re.search(r'^Task (?P\d+)$', task, - flags=re.MULTILINE).group('id') + task = scheduler_host.check_output(f"swh scheduler task add load-git url={git_url}") + taskid = re.search(r"^Task (?P\d+)$", task, flags=re.MULTILINE).group("id") assert int(taskid) > 0 for i in range(60): status = scheduler_host.check_output( - f'swh scheduler task list --list-runs --task-id {taskid}') - if 'Executions:' in status: - if '[eventful]' in status: + f"swh scheduler task list --list-runs --task-id {taskid}" + ) + if "Executions:" in status: + if "[eventful]" in status: break - if '[started]' in status or '[scheduled]' in status: + if "[started]" in status or "[scheduled]" in status: time.sleep(1) continue - if '[failed]' in status: + if "[failed]" in status: loader_logs = subprocess.check_output( - ['docker-compose', 'logs', 'swh-loader']) - assert False, ('Loading execution failed\n' - f'status: {status}\n' - f'loader logs: ' - + loader_logs.decode(errors='replace')) - assert False, f'Loading execution failed, task status is {status}' + ["docker-compose", 
"logs", "swh-loader"] + ) + assert False, ( + "Loading execution failed\n" + f"status: {status}\n" + f"loader logs: " + loader_logs.decode(errors="replace") + ) + assert False, f"Loading execution failed, task status is {status}" return git_url # Utility functions -def apiget(path: str, verb: str = 'GET', **kwargs): + +def apiget(path: str, verb: str = "GET", **kwargs): """Query the API at path and return the json result or raise an AssertionError""" url = urljoin(APIURL, path) resp = requests.request(verb, url, **kwargs) - assert resp.status_code == 200, f'failed to retrieve {url}: {resp.text}' + assert resp.status_code == 200, f"failed to retrieve {url}: {resp.text}" return resp.json() -def pollapi(path: str, verb: str = 'GET', **kwargs): +def pollapi(path: str, verb: str = "GET", **kwargs): """Poll the API at path until it returns an OK result""" url = urljoin(APIURL, path) for i in range(60): @@ -152,13 +152,14 @@ return resp -def getdirectory(dirid: str, currentpath: str = '') \ - -> Generator[Tuple[str, Mapping], None, None]: +def getdirectory( + dirid: str, currentpath: str = "" +) -> Generator[Tuple[str, Mapping], None, None]: """Recursively retrieve directory description from the archive""" - directory = apiget(f'directory/{dirid}') + directory = apiget(f"directory/{dirid}") for direntry in directory: - path = join(currentpath, direntry['name']) - if direntry['type'] != 'dir': + path = join(currentpath, direntry["name"]) + if direntry["type"] != "dir": yield (path, direntry) else: - yield from getdirectory(direntry['target'], path) + yield from getdirectory(direntry["target"], path) diff --git a/docker/tests/test_02_vault.sh b/docker/tests/test_02_vault.sh --- a/docker/tests/test_02_vault.sh +++ b/docker/tests/test_02_vault.sh @@ -8,7 +8,7 @@ status_message "Waiting for the directory cooking task to complete" wait_for_service_output 300 swh-vault-worker "swh.vault.cooking_tasks.SWHCookingTask.*succeeded" -status_message "The directory cooking task has been sucessfully executed" +status_message "The directory cooking task has been successfully executed" status_message "Checking that the cooked directory tarball can be downloaded" http_request_check GET ${SWH_WEB_API_BASEURL}/vault/directory/$directory/raw/ @@ -19,7 +19,7 @@ status_message "Waiting for the revision cooking task to complete" wait_for_service_output 300 swh-vault-worker "swh.vault.cooking_tasks.SWHCookingTask.*succeeded" -status_message "The revision cooking task has been sucessfully executed" +status_message "The revision cooking task has been successfully executed" status_message "Checking that the cooked revision tarball can be downloaded" http_request_check GET ${SWH_WEB_API_BASEURL}/vault/revision/$revision/gitfast/raw/ diff --git a/docker/tests/test_deposit.py b/docker/tests/test_deposit.py --- a/docker/tests/test_deposit.py +++ b/docker/tests/test_deposit.py @@ -1,4 +1,4 @@ -# Copyright (C) 2019-2020 The Software Heritage developers +# Copyright (C) 2019-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -9,32 +9,39 @@ def test_admin_collection(deposit_host): # 'deposit_host' binds to the container - assert deposit_host.check_output( - 'swh deposit admin collection list') == 'test' + assert deposit_host.check_output("swh deposit admin collection list") == "test" def test_admin_user(deposit_host): - assert 
deposit_host.check_output('swh deposit admin user list') == 'test' + assert deposit_host.check_output("swh deposit admin user list") == "test" def test_create_deposit_simple(deposit_host): deposit = deposit_host.check_output( - 'swh deposit upload --format json --username test --password test ' - '--url http://nginx:5080/deposit/1 ' - '--archive /tmp/archive.tgz ' - '--name test_deposit --author somebody') + "swh deposit upload --format json --username test --password test " + "--url http://nginx:5080/deposit/1 " + "--archive /tmp/archive.tgz " + "--name test_deposit --author somebody" + ) deposit = json.loads(deposit) - assert set(deposit.keys()) == {'deposit_id', 'deposit_status', - 'deposit_status_detail', 'deposit_date'} - assert deposit['deposit_status'] == 'deposited' - deposit_id = deposit['deposit_id'] + assert set(deposit.keys()) == { + "deposit_id", + "deposit_status", + "deposit_status_detail", + "deposit_date", + } + assert deposit["deposit_status"] == "deposited" + deposit_id = deposit["deposit_id"] for i in range(60): - status = json.loads(deposit_host.check_output( - 'swh deposit status --format json --username test --password test ' - '--url http://nginx:5080/deposit/1 --deposit-id %s' % deposit_id)) - if status['deposit_status'] == 'done': + status = json.loads( + deposit_host.check_output( + "swh deposit status --format json --username test --password test " + "--url http://nginx:5080/deposit/1 --deposit-id %s" % deposit_id + ) + ) + if status["deposit_status"] == "done": break time.sleep(1) else: @@ -43,22 +50,30 @@ def test_create_deposit_with_metadata(deposit_host): deposit = deposit_host.check_output( - 'swh deposit upload --format json --username test --password test ' - '--url http://nginx:5080/deposit/1 ' - '--archive /tmp/archive.tgz ' - '--metadata /tmp/metadata.xml') + "swh deposit upload --format json --username test --password test " + "--url http://nginx:5080/deposit/1 " + "--archive /tmp/archive.tgz " + "--metadata /tmp/metadata.xml" + ) deposit = json.loads(deposit) - assert set(deposit.keys()) == {'deposit_id', 'deposit_status', - 'deposit_status_detail', 'deposit_date'} - assert deposit['deposit_status'] == 'deposited' - deposit_id = deposit['deposit_id'] + assert set(deposit.keys()) == { + "deposit_id", + "deposit_status", + "deposit_status_detail", + "deposit_date", + } + assert deposit["deposit_status"] == "deposited" + deposit_id = deposit["deposit_id"] for i in range(60): - status = json.loads(deposit_host.check_output( - 'swh deposit status --format json --username test --password test ' - '--url http://nginx:5080/deposit/1 --deposit-id %s' % deposit_id)) - if status['deposit_status'] == 'done': + status = json.loads( + deposit_host.check_output( + "swh deposit status --format json --username test --password test " + "--url http://nginx:5080/deposit/1 --deposit-id %s" % deposit_id + ) + ) + if status["deposit_status"] == "done": break time.sleep(1) else: @@ -67,32 +82,40 @@ def test_create_deposit_multipart(deposit_host): deposit = deposit_host.check_output( - 'swh deposit upload --format json --username test --password test ' - '--url http://nginx:5080/deposit/1 ' - '--archive /tmp/archive.tgz ' - '--partial') + "swh deposit upload --format json --username test --password test " + "--url http://nginx:5080/deposit/1 " + "--archive /tmp/archive.tgz " + "--partial" + ) deposit = json.loads(deposit) - assert set(deposit.keys()) == {'deposit_id', 'deposit_status', - 'deposit_status_detail', 'deposit_date'} - assert deposit['deposit_status'] == 'partial' 
- deposit_id = deposit['deposit_id'] + assert set(deposit.keys()) == { + "deposit_id", + "deposit_status", + "deposit_status_detail", + "deposit_date", + } + assert deposit["deposit_status"] == "partial" + deposit_id = deposit["deposit_id"] deposit = deposit_host.check_output( - 'swh deposit upload --format json --username test --password test ' - '--url http://nginx:5080/deposit/1 ' - '--metadata /tmp/metadata.xml ' - '--deposit-id %s' - % deposit_id) + "swh deposit upload --format json --username test --password test " + "--url http://nginx:5080/deposit/1 " + "--metadata /tmp/metadata.xml " + "--deposit-id %s" % deposit_id + ) deposit = json.loads(deposit) - assert deposit['deposit_status'] == 'deposited' - assert deposit['deposit_id'] == deposit_id + assert deposit["deposit_status"] == "deposited" + assert deposit["deposit_id"] == deposit_id for i in range(60): - status = json.loads(deposit_host.check_output( - 'swh deposit status --format json --username test --password test ' - '--url http://nginx:5080/deposit/1 --deposit-id %s' % deposit_id)) - if status['deposit_status'] == 'done': + status = json.loads( + deposit_host.check_output( + "swh deposit status --format json --username test --password test " + "--url http://nginx:5080/deposit/1 --deposit-id %s" % deposit_id + ) + ) + if status["deposit_status"] == "done": break time.sleep(1) else: diff --git a/docker/tests/test_git_loader.py b/docker/tests/test_git_loader.py --- a/docker/tests/test_git_loader.py +++ b/docker/tests/test_git_loader.py @@ -1,4 +1,4 @@ -# Copyright (C) 2019-2020 The Software Heritage developers +# Copyright (C) 2019-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information @@ -11,20 +11,20 @@ def test_git_loader(scheduler_host, git_origin): url = git_origin - print(f'Retrieve references available at {url}') - gitrefs = scheduler_host.check_output(f'git ls-remote {url}') + print(f"Retrieve references available at {url}") + gitrefs = scheduler_host.check_output(f"git ls-remote {url}") gitrefs = [x.split() for x in gitrefs.splitlines()] - print(f'Look for origin {url}') + print(f"Look for origin {url}") # use quote_plus to prevent urljoin from messing with the 'http://' part of # the url - origin = apiget(f'origin/{quote_plus(url)}/get') - assert origin['url'] == url + origin = apiget(f"origin/{quote_plus(url)}/get") + assert origin["url"] == url - visit = apiget(f'origin/{quote_plus(url)}/visit/latest') - assert visit['status'] == 'full' + visit = apiget(f"origin/{quote_plus(url)}/visit/latest") + assert visit["status"] == "full" - print('Check every identified git ref has been loaded') + print("Check every identified git ref has been loaded") snapshot = apiget(f'snapshot/{visit["snapshot"]}') print(f'snapshot has {len(snapshot["branches"])} branches') @@ -33,30 +33,29 @@ # check every branch reported by git ls-remote is present in the snapshot for rev, branch_name in gitrefs: # for tags, only check for final revision id - if branch_name.startswith('refs/tags/') \ - and not branch_name.endswith('^{}'): + if branch_name.startswith("refs/tags/") and not branch_name.endswith("^{}"): continue - rev_desc = apiget(f'revision/{rev}') - assert rev_desc['type'] == 'git' + rev_desc = apiget(f"revision/{rev}") + assert rev_desc["type"] == "git" tag_revision = {} tag_release = {} for rev, tag in gitrefs: - if tag.startswith('refs/tags/'): - if 
tag.endswith('^{}'): + if tag.startswith("refs/tags/"): + if tag.endswith("^{}"): tag_revision[tag[:-3]] = rev else: tag_release[tag] = rev for tag, revision in tag_revision.items(): # check that every release tag listed in the snapshot is known by the - # archive and consistant + # archive and consistent release_id = tag_release[tag] - release = apiget(f'release/{release_id}') - assert release['id'] == release_id - assert release['target_type'] == 'revision' - assert release['target'] == revision + release = apiget(f"release/{release_id}") + assert release["id"] == release_id + assert release["target_type"] == "revision" + assert release["target"] == revision # and compare this with what git ls-remote reported tag_desc = branches[tag] - assert tag_desc['target_type'] == 'release' - assert tag_desc['target'] == release_id + assert tag_desc["target_type"] == "release" + assert tag_desc["target"] == release_id diff --git a/docker/tests/test_vault.py b/docker/tests/test_vault.py --- a/docker/tests/test_vault.py +++ b/docker/tests/test_vault.py @@ -1,13 +1,12 @@ -# Copyright (C) 2019-2020 The Software Heritage developers +# Copyright (C) 2019-2021 The Software Heritage developers # See the AUTHORS file at the top-level directory of this distribution # License: GNU General Public License version 3, or any later version # See top-level LICENSE file for more information -import io import hashlib -import tarfile - +import io from os.path import join +import tarfile from urllib.parse import quote_plus from .conftest import apiget, getdirectory, pollapi @@ -16,23 +15,23 @@ def test_vault_directory(scheduler_host, git_origin): # retrieve the root directory of the master branch of the ingested git # repository (by the git_origin fixture) - visit = apiget(f'origin/{quote_plus(git_origin)}/visit/latest') + visit = apiget(f"origin/{quote_plus(git_origin)}/visit/latest") snapshot = apiget(f'snapshot/{visit["snapshot"]}') rev_id = snapshot["branches"]["refs/heads/master"]["target"] - revision = apiget(f'revision/{rev_id}') - dir_id = revision['directory'] + revision = apiget(f"revision/{rev_id}") + dir_id = revision["directory"] # now cook it - cook = apiget(f'vault/directory/{dir_id}/', 'POST') - assert cook['obj_type'] == 'directory' - assert cook['obj_id'] == dir_id - assert cook['fetch_url'].endswith(f'vault/directory/{dir_id}/raw/') + cook = apiget(f"vault/directory/{dir_id}/", "POST") + assert cook["obj_type"] == "directory" + assert cook["obj_id"] == dir_id + assert cook["fetch_url"].endswith(f"vault/directory/{dir_id}/raw/") # while it's cooking, get the directory tree from the archive directory = getdirectory(dir_id) # retrieve the cooked tar file - resp = pollapi(f'vault/directory/{dir_id}/raw') + resp = pollapi(f"vault/directory/{dir_id}/raw") tarf = tarfile.open(fileobj=io.BytesIO(resp.content)) # and check the tarfile seems ok wrt. 
'directory' @@ -42,22 +41,22 @@ for fname, fdesc in directory: tfinfo = tarfiles.get(join(dir_id, fname)) assert tfinfo, f"Missing path {fname} in retrieved tarfile" - if fdesc['type'] == 'file': - assert fdesc['length'] == tfinfo.size, \ - f"File {fname}: length mismatch" + if fdesc["type"] == "file": + assert fdesc["length"] == tfinfo.size, f"File {fname}: length mismatch" fdata = tarf.extractfile(tfinfo).read() - for algo in fdesc['checksums']: + for algo in fdesc["checksums"]: if algo not in hashlib.algorithms_available: continue hash = hashlib.new(algo, fdata).hexdigest() - assert hash == fdesc['checksums'][algo], \ - f"File {fname}: {algo} mismatch" + assert ( + hash == fdesc["checksums"][algo] + ), f"File {fname}: {algo} mismatch" # XXX what to check for dir? symlink? (other?) # check that if we ask a second time this directory, it returns the same # and does not cook it again - recook = apiget(f'vault/directory/{dir_id}/', 'POST') - assert recook['obj_type'] == 'directory' - assert recook['obj_id'] == dir_id - assert recook['id'] == cook['id'] - assert recook['status'] == 'done' # no need to wait for this to be true + recook = apiget(f"vault/directory/{dir_id}/", "POST") + assert recook["obj_type"] == "directory" + assert recook["obj_id"] == dir_id + assert recook["id"] == cook["id"] + assert recook["status"] == "done" # no need to wait for this to be true diff --git a/pylintrc b/pylintrc --- a/pylintrc +++ b/pylintrc @@ -60,7 +60,7 @@ # can either give multiple identifiers separated by comma (,) or put this # option multiple times (only on the command line, not in the configuration # file where it should appear only once).You can also use "--disable=all" to -# disable everything first and then reenable specific checks. For example, if +# disable everything first and then re-enable specific checks. For example, if # you want to run only the similarities checker, you can use "--disable=all # --enable=similarities". If you want to run only the classes checker, but have # no Warning level messages displayed, use"--disable=all --enable=classes diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,11 @@ +[tool.black] +target-version = ['py37'] + +[tool.isort] +multi_line_output = 3 +include_trailing_comma = true +force_grid_wrap = 0 +use_parentheses = true +ensure_newline_before_comments = true +line_length = 88 +force_sort_within_sections = true diff --git a/setup.cfg b/setup.cfg new file mode 100644 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,6 @@ +[flake8] +# E203: whitespaces before ':' +# E231: missing whitespace after ',' +# W503: line break before binary operator +ignore = E203,E231,W503 +max-line-length = 88