diff --git a/docker/conf/storage-read-replica.yml b/docker/conf/storage-read-replica.yml
new file mode 100644
--- /dev/null
+++ b/docker/conf/storage-read-replica.yml
@@ -0,0 +1,15 @@
+# Storage API backed by the read-replica database; the object storage is
+# wrapped in a "readonly" filter.
+storage:
+  cls: local
+  args:
+    db: postgresql:///?service=swh-storage-read-replica
+    objstorage:
+      cls: filtered
+      args:
+        storage_conf:
+          cls: remote
+          args:
+            url: http://swh-objstorage:5003/
+        filters_conf:
+          - type: readonly
diff --git a/docker/conf/vault-read-replica.yml b/docker/conf/vault-read-replica.yml
new file mode 100644
--- /dev/null
+++ b/docker/conf/vault-read-replica.yml
@@ -0,0 +1,18 @@
+# Vault API configuration pointing at the read-replica storage.
+storage:
+  cls: remote
+  args:
+    url: http://swh-storage-read-replica:5002/
+scheduler:
+  cls: remote
+  args:
+    url: http://swh-scheduler:5008/
+vault:
+  cls: local
+  args:
+    db: postgresql:///?service=swh-vault
+cache:
+  cls: pathslicing
+  args:
+    root: /srv/softwareheritage/vault
+    slicing: "0:5"
diff --git a/docker/conf/vault-worker-read-replica.yml b/docker/conf/vault-worker-read-replica.yml
new file mode 100644
--- /dev/null
+++ b/docker/conf/vault-worker-read-replica.yml
@@ -0,0 +1,18 @@
+# Vault worker (cooker) configuration pointing at the read-replica storage.
+storage:
+  cls: remote
+  args:
+    url: http://swh-storage-read-replica:5002/
+vault:
+  cls: remote
+  args:
+    url: http://swh-vault:5005/
+celery:
+  task_broker: amqp://guest:guest@amqp//
+  task_modules:
+    - swh.vault.cooking_tasks
+  task_queues:
+    - swh.vault.cooking_tasks.SWHBatchCookingTask
+    - swh.vault.cooking_tasks.SWHCookingTask
+
+max_bundle_size: 536870912
diff --git a/docker/conf/web-read-replica.yml b/docker/conf/web-read-replica.yml
new file mode 100644
--- /dev/null
+++ b/docker/conf/web-read-replica.yml
@@ -0,0 +1,67 @@
+# Web application configuration pointing at the read-replica storage.
+storage:
+  cls: remote
+  args:
+    url: http://swh-storage-read-replica:5002/
+    timeout: 1
+
+indexer_storage:
+  cls: remote
+  args:
+    url: http://swh-idx-storage:5007/
+
+scheduler:
+  cls: remote
+  args:
+    url: http://swh-scheduler:5008/
+
+vault:
+  cls: remote
+  args:
+    url: http://swh-vault:5005/
+
+deposit:
+  private_api_url: https://swh-deposit:5006/1/private/
+  private_api_user: swhworker
+  private_api_password: ''
+
+allowed_hosts:
+  - "*"
+
+debug: true
+
+serve_assets: true
+
+development_db: /tmp/db.sqlite3
+production_db: /tmp/db.sqlite3
+
+throttling:
+  cache_uri: 127.0.0.1:11211
+  scopes:
+    swh_api:
+      limiter_rate:
+        default: 120/h
+      exempted_networks:
+        - 0.0.0.0/0
+    swh_api_origin_search:
+      limiter_rate:
+        default: 70/m
+      exempted_networks:
+        - 0.0.0.0/0
+    swh_api_origin_visit_latest:
+      limiter_rate:
+        default: 700/m
+      exempted_networks:
+        - 0.0.0.0/0
+    swh_vault_cooking:
+      limiter_rate:
+        default: 120/h
+      exempted_networks:
+        - 0.0.0.0/0
+    swh_save_origin:
+      limiter_rate:
+        default: 120/h
+      exempted_networks:
+        - 0.0.0.0/0
+
+search: {}
diff --git a/docker/docker-compose.storage-read-replica.yml b/docker/docker-compose.storage-read-replica.yml
new file mode 100644
--- /dev/null
+++ b/docker/docker-compose.storage-read-replica.yml
@@ -0,0 +1,65 @@
+version: '2'
+
+services:
+  swh-storage-read-replica-db:
+    image: postgres:11
+    env_file:
+      - ./env/storage-read-replica-db.env
+    environment:
+      # unset PGHOST as db service crashes otherwise
+      PGHOST:
+      # unset POSTGRES_DB: we're handling it ourselves
+      POSTGRES_DB:
+
+  swh-storage-read-replica:
+    image: swh/stack
+    build: ./
+    ports:
+      - "5042:5002"
+    depends_on:
+      - swh-storage
+      - swh-storage-db
+      - swh-storage-read-replica-db
+      - swh-objstorage
+    env_file:
+      - ./env/common_python.env
+      - ./env/storage-read-replica-db.env
+    environment:
+      SWH_CONFIG_FILENAME: /storage.yml
+      STORAGE_BACKEND: postgresql
+    entrypoint: /entrypoint.sh
+    volumes:
+      - "./conf/storage-read-replica.yml:/storage.yml:ro"
+      - "./services/swh-storage-read-replica/entrypoint.sh:/entrypoint.sh:ro"
+
+  # override storage db to enable wal_level=logical
+  swh-storage-db:
+    command: postgres -c wal_level=logical
+
+  # override web app to use the replica
+  swh-web:
+    depends_on:
+      - swh-storage-read-replica
+      - swh-idx-storage
+    environment:
+      SWH_CONFIG_FILENAME: /web-read-replica.yml
+    volumes:
+      - "./conf/web-read-replica.yml:/web-read-replica.yml:ro"
+
+  # override vault to use the replica
+  swh-vault:
+    depends_on:
+      - swh-storage-read-replica
+    environment:
+      SWH_CONFIG_FILENAME: /vault-read-replica.yml
+    volumes:
+      - "./conf/vault-read-replica.yml:/vault-read-replica.yml:ro"
+
+  # override vault worker to use the replica
+  swh-vault-worker:
+    depends_on:
+      - swh-storage-read-replica
+    environment:
+      SWH_CONFIG_FILENAME: /cooker-read-replica.yml
+    volumes:
+      - "./conf/vault-worker-read-replica.yml:/cooker-read-replica.yml:ro"
diff --git a/docker/env/storage-db.env b/docker/env/storage-db.env
--- a/docker/env/storage-db.env
+++ b/docker/env/storage-db.env
@@ -1,3 +1,5 @@
+# Keep in sync with the *_SRC variables in storage-read-replica-db.env
+
 PGHOST=swh-storage-db
 PGUSER=postgres
 POSTGRES_PASSWORD=testpassword
diff --git a/docker/env/storage-read-replica-db.env b/docker/env/storage-read-replica-db.env
new file mode 100644
--- /dev/null
+++ b/docker/env/storage-read-replica-db.env
@@ -0,0 +1,10 @@
+PGHOST=swh-storage-read-replica-db
+PGUSER=postgres
+POSTGRES_PASSWORD=testpassword
+POSTGRES_DB=swh-storage-read-replica
+
+# Must be kept in sync with storage-db.env
+PGHOST_SRC=swh-storage-db
+PGUSER_SRC=postgres
+POSTGRES_PASSWORD_SRC=testpassword
+POSTGRES_DB_SRC=swh-storage
diff --git a/docker/services/swh-storage-read-replica/entrypoint.sh b/docker/services/swh-storage-read-replica/entrypoint.sh
new file mode 100755
--- /dev/null
+++ b/docker/services/swh-storage-read-replica/entrypoint.sh
@@ -0,0 +1,85 @@
+#!/bin/bash
+#
+# Entrypoint of the swh-storage read replica: initialize the replica
+# database, set up logical replication from the source database, then
+# start the storage API server.
+
+set -e
+
+source /srv/softwareheritage/utils/pyutils.sh
+setup_pip
+
+if [ "$STORAGE_BACKEND" != "postgresql" ]; then
+    echo "Unsupported STORAGE_BACKEND ${STORAGE_BACKEND}; Only postgresql is supported."
+    exit 255
+fi
+
+source /srv/softwareheritage/utils/pgsql.sh
+setup_pgsql
+
+case "$1" in
+    "shell")
+        exec bash -i
+        ;;
+    *)
+        wait_pgsql template1
+
+        echo Database setup
+        if ! check_pgsql_db_created; then
+            echo Creating database and extensions...
+            swh db create --db-name ${POSTGRES_DB} storage
+        fi
+        echo Initializing the database...
+        swh db init --db-name ${POSTGRES_DB} --flavor read_replica storage
+
+        wait_pgsql ${POSTGRES_DB_SRC} ${PGHOST_SRC}
+
+        # The publication lives on the source database.
+        has_publication=$(\
+            psql service=${POSTGRES_DB_SRC} \
+                --quiet --no-psqlrc --no-align --tuples-only -v ON_ERROR_STOP=1 \
+                -c "select count(*) from pg_publication where pubname='softwareheritage';" \
+        )
+
+        if [ "${has_publication:-0}" -ge 1 ]; then
+            echo "Publication found on source database"
+        else
+            echo "Adding publication to source database"
+            replication_contents=$(python -c '
+from importlib_metadata import files
+
+for file in files("swh.storage"):
+    if str(file).endswith("sql/logical_replication/replication_source.sql"):
+        print(file.read_text())
+')
+            psql service=${POSTGRES_DB_SRC} \
+                -v ON_ERROR_STOP=1 \
+                -c "$replication_contents"
+        fi
+
+        # The subscription is created on the replica database (see below),
+        # so look it up there rather than on the source.
+        has_subscription=$(\
+            psql service=${POSTGRES_DB} \
+                --quiet --no-psqlrc --no-align --tuples-only -v ON_ERROR_STOP=1 \
+                -c "select count(*) from pg_subscription where subname='softwareheritage_replica';" \
+        )
+
+        if [ "${has_subscription:-0}" -ge 1 ]; then
+            echo "Subscription found on replica database"
+        else
+            echo "Adding subscription to replica database"
+            psql service=${POSTGRES_DB} -c "CREATE SUBSCRIPTION softwareheritage_replica CONNECTION 'host=${PGHOST_SRC} user=${PGUSER_SRC} dbname=${POSTGRES_DB_SRC} password=${POSTGRES_PASSWORD_SRC}' PUBLICATION softwareheritage;"
+        fi
+
+        echo Starting the swh-storage API server
+        exec gunicorn --bind 0.0.0.0:5002 \
+             --reload \
+             --threads 4 \
+             --workers 2 \
+             --log-level DEBUG \
+             --timeout 3600 \
+             --config 'python:swh.core.api.gunicorn_config' \
+             'swh.storage.api.server:make_app_from_configfile()'
+        ;;
+esac
diff --git a/docker/utils/pgsql.sh b/docker/utils/pgsql.sh
--- a/docker/utils/pgsql.sh
+++ b/docker/utils/pgsql.sh
@@ -14,6 +14,18 @@
 port=5432
 user=${PGUSER}
 EOF
+
+    if !
[ -z "$POSTGRES_DB_SRC" ]; then + echo "${PGHOST_SRC}:5432:${POSTGRES_DB_SRC}:${PGUSER_SRC}:${POSTGRES_PASSWORD_SRC}" >> ~/.pgpass + cat >> ~/.pg_service.conf <