diff --git a/docker/docker-compose.storage-mirror.yml b/docker/docker-compose.storage-mirror.yml index eba2841..f0494cd 100644 --- a/docker/docker-compose.storage-mirror.yml +++ b/docker/docker-compose.storage-mirror.yml @@ -1,66 +1,69 @@ version: '2' services: # override web app to use the mirror swh-web: environment: SWH_CONFIG_FILENAME: /web-mirror.yml volumes: - "./conf/web-mirror.yml:/web-mirror.yml:ro" # create a dedicated db for the mirror swh-storage-mirror-db: image: postgres:11 env_file: - ./env/storage-db-mirror.env environment: # unset PGHOST as db service crashes otherwise PGHOST: + # unset POSTGRES_DB: we're handling db creation ourselves in the backend + # service entrypoint + POSTGRES_DB: # and an RPC server swh-storage-mirror: image: swh/stack build: ./ depends_on: - swh-storage-mirror-db - swh-objstorage env_file: - ./env/storage-db-mirror.env environment: SWH_CONFIG_FILENAME: /storage-mirror.yml entrypoint: /entrypoint.sh volumes: - "./conf/storage-mirror.yml:/storage-mirror.yml:ro" - "./services/swh-storage/entrypoint.sh:/entrypoint.sh:ro" # and the background process that keeps the mirror in sync with the # main graph swh-storage-mirror-replayer: image: swh/stack build: ./ depends_on: - swh-storage-mirror-db - swh-objstorage env_file: - ./env/storage-db-mirror.env environment: SWH_CONFIG_FILENAME: /storage-mirror.yml entrypoint: /entrypoint.sh volumes: - "./conf/storage-mirror.yml:/storage-mirror.yml:ro" - "./services/swh-storage-replayer/entrypoint.sh:/entrypoint.sh:ro" swh-journal-backfiller: image: swh/stack build: ./ entrypoint: /entrypoint.sh environment: SWH_CONFIG_FILENAME: /journal_backfiller.yml env_file: - ./env/storage-db.env depends_on: - swh-storage-db - kafka volumes: - "./conf/journal_backfiller.yml:/journal_backfiller.yml:ro" - "./services/swh-journal-backfiller/entrypoint.sh:/entrypoint.sh:ro" diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 00e502c..d73bc2f 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -1,419 +1,431 @@ version: '2' services: amqp: image: rabbitmq:3.6-management ports: - 5072:5672 # flower: # image: mher/flower # command: --broker=amqp://guest:guest@amqp:5672// --url_prefix=flower # ports: # - 5055:5555 # depends_on: # - amqp zookeeper: image: wurstmeister/zookeeper restart: always kafka: image: wurstmeister/kafka ports: - "5092:9092" env_file: ./env/kafka.env depends_on: - zookeeper kafka-manager: image: hlebalbau/kafka-manager:stable ports: - "5093:9000" environment: ZK_HOSTS: zookeeper:2181 APPLICATION_SECRET: random-secret depends_on: - zookeeper command: -Dpidfile.path=/dev/null volumes: - "./conf/kafka-manager_logback.xml:/kafka-manager/conf/logback.xml:ro" prometheus: image: prom/prometheus depends_on: - prometheus-statsd-exporter command: # Needed for the reverse-proxy - "--web.external-url=/prometheus" - "--config.file=/etc/prometheus/prometheus.yml" volumes: - "./conf/prometheus.yml:/etc/prometheus/prometheus.yml:ro" restart: unless-stopped prometheus-statsd-exporter: image: prom/statsd-exporter command: - "--statsd.mapping-config=/etc/prometheus/statsd-mapping.yml" volumes: - "./conf/prometheus-statsd-mapping.yml:/etc/prometheus/statsd-mapping.yml:ro" restart: unless-stopped prometheus-rabbitmq-exporter: image: kbudde/rabbitmq-exporter restart: unless-stopped environment: SKIP_QUEUES: "RPC_.*" MAX_QUEUES: 5000 RABBIT_URL: http://amqp:15672 LOG_LEVEL: warning grafana: image: grafana/grafana restart: unless-stopped depends_on: - prometheus environment: GF_SERVER_ROOT_URL: http://localhost:5080/grafana volumes: - "./conf/grafana/provisioning:/etc/grafana/provisioning:ro" - "./conf/grafana/dashboards:/var/lib/grafana/dashboards" nginx: image: nginx volumes: - "./conf/nginx.conf:/etc/nginx/nginx.conf:ro" ports: - 5080:5080 # Scheduler swh-scheduler-db: image: postgres:11 env_file: - ./env/common_python.env - ./env/scheduler-db.env environment: # unset PGHOST as db service crashes otherwise PGHOST: + # unset POSTGRES_DB: we're handling db creation ourselves in the backend + # service entrypoint + POSTGRES_DB: swh-scheduler: image: swh/stack build: ./ env_file: - ./env/common_python.env - ./env/scheduler-db.env - ./env/scheduler.env environment: SWH_CONFIG_FILENAME: /scheduler.yml SWH_SCHEDULER_CONFIG_FILE: /scheduler.yml entrypoint: /entrypoint.sh depends_on: - swh-scheduler-db ports: - 5008:5008 volumes: - "./conf/scheduler.yml:/scheduler.yml:ro" - "./services/swh-scheduler/entrypoint.sh:/entrypoint.sh:ro" swh-scheduler-listener: image: swh/stack build: ./ env_file: - ./env/common_python.env - ./env/scheduler-db.env - ./env/scheduler.env environment: SWH_CONFIG_FILENAME: /scheduler.yml SWH_SCHEDULER_CONFIG_FILE: /scheduler.yml entrypoint: /entrypoint.sh command: start-listener depends_on: - swh-scheduler - amqp volumes: - "./conf/scheduler.yml:/scheduler.yml:ro" - "./services/swh-scheduler-worker/entrypoint.sh:/entrypoint.sh:ro" swh-scheduler-runner: image: swh/stack build: ./ env_file: - ./env/common_python.env - ./env/scheduler-db.env - ./env/scheduler.env environment: SWH_CONFIG_FILENAME: /scheduler.yml SWH_SCHEDULER_CONFIG_FILE: /scheduler.yml entrypoint: /entrypoint.sh command: start-runner -p 10 depends_on: - swh-scheduler - amqp volumes: - "./conf/scheduler.yml:/scheduler.yml:ro" - "./services/swh-scheduler-worker/entrypoint.sh:/entrypoint.sh:ro" # Graph storage swh-storage-db: image: postgres:11 env_file: - ./env/storage-db.env environment: # unset PGHOST as db service crashes otherwise PGHOST: + # unset POSTGRES_DB: we're handling db creation ourselves in the backend + # service entrypoint + POSTGRES_DB: swh-storage: image: swh/stack build: ./ ports: - 5002:5002 depends_on: - swh-storage-db - swh-objstorage - kafka env_file: - ./env/common_python.env - ./env/storage-db.env environment: SWH_CONFIG_FILENAME: /storage.yml STORAGE_BACKEND: postgresql entrypoint: /entrypoint.sh volumes: - "./conf/storage.yml:/storage.yml:ro" - "./services/swh-storage/entrypoint.sh:/entrypoint.sh:ro" # Object storage swh-objstorage: build: ./ image: swh/stack ports: - 5003:5003 env_file: - ./env/common_python.env environment: SWH_CONFIG_FILENAME: /objstorage.yml entrypoint: /entrypoint.sh volumes: - "./conf/objstorage.yml:/objstorage.yml:ro" - "./services/swh-objstorage/entrypoint.sh:/entrypoint.sh:ro" # Indexer storage swh-idx-storage-db: image: postgres:11 env_file: - ./env/indexers-db.env environment: # unset PGHOST as db service crashes otherwise PGHOST: + # unset POSTGRES_DB: we're handling db creation ourselves in the backend + # service entrypoint + POSTGRES_DB: swh-idx-storage: image: swh/stack build: ./ ports: - 5007:5007 depends_on: - swh-idx-storage-db env_file: - ./env/common_python.env - ./env/indexers-db.env environment: SWH_CONFIG_FILENAME: /indexer_storage.yml entrypoint: /entrypoint.sh volumes: - "./conf/indexer_storage.yml:/indexer_storage.yml:ro" - "./services/swh-indexer-storage/entrypoint.sh:/entrypoint.sh:ro" # Web interface swh-web: build: ./ image: swh/stack ports: - 5004:5004 depends_on: - swh-storage - swh-idx-storage env_file: - ./env/common_python.env environment: VERBOSITY: 3 DJANGO_SETTINGS_MODULE: swh.web.settings.production SWH_CONFIG_FILENAME: /web.yml entrypoint: /entrypoint.sh volumes: - "./conf/web.yml:/web.yml:ro" - "./services/swh-web/entrypoint.sh:/entrypoint.sh:ro" swh-deposit-db: image: postgres:11 env_file: - ./env/deposit-db.env environment: # unset PGHOST as db service crashes otherwise PGHOST: swh-deposit: image: swh/stack build: ./ ports: - 5006:5006 depends_on: - swh-deposit-db - swh-scheduler env_file: - ./env/common_python.env - ./env/deposit-db.env environment: VERBOSITY: 3 SWH_CONFIG_FILENAME: /deposit.yml DJANGO_SETTINGS_MODULE: swh.deposit.settings.production entrypoint: /entrypoint.sh volumes: - "./conf/deposit.yml:/deposit.yml:ro" - "./services/swh-deposit/entrypoint.sh:/entrypoint.sh:ro" swh-vault-db: image: postgres:11 env_file: - ./env/vault-db.env environment: # unset PGHOST as db service crashes otherwise PGHOST: + # unset POSTGRES_DB: we're handling db creation ourselves in the backend + # service entrypoint + POSTGRES_DB: swh-vault: image: swh/stack build: ./ env_file: - ./env/common_python.env - ./env/vault-db.env environment: SWH_CONFIG_FILENAME: /vault.yml command: server ports: - 5005:5005 depends_on: - swh-vault-db - swh-objstorage - swh-storage - swh-scheduler entrypoint: /entrypoint.sh volumes: - "./conf/vault.yml:/vault.yml:ro" - "./services/swh-vault/entrypoint.sh:/entrypoint.sh:ro" swh-vault-worker: image: swh/stack build: ./ command: worker env_file: - ./env/common_python.env environment: SWH_CONFIG_FILENAME: /cooker.yml depends_on: - swh-vault - swh-storage entrypoint: /entrypoint.sh volumes: - "./conf/vault-worker.yml:/cooker.yml:ro" - "./services/swh-vault/entrypoint.sh:/entrypoint.sh:ro" # Lister Celery workers swh-listers-db: image: postgres:11 env_file: - ./env/listers-db.env environment: # unset PGHOST as db service crashes otherwise PGHOST: swh-lister: image: swh/stack build: ./ env_file: - ./env/common_python.env - ./env/listers-db.env - ./env/workers.env user: swh environment: SWH_WORKER_INSTANCE: listers SWH_CONFIG_FILENAME: /lister.yml depends_on: - swh-listers-db - swh-scheduler - swh-scheduler-runner - swh-storage - amqp entrypoint: /entrypoint.sh volumes: - "./conf/lister.yml:/lister.yml:ro" - "./services/swh-listers-worker/entrypoint.sh:/entrypoint.sh:ro" # Loader + deposit checker Celery workers swh-loader: image: swh/stack build: ./ env_file: - ./env/common_python.env - ./env/workers.env user: swh environment: SWH_WORKER_INSTANCE: loader SWH_CONFIG_FILENAME: /loader.yml entrypoint: /entrypoint.sh depends_on: - swh-storage - swh-scheduler - swh-deposit - amqp volumes: - "./conf/loader.yml:/loader.yml:ro" - "./services/swh-worker/entrypoint.sh:/entrypoint.sh:ro" # Indexer Celery workers swh-indexer: image: swh/stack build: ./ user: swh env_file: - ./env/common_python.env - ./env/indexers-db.env - ./env/workers.env environment: SWH_WORKER_INSTANCE: indexer SWH_CONFIG_FILENAME: /indexer.yml CONCURRENCY: 4 entrypoint: /entrypoint.sh depends_on: - swh-scheduler-runner - swh-idx-storage - swh-storage - swh-objstorage - amqp volumes: - "./conf/indexer.yml:/indexer.yml:ro" - "./services/swh-indexer-worker/entrypoint.sh:/entrypoint.sh:ro" # Journal related swh-indexer-journal-client: image: swh/stack build: ./ entrypoint: /entrypoint.sh env_file: - ./env/common_python.env depends_on: - kafka - swh-storage - swh-scheduler volumes: - "./conf/indexer_journal_client.yml:/etc/softwareheritage/indexer/journal_client.yml:ro" - "./services/swh-indexer-journal-client/entrypoint.sh:/entrypoint.sh:ro" diff --git a/docker/services/swh-indexer-storage/entrypoint.sh b/docker/services/swh-indexer-storage/entrypoint.sh index 69484fe..bbf36dc 100755 --- a/docker/services/swh-indexer-storage/entrypoint.sh +++ b/docker/services/swh-indexer-storage/entrypoint.sh @@ -1,33 +1,37 @@ #!/bin/bash set -e source /srv/softwareheritage/utils/pyutils.sh setup_pip source /srv/softwareheritage/utils/pgsql.sh setup_pgsql case "$1" in "shell") exec bash -i ;; *) - wait_pgsql + wait_pgsql template1 - echo Setup the database - PGPASSWORD=${POSTGRES_PASSWORD} swh db-init \ - --db-name ${POSTGRES_DB} indexer + echo Database setup + if ! check_pgsql_db_created; then + echo Creating database and extensions... + swh db create --db-name ${POSTGRES_DB} indexer + fi + echo Initializing the database... + swh db init --db-name ${POSTGRES_DB} indexer echo Starting the swh-indexer-storage API server exec gunicorn --bind 0.0.0.0:5007 \ --reload \ --threads 2 \ --workers 2 \ --log-level DEBUG \ --timeout 3600 \ --config 'python:swh.core.api.gunicorn_config' \ 'swh.indexer.storage.api.server:make_app_from_configfile()' ;; esac diff --git a/docker/services/swh-scheduler/entrypoint.sh b/docker/services/swh-scheduler/entrypoint.sh index 0468e92..5b8bb95 100755 --- a/docker/services/swh-scheduler/entrypoint.sh +++ b/docker/services/swh-scheduler/entrypoint.sh @@ -1,35 +1,39 @@ #!/bin/bash set -e source /srv/softwareheritage/utils/pyutils.sh source /srv/softwareheritage/utils/pgsql.sh setup_pgsql setup_pip if [ "$1" = 'shell' ] ; then shift if (( $# == 0)); then exec bash -i else "$@" fi else - wait_pgsql + wait_pgsql template1 - echo Setup the swh-scheduler API database - PGPASSWORD=${POSTGRES_PASSWORD} swh db-init \ - --db-name ${POSTGRES_DB} scheduler + echo swh-scheduler database setup + if ! check_pgsql_db_created; then + echo Creating database and extensions... + swh db create --db-name ${POSTGRES_DB} scheduler + fi + echo Initializing the database... + swh db init --db-name ${POSTGRES_DB} scheduler echo Starting the swh-scheduler API server exec gunicorn --bind 0.0.0.0:5008 \ --log-level DEBUG \ --threads 2 \ --workers 2 \ --reload \ --timeout 3600 \ --config 'python:swh.core.api.gunicorn_config' \ 'swh.scheduler.api.server:make_app_from_configfile()' fi diff --git a/docker/services/swh-storage/entrypoint.sh b/docker/services/swh-storage/entrypoint.sh index 5b3f3e4..32b0895 100755 --- a/docker/services/swh-storage/entrypoint.sh +++ b/docker/services/swh-storage/entrypoint.sh @@ -1,46 +1,50 @@ #!/bin/bash set -e source /srv/softwareheritage/utils/pyutils.sh setup_pip if [ "$STORAGE_BACKEND" = "postgresql" ]; then source /srv/softwareheritage/utils/pgsql.sh setup_pgsql elif [ "$STORAGE_BACKEND" = "cassandra" ]; then echo Waiting for Cassandra to start wait-for-it ${CASSANDRA_SEED}:9042 -s --timeout=0 echo Creating keyspace cat << EOF | python3 from swh.storage.cassandra import create_keyspace create_keyspace(['cassandra-seed'], 'swh') EOF fi case "$1" in "shell") exec bash -i ;; *) if [ "$STORAGE_BACKEND" = "postgresql" ]; then - wait_pgsql - - echo Setup the database - PGPASSWORD=${POSTGRES_PASSWORD} swh db-init \ - --db-name ${POSTGRES_DB} storage + wait_pgsql template1 + + echo Database setup + if ! check_pgsql_db_created; then + echo Creating database and extensions... + swh db create --db-name ${POSTGRES_DB} storage + fi + echo Initializing the database... + swh db init --db-name ${POSTGRES_DB} storage fi echo Starting the swh-storage API server exec gunicorn --bind 0.0.0.0:5002 \ --reload \ --threads 4 \ --workers 2 \ --log-level DEBUG \ --timeout 3600 \ --config 'python:swh.core.api.gunicorn_config' \ 'swh.storage.api.server:make_app_from_configfile()' ;; esac diff --git a/docker/services/swh-vault/entrypoint.sh b/docker/services/swh-vault/entrypoint.sh index 2734bfc..242cf7f 100755 --- a/docker/services/swh-vault/entrypoint.sh +++ b/docker/services/swh-vault/entrypoint.sh @@ -1,38 +1,42 @@ #!/bin/bash set -e source /srv/softwareheritage/utils/pyutils.sh setup_pip source /srv/softwareheritage/utils/pgsql.sh setup_pgsql case "$1" in "shell") exec bash -i ;; "worker") echo Starting the swh-vault Celery worker for exec python -m celery worker \ --app=swh.scheduler.celery_backend.config.app \ --pool=prefork --events \ --concurrency=${CONCURRENCY:-1} \ --maxtasksperchild=${MAX_TASKS_PER_CHILD:-10} \ -Ofair --loglevel=${LOGLEVEL:-INFO} --without-gossip \ --without-mingle --without-heartbeat \ --hostname "vault@%h" ;; "server") # ensure the pathslicing root dir for the cache exists mkdir -p /srv/softwareheritage/vault - wait_pgsql + wait_pgsql template1 - echo Setup the swh-vault API database - PGPASSWORD=${POSTGRES_PASSWORD} swh db-init vault \ - --db-name ${POSTGRES_DB} + echo swh-vault Database setup + if ! check_pgsql_db_created; then + echo Creating database and extensions... + swh db create --db-name ${POSTGRES_DB} vault + fi + echo Initializing the database... + swh db init --db-name ${POSTGRES_DB} vault echo Starting the swh-vault API server exec swh vault rpc-serve -C ${SWH_CONFIG_FILENAME} esac diff --git a/docker/utils/pgsql.sh b/docker/utils/pgsql.sh index 6e30e83..5de9fce 100644 --- a/docker/utils/pgsql.sh +++ b/docker/utils/pgsql.sh @@ -1,20 +1,34 @@ #!/bin/bash setup_pgsql () { - echo "${PGHOST}:5432:postgres:${PGUSER}:${POSTGRES_PASSWORD}" > ~/.pgpass + : > ~/.pgpass + + echo "${PGHOST}:5432:template1:${PGUSER}:${POSTGRES_PASSWORD}" >> ~/.pgpass + echo "${PGHOST}:5432:${PGUSER}:${PGUSER}:${POSTGRES_PASSWORD}" >> ~/.pgpass echo "${PGHOST}:5432:${POSTGRES_DB}:${PGUSER}:${POSTGRES_PASSWORD}" >> ~/.pgpass cat > ~/.pg_service.conf < /dev/null 2> /dev/null; do sleep 1; done -} \ No newline at end of file + until psql "dbname=${db_to_check} port=5432 host=${PGHOST} user=${PGUSER}" -c "select 'postgresql is up!' as connected"; do sleep 1; done +} + +check_pgsql_db_created () { + psql "dbname=${POSTGRES_DB} port=5432 host=${PGHOST} user=${PGUSER}" -c "select 'postgresql is up!' as connected" >/dev/null 2>/dev/null +}