diff --git a/Dockerfile b/Dockerfile index 07aeeef..1ea69bb 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,45 +1,47 @@ FROM python:3.6 RUN export DEBIAN_FRONTEND=noninteractive && \ apt-get update && apt-get upgrade -y && \ apt-get install -y \ libapr1-dev \ libaprutil1-dev \ libpq-dev \ libsvn-dev \ libsystemd-dev \ postgresql-client \ wait-for-it && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* RUN pip install --upgrade pip setuptools wheel RUN pip install swh-deposit swh-indexer swh-journal swh-lister swh-loader-debian \ swh-loader-dir swh-loader-git swh-loader-mercurial swh-loader-pypi \ swh-loader-svn swh-loader-tar swh-storage swh-objstorage \ swh-scheduler swh-vault swh-web RUN pip install gunicorn psycopg2-binary # to make pip silent COPY services/swh-deposit/entrypoint.sh /swh-deposit/entrypoint.sh COPY services/swh-indexer-journal-client/entrypoint.sh /swh-indexer-journal-client/entrypoint.sh COPY services/swh-indexer-storage/entrypoint.sh /swh-indexer-storage/entrypoint.sh COPY services/swh-indexer-worker/entrypoint.sh /swh-indexer-worker/entrypoint.sh COPY services/swh-journal-client/entrypoint.sh /swh-journal-client/entrypoint.sh COPY services/swh-journal-publisher/entrypoint.sh /swh-journal-publisher/entrypoint.sh COPY services/swh-listers-worker/entrypoint.sh /swh-listers-worker/entrypoint.sh COPY services/swh-loaders-worker/entrypoint.sh /swh-loaders-worker/entrypoint.sh COPY services/swh-objstorage/entrypoint.sh /swh-objstorage/entrypoint.sh COPY services/swh-scheduler-api/entrypoint.sh /swh-scheduler-api/entrypoint.sh COPY services/swh-scheduler-worker/entrypoint.sh /swh-scheduler-worker/entrypoint.sh COPY services/swh-storage/entrypoint.sh /swh-storage/entrypoint.sh COPY services/swh-storage-listener/entrypoint.sh /swh-storage-listener/entrypoint.sh COPY services/swh-vault/entrypoint.sh /swh-vault/entrypoint.sh COPY services/swh-web/entrypoint.sh /swh-web/entrypoint.sh COPY services/swh-journal-client/client.py /swh-journal-client/ +COPY utils/pgsql.sh /swh-utils/pgsql.sh + RUN useradd -ms /bin/bash swh RUN mkdir -p /srv/softwareheritage/objects diff --git a/conf/deposit.yml b/conf/deposit.yml index 5337bc1..6e50b38 100644 --- a/conf/deposit.yml +++ b/conf/deposit.yml @@ -1,14 +1,14 @@ scheduler: cls: remote args: url: http://swh-scheduler:5008 private: secret_key: prod-in-docker db: host: swh-deposit-db port: 5432 - name: softwareheritage-deposit + name: swh-deposit user: postgres password: testpassword media_root: /tmp/swh-deposit/uploads diff --git a/conf/indexer_storage.yml b/conf/indexer_storage.yml index c0975ee..a5eb85c 100644 --- a/conf/indexer_storage.yml +++ b/conf/indexer_storage.yml @@ -1,4 +1,4 @@ indexer_storage: cls: local args: - db: postgresql:///?service=swh-indexer + db: postgresql:///?service=swh-indexers diff --git a/conf/lister.yml b/conf/lister.yml index 32fd0c6..d6ac2c4 100644 --- a/conf/lister.yml +++ b/conf/lister.yml @@ -1,44 +1,44 @@ storage: cls: remote args: url: http://swh-storage:5002/ scheduler: cls: remote args: url: http://swh-scheduler-api:5008/ lister: cls: local args: - db: postgresql:///?service=swh + db: postgresql:///?service=swh-listers celery: task_broker: amqp://guest:guest@amqp// task_modules: - swh.lister.bitbucket.tasks - swh.lister.debian.tasks - swh.lister.github.tasks - swh.lister.gitlab.tasks - swh.lister.npm.tasks - swh.lister.pypi.tasks task_queues: - swh.lister.bitbucket.tasks.FullBitBucketRelister - swh.lister.bitbucket.tasks.IncrementalBitBucketLister - swh.lister.bitbucket.tasks.RangeBitBucketLister - swh.lister.bitbucket.tasks.ping - swh.lister.debian.tasks.DebianListerTask - swh.lister.debian.tasks.ping - swh.lister.github.tasks.FullGitHubRelister - swh.lister.github.tasks.IncrementalGitHubLister - swh.lister.github.tasks.RangeGitHubLister - swh.lister.github.tasks.ping - swh.lister.gitlab.tasks.FullGitLabRelister - swh.lister.gitlab.tasks.IncrementalGitLabLister - swh.lister.gitlab.tasks.RangeGitLabLister - swh.lister.gitlab.tasks.ping - swh.lister.npm.tasks.NpmIncrementalListerTask - swh.lister.npm.tasks.NpmListerTask - swh.lister.npm.tasks.ping - swh.lister.pypi.tasks.PyPIListerTask - swh.lister.pypi.tasks.ping diff --git a/conf/loader.yml b/conf/loader.yml index a668d25..8a26a3a 100644 --- a/conf/loader.yml +++ b/conf/loader.yml @@ -1,33 +1,29 @@ storage: cls: remote args: url: http://swh-storage:5002/ -lister: - cls: local - args: - db: postgresql:///?service=swh celery: task_broker: amqp://guest:guest@amqp// task_modules: - swh.loader.debian.tasks - swh.loader.dir.tasks - swh.loader.git.tasks - swh.loader.mercurial.tasks - swh.loader.pypi.tasks - swh.loader.svn.tasks - swh.loader.tar.tasks task_queues: - swh.loader.debian.tasks.LoadDebianPackage - swh.loader.dir.tasks.LoadDirRepository - swh.loader.git.tasks.LoadDiskGitRepository - swh.loader.git.tasks.UncompressAndLoadDiskGitRepository - swh.loader.git.tasks.UpdateGitRepository - swh.loader.mercurial.tasks.LoadArchiveMercurial - swh.loader.mercurial.tasks.LoadMercurial - swh.loader.pypi.tasks.LoadPyPI - swh.loader.svn.tasks.DumpMountAndLoadSvnRepository - swh.loader.svn.tasks.LoadSvnRepository - swh.loader.svn.tasks.MountAndLoadSvnRepository - swh.loader.tar.tasks.LoadTarRepository lister_db_url: postgresql:///?service=swh diff --git a/conf/storage.yml b/conf/storage.yml index c4dc0ec..1ce9eb1 100644 --- a/conf/storage.yml +++ b/conf/storage.yml @@ -1,8 +1,8 @@ storage: cls: local args: - db: postgresql:///?service=swh + db: postgresql:///?service=swh-storage objstorage: cls: remote args: url: http://swh-objstorage:5003/ diff --git a/conf/storage_listener.yml b/conf/storage_listener.yml index 6eaeba6..28d876c 100644 --- a/conf/storage_listener.yml +++ b/conf/storage_listener.yml @@ -1,4 +1,4 @@ -database: postgresql:///?service=swh +database: postgresql:///?service=swh-storage brokers: - kafka topic_prefix: swh.tmp.journal.new diff --git a/docker-compose.yml b/docker-compose.yml index 8e89dcd..9ea5af3 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,358 +1,389 @@ version: '2' services: amqp: image: rabbitmq:3.6-management ports: - 5072:5672 # flower: # image: mher/flower # command: --broker=amqp://guest:guest@amqp:5672// --url_prefix=flower # ports: # - 5055:5555 # depends_on: # - amqp zookeeper: image: wurstmeister/zookeeper kafka: image: wurstmeister/kafka ports: - 5092:9092 env_file: ./env/kafka.env depends_on: - zookeeper prometheus: image: prom/prometheus command: # Needed for the reverse-proxy - "--web.external-url=/prometheus" - "--config.file=/etc/prometheus/prometheus.yml" volumes: - "./conf/prometheus.yml:/etc/prometheus/prometheus.yml:ro" restart: unless-stopped prometheus-statsd-exporter: image: prom/statsd-exporter command: - "--statsd.mapping-config=/etc/prometheus/statsd-mapping.yml" volumes: - "./conf/prometheus-statsd-mapping.yml:/etc/prometheus/statsd-mapping.yml:ro" restart: unless-stopped grafana: image: grafana/grafana restart: unless-stopped environment: GF_SERVER_ROOT_URL: http://localhost:5080/grafana volumes: - "./conf/grafana/provisioning:/etc/grafana/provisioning:ro" - "./conf/grafana/dashboards:/var/lib/grafana/dashboards" nginx: image: nginx volumes: - "./conf/nginx.conf:/etc/nginx/nginx.conf:ro" ports: - 5080:5080 # Scheduler swh-scheduler-db: image: postgres:11 + env_file: + - ./env/scheduler-db.env environment: - POSTGRES_PASSWORD: testpassword - POSTGRES_DB: swh-scheduler + # unset PGHOST as db service crashes otherwise + PGHOST: swh-scheduler-api: image: swh/stack build: ./ - env_file: ./env/scheduler.env + env_file: + - ./env/scheduler-db.env + - ./env/scheduler.env entrypoint: /swh-scheduler-api/entrypoint.sh environment: SWH_CONFIG_FILENAME: /scheduler.yml SWH_SCHEDULER_CONFIG_FILE: /scheduler.yml depends_on: - swh-scheduler-db ports: - 5008:5008 volumes: - "./conf/scheduler.yml:/scheduler.yml:ro" swh-scheduler-listener: image: swh/stack build: ./ entrypoint: /swh-scheduler-worker/entrypoint.sh - env_file: ./env/scheduler.env + env_file: + - ./env/scheduler-db.env + - ./env/scheduler.env environment: SWH_CONFIG_FILENAME: /scheduler.yml SWH_SCHEDULER_CONFIG_FILE: /scheduler.yml command: listener depends_on: - swh-scheduler-api - amqp volumes: - "./conf/scheduler.yml:/scheduler.yml:ro" swh-scheduler-runner: image: swh/stack build: ./ entrypoint: /swh-scheduler-worker/entrypoint.sh - env_file: ./env/scheduler.env + env_file: + - ./env/scheduler-db.env + - ./env/scheduler.env environment: SWH_CONFIG_FILENAME: /scheduler.yml SWH_SCHEDULER_CONFIG_FILE: /scheduler.yml command: runner -p 10 depends_on: - swh-scheduler-api - amqp volumes: - "./conf/scheduler.yml:/scheduler.yml:ro" # Graph storage swh-storage-db: image: postgres:11 + env_file: + - ./env/storage-db.env environment: - POSTGRES_PASSWORD: testpassword - POSTGRES_DB: swh-storage + # unset PGHOST as db service crashes otherwise + PGHOST: swh-storage: build: ./ image: swh/stack entrypoint: /swh-storage/entrypoint.sh ports: - 5002:5002 depends_on: - swh-storage-db - swh-objstorage - env_file: ./env/storage.env + env_file: + - ./env/storage-db.env + environment: + SWH_CONFIG_FILENAME: /storage.yml volumes: - "./conf/storage.yml:/storage.yml:ro" # Object storage swh-objstorage: build: ./ image: swh/stack entrypoint: /swh-objstorage/entrypoint.sh ports: - 5003:5003 environment: SWH_CONFIG_FILENAME: /objstorage.yml volumes: - "./conf/objstorage.yml:/objstorage.yml:ro" # Indexer storage swh-idx-storage-db: image: postgres:11 + env_file: + - ./env/indexers-db.env environment: - POSTGRES_PASSWORD: testpassword - POSTGRES_DB: swh-idx-storage + # unset PGHOST as db service crashes otherwise + PGHOST: swh-idx-storage: build: ./ image: swh/stack entrypoint: /swh-indexer-storage/entrypoint.sh ports: - 5007:5007 depends_on: - swh-idx-storage-db + env_file: + - ./env/indexers-db.env environment: - POSTGRES_PASSWORD: testpassword - POSTGRES_DB: swh-idx-storage - PGHOST: swh-idx-storage-db - PGUSER: postgres SWH_CONFIG_FILENAME: /indexer_storage.yml volumes: - "./conf/indexer_storage.yml:/indexer_storage.yml:ro" # Web interface swh-web: build: ./ image: swh/stack entrypoint: /swh-web/entrypoint.sh ports: - 5004:5004 depends_on: - swh-objstorage - swh-storage - swh-idx-storage environment: VERBOSITY: 3 DJANGO_SETTINGS_MODULE: swh.web.settings.development SWH_CONFIG_FILENAME: /web.yml PYTHONPATH: /tmp/swh volumes: - "./conf/web.yml:/web.yml:ro" swh-deposit-db: image: postgres:11 - env_file: ./env/deposit.env + env_file: + - ./env/deposit-db.env + environment: + # unset PGHOST as db service crashes otherwise + PGHOST: swh-deposit: build: ./ entrypoint: /swh-deposit/entrypoint.sh image: swh/stack ports: - 5006:5006 depends_on: - swh-deposit-db - swh-scheduler-api - env_file: ./env/deposit.env - environment: - PGHOST: swh-deposit-db + env_file: + - ./env/deposit-db.env + - ./env/deposit.env volumes: - "./conf/deposit.yml:/deposit.yml:ro" swh-vault-db: image: postgres:11 + env_file: + - ./env/vault-db.env environment: - POSTGRES_PASSWORD: testpassword - POSTGRES_DB: swh-vault + # unset PGHOST as db service crashes otherwise + PGHOST: swh-vault-api: build: ./ image: swh/stack entrypoint: /swh-vault/entrypoint.sh - env_file: ./env/vault.env + env_file: + - ./env/vault-db.env + environment: + SWH_CONFIG_FILENAME: /vault-api.yml command: server ports: - 5005:5005 depends_on: - swh-vault-db - swh-objstorage - swh-storage - swh-scheduler-api volumes: - "./conf/vault-api.yml:/vault-api.yml:ro" swh-vault-worker: build: ./ image: swh/stack entrypoint: /swh-vault/entrypoint.sh command: worker environment: SWH_CONFIG_FILENAME: /cooker.yml depends_on: - swh-vault-api - swh-storage volumes: - "./conf/vault-worker.yml:/cooker.yml:ro" # Lister Celery workers swh-listers-db: image: postgres:11 + env_file: + - ./env/listers-db.env environment: - POSTGRES_PASSWORD: testpassword + # unset PGHOST as db service crashes otherwise + PGHOST: swh-lister: image: swh/stack build: ./ entrypoint: /swh-listers-worker/entrypoint.sh - env_file: ./env/listers.env + env_file: + - ./env/listers-db.env + - ./env/listers.env user: swh environment: STATSD_HOST: prometheus-statsd-exporter STATSD_PORT: 9125 SWH_WORKER_INSTANCE: listers SWH_CONFIG_FILENAME: /lister.yml depends_on: - swh-listers-db - swh-scheduler-api - swh-scheduler-runner - swh-storage - amqp volumes: - "./conf/lister.yml:/lister.yml:ro" # Loader Celery workers swh-loader: image: swh/stack build: ./ entrypoint: /swh-loaders-worker/entrypoint.sh - env_file: ./env/listers.env + env_file: + - ./env/listers.env user: swh environment: STATSD_HOST: prometheus-statsd-exporter STATSD_PORT: 9125 SWH_WORKER_INSTANCE: loader SWH_CONFIG_FILENAME: /loader.yml depends_on: - swh-storage - amqp volumes: - "./conf/loader.yml:/loader.yml:ro" # Indexer Celery workers swh-indexer: image: swh/stack build: ./ user: swh entrypoint: /swh-indexer-worker/entrypoint.sh - env_file: ./env/indexers.env + env_file: + - ./env/indexers-db.env + - ./env/indexers.env environment: STATSD_HOST: prometheus-statsd-exporter STATSD_PORT: 9125 depends_on: - swh-scheduler-runner - swh-idx-storage - swh-storage - swh-objstorage - amqp volumes: - "./conf/indexer.yml:/indexer.yml:ro" # Journal related swh-storage-listener: image: swh/stack build: ./ entrypoint: /swh-storage-listener/entrypoint.sh - env_file: ./env/storage.env + env_file: + - ./env/storage-db.env depends_on: - swh-storage-db - kafka volumes: - "./conf/storage_listener.yml:/etc/softwareheritage/storage/listener.yml:ro" swh-journal-publisher: image: swh/stack build: ./ entrypoint: /swh-journal-publisher/entrypoint.sh environment: SWH_CONFIG_FILENAME: /journal_publisher.yml depends_on: - kafka - swh-storage-listener volumes: - "./conf/journal_publisher.yml:/journal_publisher.yml:ro" swh-journal-client: image: swh/stack build: ./ entrypoint: /swh-journal-client/entrypoint.sh depends_on: - swh-journal-publisher volumes: - "./conf/journal_client.yml:/etc/softwareheritage/journal/logger.yml:ro" swh-indexer-journal-client: image: swh/stack build: ./ entrypoint: /swh-indexer-journal-client/entrypoint.sh depends_on: - swh-journal-publisher - swh-scheduler-api volumes: - "./conf/journal_client.yml:/etc/softwareheritage/indexer/journal_client.yml:ro" diff --git a/env/deposit-db.env b/env/deposit-db.env new file mode 100644 index 0000000..0a4973d --- /dev/null +++ b/env/deposit-db.env @@ -0,0 +1,5 @@ +PGHOST=swh-deposit-db +PGUSER=postgres +POSTGRES_PASSWORD=testpassword +POSTGRES_DB=swh-deposit + diff --git a/env/deposit.env b/env/deposit.env index 29365b0..a57d67f 100644 --- a/env/deposit.env +++ b/env/deposit.env @@ -1,5 +1,2 @@ -POSTGRES_DB=softwareheritage-deposit -POSTGRES_PASSWORD=testpassword -PGUSER=postgres SWH_CONFIG_FILENAME=/deposit.yml -DJANGO_SETTINGS_MODULE=swh.deposit.settings.production \ No newline at end of file +DJANGO_SETTINGS_MODULE=swh.deposit.settings.production diff --git a/env/indexers-db.env b/env/indexers-db.env new file mode 100644 index 0000000..0352306 --- /dev/null +++ b/env/indexers-db.env @@ -0,0 +1,4 @@ +PGHOST=swh-idx-storage-db +PGUSER=postgres +POSTGRES_PASSWORD=testpassword +POSTGRES_DB=swh-idx-storage \ No newline at end of file diff --git a/env/listers.env b/env/listers-db.env similarity index 57% copy from env/listers.env copy to env/listers-db.env index f02519a..5146abc 100644 --- a/env/listers.env +++ b/env/listers-db.env @@ -1,6 +1,4 @@ -CONCURRENCY=1 -MAX_TASKS_PER_CHILD=10 -LOGLEVEL=DEBUG -POSTGRES_PASSWORD=testpassword PGHOST=swh-listers-db PGUSER=postgres +POSTGRES_PASSWORD=testpassword +POSTGRES_DB=swh-listers diff --git a/env/listers.env b/env/listers.env index f02519a..0f02e61 100644 --- a/env/listers.env +++ b/env/listers.env @@ -1,6 +1,3 @@ CONCURRENCY=1 MAX_TASKS_PER_CHILD=10 LOGLEVEL=DEBUG -POSTGRES_PASSWORD=testpassword -PGHOST=swh-listers-db -PGUSER=postgres diff --git a/env/scheduler.env b/env/scheduler-db.env similarity index 56% copy from env/scheduler.env copy to env/scheduler-db.env index 5499b57..8e1de99 100644 --- a/env/scheduler.env +++ b/env/scheduler-db.env @@ -1,7 +1,4 @@ -POSTGRES_PASSWORD=testpassword -POSTGRES_DB=swh-scheduler PGHOST=swh-scheduler-db PGUSER=postgres -SWH_WORKER_INSTANCE=scheduler -LOGLEVEL=INFO -CELERY_BROKER_URL=amqp://amqp// +POSTGRES_PASSWORD=testpassword +POSTGRES_DB=swh-scheduler diff --git a/env/scheduler.env b/env/scheduler.env index 5499b57..8650403 100644 --- a/env/scheduler.env +++ b/env/scheduler.env @@ -1,7 +1,3 @@ -POSTGRES_PASSWORD=testpassword -POSTGRES_DB=swh-scheduler -PGHOST=swh-scheduler-db -PGUSER=postgres SWH_WORKER_INSTANCE=scheduler LOGLEVEL=INFO CELERY_BROKER_URL=amqp://amqp// diff --git a/env/storage-db.env b/env/storage-db.env new file mode 100644 index 0000000..65da09b --- /dev/null +++ b/env/storage-db.env @@ -0,0 +1,4 @@ +PGHOST=swh-storage-db +PGUSER=postgres +POSTGRES_PASSWORD=testpassword +POSTGRES_DB=swh-storage diff --git a/env/vault-db.env b/env/vault-db.env new file mode 100644 index 0000000..2adadcf --- /dev/null +++ b/env/vault-db.env @@ -0,0 +1,4 @@ +POSTGRES_DB=swh-vault +POSTGRES_PASSWORD=testpassword +PGUSER=postgres +PGHOST=swh-vault-db diff --git a/services/swh-deposit/entrypoint.sh b/services/swh-deposit/entrypoint.sh index 17b6def..d992abe 100755 --- a/services/swh-deposit/entrypoint.sh +++ b/services/swh-deposit/entrypoint.sh @@ -1,41 +1,34 @@ #!/bin/bash set -ex if [[ -d /src ]] ; then pwd for src_repo in /src/swh-* ; do pushd $src_repo echo "Installing ${src_repo}" pip install -e . popd done fi -echo "${PGHOST}:5432:${POSTGRES_DB}:${PGUSER}:${POSTGRES_PASSWORD}" > ~/.pgpass -cat > ~/.pg_service.conf < ~/.pgpass -cat > ~/.pg_service.conf < ~/.pgpass -echo "${PGHOST}:5432:${POSTGRES_DB}:${PGUSER}:${POSTGRES_PASSWORD}" >> ~/.pgpass -cat > ~/.pg_service.conf < ~/.pgpass -cat > ~/.pg_service.conf < ~/.pgpass -cat > ~/.pg_service.conf < ~/.pgpass -cat > ~/.pg_service.conf < ~/.pgpass -cat > ~/.pg_service.conf < ~/.pgpass - cat > ~/.pg_service.conf < ~/.pgpass + echo "${PGHOST}:5432:${POSTGRES_DB}:${PGUSER}:${POSTGRES_PASSWORD}" >> ~/.pgpass + cat > ~/.pg_service.conf < /dev/null 2> /dev/null; do sleep 1; done +} \ No newline at end of file