diff --git a/conf/journal_client.yml b/conf/journal_client.yml index 3bda7ee..6c7a2be 100644 --- a/conf/journal_client.yml +++ b/conf/journal_client.yml @@ -1,4 +1,8 @@ brokers: - kafka # small number of batch max_messages: 1 +scheduler: + cls: remote + args: + url: http://swh-scheduler-api:5008/ diff --git a/conf/lister.yml b/conf/lister.yml index 94ffe23..887cc2b 100644 --- a/conf/lister.yml +++ b/conf/lister.yml @@ -1,14 +1,26 @@ storage: cls: remote args: url: http://swh-storage:5002/ scheduler: cls: remote args: url: http://swh-scheduler-api:5008/ +lister: + cls: local + args: + db: postgresql:///?service=swh + celery: task_broker: amqp://guest:guest@amqp// + task_modules: + - swh.lister.bitbucket.tasks + - swh.lister.debian.tasks + - swh.lister.github.tasks + - swh.lister.gitlab.tasks + - swh.lister.npm.tasks + - swh.lister.pypi.tasks lister_db_url: postgresql:///?service=swh diff --git a/conf/loader.yml b/conf/loader.yml index 76499ad..df20afe 100644 --- a/conf/loader.yml +++ b/conf/loader.yml @@ -1,7 +1,19 @@ storage: cls: remote args: url: http://swh-storage:5002/ -lister_db_url: postgresql:///?service=swh +lister: + cls: local + args: + db: postgresql:///?service=swh celery: task_broker: amqp://guest:guest@amqp// + task_modules: + - swh.loader.debian.tasks + - swh.loader.dir.tasks + - swh.loader.git.tasks + - swh.loader.mercurial.tasks + - swh.loader.pypi.tasks + - swh.loader.svn.tasks + - swh.loader.tar.tasks +lister_db_url: postgresql:///?service=swh diff --git a/docker-compose.yml b/docker-compose.yml index a20a663..fe94139 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,451 +1,279 @@ version: '2' services: amqp: image: rabbitmq:3.6-management ports: - 5072:5672 - flower: - image: mher/flower - command: --broker=amqp://guest:guest@amqp:5672// --url_prefix=flower - ports: - - 5055:5555 - depends_on: - - amqp +# flower: +# image: mher/flower +# command: --broker=amqp://guest:guest@amqp:5672// --url_prefix=flower +# ports: +# - 5055:5555 +# depends_on: +# - amqp zookeeper: image: wurstmeister/zookeeper kafka: image: wurstmeister/kafka ports: - 5092:9092 env_file: ./env/kafka.env depends_on: - zookeeper prometheus: image: prom/prometheus command: # Needed for the reverse-proxy - "--web.external-url=/prometheus" - "--config.file=/etc/prometheus/prometheus.yml" volumes: - "./conf/prometheus.yml:/etc/prometheus/prometheus.yml:ro" restart: unless-stopped prometheus-statsd-exporter: image: prom/statsd-exporter command: - "--statsd.mapping-config=/etc/prometheus/statsd-mapping.yml" volumes: - "./conf/prometheus-statsd-mapping.yml:/etc/prometheus/statsd-mapping.yml:ro" restart: unless-stopped nginx: image: nginx volumes: - "./conf/nginx.conf:/etc/nginx/nginx.conf:ro" ports: - 5080:5080 # Scheduler swh-scheduler-db: image: postgres:10 environment: POSTGRES_PASSWORD: testpassword POSTGRES_DB: swh-scheduler swh-scheduler-api: image: swh/scheduler-api build: ./dockerfiles/swh-scheduler-api env_file: ./env/scheduler.env depends_on: - swh-scheduler-db ports: - 5008:5008 volumes: - "./conf/scheduler.yml:/scheduler.yml:ro" swh-scheduler-listener: image: swh/scheduler-worker build: ./dockerfiles/swh-scheduler-worker env_file: ./env/scheduler.env command: listener depends_on: - swh-scheduler-api - amqp volumes: - "./conf/scheduler.yml:/scheduler.yml:ro" swh-scheduler-runner: image: swh/scheduler-worker build: ./dockerfiles/swh-scheduler-worker env_file: ./env/scheduler.env command: runner -p 10 depends_on: - swh-scheduler-api - amqp volumes: - "./conf/scheduler.yml:/scheduler.yml:ro" # Graph storage swh-storage-db: image: postgres:10 environment: POSTGRES_PASSWORD: testpassword POSTGRES_DB: swh-storage swh-storage: build: ./dockerfiles/swh-storage image: swh/storage ports: - 5002:5002 depends_on: - swh-storage-db - swh-objstorage env_file: ./env/storage.env volumes: - "./conf/storage.yml:/storage.yml:ro" # Object storage swh-objstorage: build: ./dockerfiles/swh-objstorage image: swh/objstorage ports: - 5003:5003 volumes: - "./conf/objstorage.yml:/objstorage.yml:ro" # Indexer storage swh-idx-storage-db: image: postgres:10 environment: POSTGRES_PASSWORD: testpassword POSTGRES_DB: swh-idx-storage swh-idx-storage: build: ./dockerfiles/swh-indexer-storage image: swh/indexer-storage ports: - 5007:5007 depends_on: - swh-idx-storage-db environment: POSTGRES_PASSWORD: testpassword POSTGRES_DB: swh-idx-storage PGHOST: swh-idx-storage-db PGUSER: postgres volumes: - "./conf/indexer_storage.yml:/indexer_storage.yml:ro" # Web interface swh-web: build: ./dockerfiles/swh-web image: swh/web ports: - 5004:5004 depends_on: - swh-objstorage - swh-storage - swh-idx-storage + environment: + VERBOSITY: 3 volumes: - "./conf/web.yml:/etc/softwareheritage/web/web.yml:ro" swh-deposit-db: image: postgres:10 env_file: ./env/deposit.env swh-deposit: build: ./dockerfiles/swh-deposit image: swh/deposit ports: - 5006:5006 depends_on: - swh-deposit-db - swh-scheduler-api env_file: ./env/deposit.env environment: PGHOST: swh-deposit-db volumes: - "./conf/deposit_server.yml:/etc/softwareheritage/deposit/server.yml:ro" - "./conf/deposit_private.yml:/etc/softwareheritage/deposit/private.yml:ro" # Lister Celery workers swh-listers-db: image: postgres:10 environment: POSTGRES_PASSWORD: testpassword - swh-lister-debian: - image: swh/listers-worker - build: ./dockerfiles/swh-listers-worker - env_file: ./env/listers.env - environment: - STATSD_HOST: prometheus-statsd-exporter - STATSD_PORT: 9125 - SWH_WORKER_INSTANCE: debian - depends_on: - - swh-listers-db - - swh-scheduler-api - - swh-scheduler-runner - - swh-storage - - amqp - volumes: - - "./conf/lister.yml:/home/swh/.config/swh/lister.yml:ro" - - swh-lister-bitbucket: + swh-lister: image: swh/listers-worker build: ./dockerfiles/swh-listers-worker env_file: ./env/listers.env environment: STATSD_HOST: prometheus-statsd-exporter STATSD_PORT: 9125 - SWH_WORKER_INSTANCE: bitbucket + SWH_WORKER_INSTANCE: listers + SWH_CONFIG_FILENAME: /lister.yml depends_on: - swh-listers-db - swh-scheduler-api - swh-scheduler-runner - swh-storage - amqp volumes: - - "./conf/lister.yml:/home/swh/.config/swh/lister.yml:ro" + - "./conf/lister.yml:/lister.yml:ro" - swh-lister-github: - image: swh/listers-worker - build: ./dockerfiles/swh-listers-worker - env_file: ./env/listers.env - environment: - STATSD_HOST: prometheus-statsd-exporter - STATSD_PORT: 9125 - SWH_WORKER_INSTANCE: github - depends_on: - - swh-listers-db - - swh-scheduler-api - - swh-scheduler-runner - - swh-storage - - amqp - volumes: - - "./conf/lister.yml:/home/swh/.config/swh/lister.yml:ro" - - swh-lister-gitlab: - image: swh/listers-worker - build: ./dockerfiles/swh-listers-worker - env_file: ./env/listers.env - environment: - STATSD_HOST: prometheus-statsd-exporter - STATSD_PORT: 9125 - SWH_WORKER_INSTANCE: gitlab - depends_on: - - swh-listers-db - - swh-scheduler-api - - swh-scheduler-runner - - swh-storage - - amqp - volumes: - - "./conf/lister.yml:/home/swh/.config/swh/lister.yml:ro" - - swh-lister-npm: - image: swh/listers-worker - build: ./dockerfiles/swh-listers-worker - env_file: ./env/listers.env - environment: - STATSD_HOST: prometheus-statsd-exporter - STATSD_PORT: 9125 - SWH_WORKER_INSTANCE: npm - depends_on: - - swh-listers-db - - swh-scheduler-api - - swh-scheduler-runner - - swh-storage - - amqp - volumes: - - "./conf/lister.yml:/home/swh/.config/swh/lister.yml:ro" +# Loader Celery workers - swh-lister-pypi: - image: swh/listers-worker - build: ./dockerfiles/swh-listers-worker + swh-loader: + image: swh/loaders-worker + build: ./dockerfiles/swh-loaders-worker env_file: ./env/listers.env environment: STATSD_HOST: prometheus-statsd-exporter STATSD_PORT: 9125 - SWH_WORKER_INSTANCE: pypi + SWH_WORKER_INSTANCE: loader + SWH_CONFIG_FILENAME: /loader.yml depends_on: - - swh-listers-db - - swh-scheduler-api - - swh-scheduler-runner - swh-storage - amqp volumes: - - "./conf/lister.yml:/home/swh/.config/swh/lister.yml:ro" + - "./conf/loader.yml:/loader.yml:ro" # Indexer Celery workers swh-indexer: image: swh/indexer-worker build: ./dockerfiles/swh-indexer-worker env_file: ./env/indexers.env environment: STATSD_HOST: prometheus-statsd-exporter STATSD_PORT: 9125 - depends_on: - - swh-scheduler-runner - - swh-idx-storage - - swh-storage - - swh-objstorage - - amqp - depends_on: - swh-scheduler-runner - swh-idx-storage - swh-storage - swh-objstorage - amqp volumes: - - "./conf/indexer.yml:/home/swh/.config/swh/indexer.yml:ro" + - "./conf/indexer.yml:/indexer.yml:ro" swh-indexer-journal-client: image: swh/indexer-journal-client build: ./dockerfiles/swh-indexer-journal-client depends_on: - swh-journal-publisher - swh-scheduler-api volumes: - "./conf/journal_client.yml:/etc/softwareheritage/indexer/journal_client.yml:ro" # Journal related swh-storage-listener: image: swh/storage-listener build: ./dockerfiles/swh-storage-listener env_file: ./env/storage.env depends_on: - swh-storage-db - kafka volumes: - "./conf/storage_listener.yml:/etc/softwareheritage/storage/listener.yml:ro" swh-journal-publisher: image: swh/journal-publisher build: ./dockerfiles/swh-journal-publisher depends_on: - kafka - swh-storage-listener volumes: - "./conf/journal_publisher.yml:/etc/softwareheritage/journal/publisher.yml:ro" swh-journal-client: image: swh/journal-client build: ./dockerfiles/swh-journal-client depends_on: - swh-journal-publisher volumes: - "./conf/journal_client.yml:/etc/softwareheritage/journal/logger.yml:ro" - -# Loader Celery workers - - swh-loader-debian: - image: swh/loaders-worker - build: ./dockerfiles/swh-loaders-worker - env_file: ./env/listers.env - environment: - STATSD_HOST: prometheus-statsd-exporter - STATSD_PORT: 9125 - SWH_WORKER_INSTANCE: debian - depends_on: - - swh-storage - - amqp - volumes: - - "./conf/loader.yml:/home/swh/.config/swh/loader.yml:ro" - - swh-loader-dir: - image: swh/loaders-worker - build: ./dockerfiles/swh-loaders-worker - env_file: ./env/listers.env - environment: - STATSD_HOST: prometheus-statsd-exporter - STATSD_PORT: 9125 - SWH_WORKER_INSTANCE: dir - depends_on: - - swh-storage - - amqp - volumes: - - "./conf/loader.yml:/home/swh/.config/swh/loader.yml:ro" - - swh-loader-git: - image: swh/loaders-worker - build: ./dockerfiles/swh-loaders-worker - env_file: ./env/listers.env - environment: - STATSD_HOST: prometheus-statsd-exporter - STATSD_PORT: 9125 - SWH_WORKER_INSTANCE: git - depends_on: - - swh-storage - - amqp - volumes: - - "./conf/loader.yml:/home/swh/.config/swh/loader.yml:ro" - - swh-loader-mercurial: - image: swh/loaders-worker - build: ./dockerfiles/swh-loaders-worker - env_file: ./env/listers.env - environment: - STATSD_HOST: prometheus-statsd-exporter - STATSD_PORT: 9125 - SWH_WORKER_INSTANCE: mercurial - depends_on: - - swh-storage - - amqp - volumes: - - "./conf/loader.yml:/home/swh/.config/swh/loader.yml:ro" - - swh-loader-pypi: - image: swh/loaders-worker - build: ./dockerfiles/swh-loaders-worker - env_file: ./env/listers.env - environment: - STATSD_HOST: prometheus-statsd-exporter - STATSD_PORT: 9125 - SWH_WORKER_INSTANCE: pypi - depends_on: - - swh-storage - - amqp - volumes: - - "./conf/loader.yml:/home/swh/.config/swh/loader.yml:ro" - - swh-loader-svn: - image: swh/loaders-worker - build: ./dockerfiles/swh-loaders-worker - env_file: ./env/listers.env - environment: - STATSD_HOST: prometheus-statsd-exporter - STATSD_PORT: 9125 - SWH_WORKER_INSTANCE: svn - depends_on: - - swh-storage - - amqp - volumes: - - "./conf/loader.yml:/home/swh/.config/swh/loader.yml:ro" - - swh-loader-tar: - image: swh/loaders-worker - build: ./dockerfiles/swh-loaders-worker - env_file: ./env/listers.env - environment: - STATSD_HOST: prometheus-statsd-exporter - STATSD_PORT: 9125 - SWH_WORKER_INSTANCE: tar - depends_on: - - swh-storage - - amqp - volumes: - - "./conf/loader.yml:/home/swh/.config/swh/loader.yml:ro" diff --git a/dockerfiles/swh-listers-worker/entrypoint.sh b/dockerfiles/swh-listers-worker/entrypoint.sh index a5699e4..8978f3b 100755 --- a/dockerfiles/swh-listers-worker/entrypoint.sh +++ b/dockerfiles/swh-listers-worker/entrypoint.sh @@ -1,72 +1,63 @@ #!/bin/bash set -e export PATH=${HOME}/.local/bin:${PATH} if [[ -d /src ]] ; then for srcrepo in /src/swh-* ; do pushd $srcrepo echo "WARNING: $srcrepo wil NOT be pip installed in dev mode" echo " due to permission limitations." pip install --user . popd done fi echo Installed Python packages: pip list export POSTGRES_DB=swh-lister-${SWH_WORKER_INSTANCE} echo "${PGHOST}:5432:postgres:${PGUSER}:${POSTGRES_PASSWORD}" > ~/.pgpass echo "${PGHOST}:5432:${POSTGRES_DB}:${PGUSER}:${POSTGRES_PASSWORD}" >> ~/.pgpass cat > ~/.pg_service.conf < ~/.config/swh/worker/${SWH_WORKER_INSTANCE}.ini < ~/.config/swh/worker/${SWH_WORKER_INSTANCE}.ini <