diff --git a/docker/conf/loader-deposit.yml b/docker/conf/loader-deposit.yml new file mode 100644 index 0000000..37a75aa --- /dev/null +++ b/docker/conf/loader-deposit.yml @@ -0,0 +1,26 @@ +storage: + cls: pipeline + steps: + - cls: buffer + min_batch_size: + content: 10000 + content_bytes: 104857600 + directory: 1000 + revision: 1000 + - cls: filter + - cls: remote + url: http://swh-storage:5002/ + +celery: + task_broker: amqp://guest:guest@amqp// + task_queues: + - swh.loader.package.deposit.tasks.LoadDeposit + - swh.deposit.loader.tasks.ChecksDepositTsk + +deposit: + url: http://swh-deposit:5006/1/private + auth: + username: test + password: test + +url: 'http://swh-deposit:5006' diff --git a/docker/conf/loader.yml b/docker/conf/loader.yml index 78db274..c6d0c8d 100644 --- a/docker/conf/loader.yml +++ b/docker/conf/loader.yml @@ -1,48 +1,30 @@ storage: cls: pipeline steps: - cls: buffer min_batch_size: content: 10000 content_bytes: 104857600 directory: 1000 revision: 1000 - cls: filter - cls: remote url: http://swh-storage:5002/ - -scheduler: - cls: remote - url: http://swh-scheduler:5008/ - -deposit: - url: http://swh-deposit:5006/1/private - auth: - username: test - password: test - celery: task_broker: amqp://guest:guest@amqp// - task_queues: - swh.loader.dir.tasks.LoadDirRepository - swh.loader.git.tasks.LoadDiskGitRepository - swh.loader.git.tasks.UncompressAndLoadDiskGitRepository - swh.loader.git.tasks.UpdateGitRepository - swh.loader.mercurial.tasks.LoadArchiveMercurial - swh.loader.mercurial.tasks.LoadMercurial - swh.loader.package.archive.tasks.LoadArchive - swh.loader.package.cran.tasks.LoadCRAN - swh.loader.package.debian.tasks.LoadDebian - - swh.loader.package.deposit.tasks.LoadDeposit - swh.loader.package.npm.tasks.LoadNpm - swh.loader.package.pypi.tasks.LoadPyPI - swh.loader.svn.tasks.DumpMountAndLoadSvnRepository - swh.loader.svn.tasks.LoadSvnRepository - swh.loader.svn.tasks.MountAndLoadSvnRepository - - swh.deposit.loader.tasks.ChecksDepositTsk - -lister_db_url: postgresql://postgres@swh-listers-db/swh-listers - -url: 'http://swh-deposit:5006' diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 4a1bab7..5f6ad7d 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -1,405 +1,424 @@ version: "2.1" services: amqp: image: rabbitmq:3.6-management ports: - 5072:5672 zookeeper: image: wurstmeister/zookeeper restart: always kafka: image: wurstmeister/kafka ports: - "5092:9092" env_file: ./env/kafka.env environment: KAFKA_CREATE_TOPICS: swh.journal.objects.origin:1:1, swh.journal.objects.origin_visit:1:1, swh.journal.objects.origin_visit_status:1:1, swh.journal.indexed.origin_intrinsic_metadata:1:1 depends_on: - zookeeper healthcheck: test: "[ `JMX_PORT= kafka-topics.sh --list --zookeeper zookeeper:2181 | wc -l` -ge 4 ]" interval: 10s timeout: 5s retries: 10 prometheus: image: prom/prometheus depends_on: - prometheus-statsd-exporter command: # Needed for the reverse-proxy - "--web.external-url=/prometheus" - "--config.file=/etc/prometheus/prometheus.yml" volumes: - "./conf/prometheus.yml:/etc/prometheus/prometheus.yml:ro" restart: unless-stopped prometheus-statsd-exporter: image: prom/statsd-exporter command: - "--statsd.mapping-config=/etc/prometheus/statsd-mapping.yml" volumes: - "./conf/prometheus-statsd-mapping.yml:/etc/prometheus/statsd-mapping.yml:ro" restart: unless-stopped prometheus-rabbitmq-exporter: image: kbudde/rabbitmq-exporter restart: unless-stopped environment: SKIP_QUEUES: "RPC_.*" MAX_QUEUES: 5000 RABBIT_URL: http://amqp:15672 LOG_LEVEL: warning grafana: image: grafana/grafana restart: unless-stopped depends_on: - prometheus environment: GF_SERVER_ROOT_URL: http://localhost:5080/grafana volumes: - "./conf/grafana/provisioning:/etc/grafana/provisioning:ro" - "./conf/grafana/dashboards:/var/lib/grafana/dashboards" nginx: image: nginx volumes: - "./conf/nginx.conf:/etc/nginx/nginx.conf:ro" ports: - 5080:5080 # Scheduler swh-scheduler-db: image: postgres:12 env_file: - ./env/common_python.env - ./env/scheduler-db.env swh-scheduler: image: swh/stack build: ./ env_file: - ./env/common_python.env - ./env/scheduler-db.env - ./env/scheduler.env environment: SWH_CONFIG_FILENAME: /scheduler.yml SWH_SCHEDULER_CONFIG_FILE: /scheduler.yml entrypoint: /entrypoint.sh depends_on: - swh-scheduler-db ports: - 5008:5008 volumes: - "./conf/scheduler.yml:/scheduler.yml:ro" - "./services/swh-scheduler/entrypoint.sh:/entrypoint.sh:ro" swh-scheduler-listener: image: swh/stack build: ./ env_file: - ./env/common_python.env - ./env/scheduler-db.env - ./env/scheduler.env environment: SWH_CONFIG_FILENAME: /scheduler.yml SWH_SCHEDULER_CONFIG_FILE: /scheduler.yml entrypoint: /entrypoint.sh command: start-listener depends_on: - swh-scheduler - amqp volumes: - "./conf/scheduler.yml:/scheduler.yml:ro" - "./services/swh-scheduler-worker/entrypoint.sh:/entrypoint.sh:ro" swh-scheduler-runner: image: swh/stack build: ./ env_file: - ./env/common_python.env - ./env/scheduler-db.env - ./env/scheduler.env environment: SWH_CONFIG_FILENAME: /scheduler.yml SWH_SCHEDULER_CONFIG_FILE: /scheduler.yml entrypoint: /entrypoint.sh command: start-runner -p 10 depends_on: - swh-scheduler - amqp volumes: - "./conf/scheduler.yml:/scheduler.yml:ro" - "./services/swh-scheduler-worker/entrypoint.sh:/entrypoint.sh:ro" # Graph storage swh-storage-db: image: postgres:12 env_file: - ./env/storage-db.env swh-storage: image: swh/stack build: ./ ports: - 5002:5002 depends_on: - swh-storage-db - swh-objstorage - kafka env_file: - ./env/common_python.env - ./env/storage.env environment: SWH_CONFIG_FILENAME: /storage.yml STORAGE_BACKEND: postgresql entrypoint: /entrypoint.sh volumes: - "./conf/storage.yml:/storage.yml:ro" - "./services/swh-storage/entrypoint.sh:/entrypoint.sh:ro" # Object storage swh-objstorage: build: ./ image: swh/stack ports: - 5003:5003 env_file: - ./env/common_python.env environment: SWH_CONFIG_FILENAME: /objstorage.yml entrypoint: /entrypoint.sh volumes: - "./conf/objstorage.yml:/objstorage.yml:ro" - "./services/swh-objstorage/entrypoint.sh:/entrypoint.sh:ro" # Indexer storage swh-idx-storage-db: image: postgres:12 env_file: - ./env/indexers-db.env swh-idx-storage: image: swh/stack build: ./ ports: - 5007:5007 depends_on: - swh-idx-storage-db env_file: - ./env/common_python.env - ./env/indexers-db.env - ./env/indexers.env environment: SWH_CONFIG_FILENAME: /indexer_storage.yml entrypoint: /entrypoint.sh volumes: - "./conf/indexer_storage.yml:/indexer_storage.yml:ro" - "./services/swh-indexer-storage/entrypoint.sh:/entrypoint.sh:ro" # Web interface swh-web: build: ./ image: swh/stack ports: - 5004:5004 depends_on: - swh-storage - swh-idx-storage env_file: - ./env/common_python.env environment: VERBOSITY: 3 DJANGO_SETTINGS_MODULE: swh.web.settings.production SWH_CONFIG_FILENAME: /web.yml entrypoint: /entrypoint.sh volumes: - "./conf/web.yml:/web.yml:ro" - "./services/swh-web/entrypoint.sh:/entrypoint.sh:ro" swh-deposit-db: image: postgres:12 env_file: - ./env/deposit-db.env swh-deposit: image: swh/stack build: ./ ports: - 5006:5006 depends_on: - swh-deposit-db - swh-scheduler env_file: - ./env/common_python.env - ./env/deposit-db.env - ./env/deposit.env environment: VERBOSITY: 3 SWH_CONFIG_FILENAME: /deposit.yml DJANGO_SETTINGS_MODULE: swh.deposit.settings.production entrypoint: /entrypoint.sh volumes: - "./conf/deposit.yml:/deposit.yml:ro" - "./services/swh-deposit/entrypoint.sh:/entrypoint.sh:ro" swh-vault-db: image: postgres:12 env_file: - ./env/vault-db.env swh-vault: image: swh/stack build: ./ env_file: - ./env/common_python.env - ./env/vault-db.env - ./env/vault.env environment: SWH_CONFIG_FILENAME: /vault.yml command: server ports: - 5005:5005 depends_on: - swh-vault-db - swh-objstorage - swh-storage - swh-scheduler entrypoint: /entrypoint.sh volumes: - "./conf/vault.yml:/vault.yml:ro" - "./services/swh-vault/entrypoint.sh:/entrypoint.sh:ro" swh-vault-worker: image: swh/stack build: ./ command: worker env_file: - ./env/common_python.env - ./env/workers.env environment: SWH_CONFIG_FILENAME: /cooker.yml depends_on: - swh-vault - swh-storage entrypoint: /entrypoint.sh volumes: - "./conf/vault-worker.yml:/cooker.yml:ro" - "./services/swh-vault/entrypoint.sh:/entrypoint.sh:ro" # Lister Celery workers swh-lister: image: swh/stack build: ./ env_file: - ./env/common_python.env - ./env/listers.env - ./env/workers.env user: swh environment: SWH_WORKER_INSTANCE: listers SWH_CONFIG_FILENAME: /lister.yml depends_on: - swh-scheduler - swh-scheduler-runner - amqp entrypoint: /entrypoint.sh volumes: - "./conf/lister.yml:/lister.yml:ro" - "./services/swh-listers-worker/entrypoint.sh:/entrypoint.sh:ro" # Loader + deposit checker Celery workers swh-loader: image: swh/stack build: ./ env_file: - ./env/common_python.env - ./env/workers.env user: swh environment: SWH_WORKER_INSTANCE: loader SWH_CONFIG_FILENAME: /loader.yml entrypoint: /entrypoint.sh depends_on: - swh-storage - swh-scheduler - - swh-deposit - amqp volumes: - "./conf/loader.yml:/loader.yml:ro" - "./services/swh-worker/entrypoint.sh:/entrypoint.sh:ro" + swh-loader-deposit: + image: swh/stack + build: ./ + env_file: + - ./env/common_python.env + - ./env/workers.env + user: swh + environment: + SWH_WORKER_INSTANCE: loader-deposit + SWH_CONFIG_FILENAME: /loader-deposit.yml + entrypoint: /entrypoint.sh + depends_on: + - swh-storage + - swh-scheduler + - swh-deposit + - amqp + volumes: + - "./conf/loader-deposit.yml:/loader-deposit.yml:ro" + - "./services/swh-worker/entrypoint.sh:/entrypoint.sh:ro" + # Indexer Celery workers swh-indexer: image: swh/stack build: ./ user: swh env_file: - ./env/common_python.env - ./env/indexers-db.env - ./env/indexers.env - ./env/workers.env environment: SWH_WORKER_INSTANCE: indexer SWH_CONFIG_FILENAME: /indexer.yml CONCURRENCY: 4 entrypoint: /entrypoint.sh depends_on: - swh-scheduler-runner - swh-idx-storage - swh-storage - swh-objstorage - amqp volumes: - "./conf/indexer.yml:/indexer.yml:ro" - "./services/swh-indexer-worker/entrypoint.sh:/entrypoint.sh:ro" # Journal related swh-indexer-journal-client: image: swh/stack build: ./ entrypoint: /entrypoint.sh env_file: - ./env/common_python.env depends_on: kafka: condition: service_healthy swh-storage: condition: service_started swh-scheduler: condition: service_started volumes: - "./conf/indexer_journal_client.yml:/etc/softwareheritage/indexer/journal_client.yml:ro" - "./services/swh-indexer-journal-client/entrypoint.sh:/entrypoint.sh:ro" swh-scheduler-journal-client: image: swh/stack build: ./ entrypoint: /entrypoint.sh env_file: - ./env/common_python.env depends_on: kafka: condition: service_healthy swh-scheduler: condition: service_started volumes: - "./conf/scheduler_journal_client.yml:/etc/softwareheritage/scheduler/journal_client.yml:ro" - "./services/swh-scheduler-journal-client/entrypoint.sh:/entrypoint.sh:ro" diff --git a/docker/env/workers.env b/docker/env/workers.env index 0f02e61..042fea8 100644 --- a/docker/env/workers.env +++ b/docker/env/workers.env @@ -1,3 +1,4 @@ CONCURRENCY=1 MAX_TASKS_PER_CHILD=10 LOGLEVEL=DEBUG +SWH_SCHEDULER_INSTANCE=http://swh-scheduler:5008 diff --git a/docker/services/swh-worker/entrypoint.sh b/docker/services/swh-worker/entrypoint.sh index e314fd8..647781d 100755 --- a/docker/services/swh-worker/entrypoint.sh +++ b/docker/services/swh-worker/entrypoint.sh @@ -1,30 +1,30 @@ #!/bin/bash set -e source /srv/softwareheritage/utils/pyutils.sh setup_pip case "$1" in "shell") exec bash -i ;; *) echo Waiting for RabbitMQ to start wait-for-it amqp:5672 -s --timeout=0 echo Register task types in scheduler database wait-for-it swh-scheduler:5008 -s --timeout=0 - swh scheduler -C ${SWH_CONFIG_FILENAME} task-type register + swh scheduler --url ${SWH_SCHEDULER_INSTANCE} task-type register echo Starting the swh Celery worker for ${SWH_WORKER_INSTANCE} exec python -m celery \ --app=swh.scheduler.celery_backend.config.app \ worker \ --pool=prefork --events \ --concurrency=${CONCURRENCY} \ --max-tasks-per-child=${MAX_TASKS_PER_CHILD} \ -Ofair --loglevel=${LOGLEVEL} \ --hostname "${SWH_WORKER_INSTANCE}@%h" ;; esac