diff --git a/docker/conf/indexer.yml b/docker/conf/indexer.yml
--- a/docker/conf/indexer.yml
+++ b/docker/conf/indexer.yml
@@ -27,6 +27,14 @@
     - swh.indexer.tasks.RecomputeChecksums
     - swh.indexer.tasks.RevisionMetadata
 
+# Kafka settings read by the journal-based indexer worker (journal-client)
+journal:
+  brokers:
+    - kafka
+  group_id: swh.indexer.journal_client
+  prefix: swh.journal.objects
+max_messages: 50
+
 tools:
   name: swh-metadata-detector
   version: 0.0.2
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@@ -497,9 +497,9 @@
       - "./conf/loader-opam.yml:/loader-opam.yml:ro"
       - "./services/swh-worker/entrypoint.sh:/entrypoint.sh:ro"
 
-  # Indexer Celery workers
+  # Indexer workers
 
-  swh-indexer:
+  swh-indexer-worker-celery:
     image: swh/stack
     build: ./
     user: swh
@@ -521,27 +521,36 @@
       - amqp
     volumes:
       - "./conf/indexer.yml:/indexer.yml:ro"
-      - "./services/swh-indexer-worker/entrypoint.sh:/entrypoint.sh:ro"
+      - "./services/swh-indexer-worker-celery/entrypoint.sh:/entrypoint.sh:ro"
 
-  # Journal related
-
-  swh-indexer-journal-client:
+  swh-indexer-worker-journal:
     image: swh/stack
     build: ./
-    entrypoint: /entrypoint.sh
+    user: swh
     env_file:
       - ./env/common_python.env
+      - ./env/indexers-db.env
+      - ./env/indexers.env
+      - ./env/workers.env
+    environment:
+      SWH_WORKER_INSTANCE: indexer
+      SWH_CONFIG_FILENAME: /indexer.yml
+      CONCURRENCY: 4
+    entrypoint: /entrypoint.sh
     depends_on:
       kafka:
         condition: service_healthy
       swh-storage:
         condition: service_started
-      swh-scheduler:
+      swh-idx-storage:
+        condition: service_started
+      swh-objstorage:
         condition: service_started
-
     volumes:
-      - "./conf/indexer_journal_client.yml:/etc/softwareheritage/indexer/journal_client.yml:ro"
-      - "./services/swh-indexer-journal-client/entrypoint.sh:/entrypoint.sh:ro"
+      - "./conf/indexer.yml:/indexer.yml:ro"
+      - "./services/swh-indexer-worker-journal/entrypoint.sh:/entrypoint.sh:ro"
+
+  # Journal related
 
   swh-scheduler-journal-client:
     image: swh/stack
diff --git a/docker/services/swh-indexer-worker/entrypoint.sh b/docker/services/swh-indexer-worker-celery/entrypoint.sh
rename from docker/services/swh-indexer-worker/entrypoint.sh
rename to docker/services/swh-indexer-worker-celery/entrypoint.sh
--- a/docker/services/swh-indexer-worker/entrypoint.sh
+++ b/docker/services/swh-indexer-worker-celery/entrypoint.sh
@@ -18,7 +18,7 @@
 
         wait_pgsql
 
-        echo Starting swh-indexer worker
+        echo Starting swh-indexer Celery-based worker
         exec python -m celery \
             --app=swh.scheduler.celery_backend.config.app \
             worker \
diff --git a/docker/services/swh-indexer-worker-journal/entrypoint.sh b/docker/services/swh-indexer-worker-journal/entrypoint.sh
new file mode 100755
--- /dev/null
+++ b/docker/services/swh-indexer-worker-journal/entrypoint.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+set -e
+
+source /srv/softwareheritage/utils/pyutils.sh
+setup_pip
+
+source /srv/softwareheritage/utils/pgsql.sh
+setup_pgsql
+
+case "$1" in
+    "shell")
+        exec bash -i
+        ;;
+    *)
+        echo Waiting for Kafka to start
+        wait-for-it kafka:9092 -s --timeout=0
+
+        wait_pgsql
+
+        echo Starting swh-indexer journal-based worker
+        # exec: replace this shell with swh so it receives container stop signals directly
+        exec swh --log-level "${LOGLEVEL}" indexer --config-file /indexer.yml journal-client '*'
+        ;;
+esac