diff --git a/docker/conf/search-memory.yml b/docker/conf/search-memory.yml new file mode 100644 index 0000000..9512b93 --- /dev/null +++ b/docker/conf/search-memory.yml @@ -0,0 +1,2 @@ +search: + cls: memory diff --git a/docker/conf/web-keycloak.yml b/docker/conf/web-keycloak.yml index de984c5..703366d 100644 --- a/docker/conf/web-keycloak.yml +++ b/docker/conf/web-keycloak.yml @@ -1,68 +1,73 @@ storage: cls: remote url: http://swh-storage:5002/ timeout: 1 objstorage: cls: remote url: http://swh-objstorage:5003/ indexer_storage: cls: remote url: http://swh-idx-storage:5007/ scheduler: cls: remote url: http://swh-scheduler:5008/ vault: cls: remote args: url: http://swh-vault:5005/ deposit: private_api_url: http://swh-deposit:5006/1/private/ private_api_user: swhworker private_api_password: "" allowed_hosts: - "*" debug: yes serve_assets: yes development_db: /tmp/swh/web.sqlite3 production_db: host: swh-web-db port: 5432 name: swh-web user: postgres password: testpassword throttling: cache_uri: memcache:11211 scopes: swh_api: limiter_rate: default: 120/h swh_api_origin_search: limiter_rate: default: 70/m swh_api_origin_visit_latest: limiter_rate: default: 700/m swh_vault_cooking: limiter_rate: default: 120/h swh_save_origin: limiter_rate: default: 120/h -search: {} +search: + cls: remote + url: http://swh-search:5010/ + +search_config: + metadata_backend: swh-search keycloak: server_url: http://keycloak:8080/keycloak/auth/ realm_name: SoftwareHeritage instance_name: archive-docker.softwareheritage.org diff --git a/docker/conf/web-mirror.yml b/docker/conf/web-mirror.yml index 2781633..efa7bab 100644 --- a/docker/conf/web-mirror.yml +++ b/docker/conf/web-mirror.yml @@ -1,43 +1,51 @@ storage: cls: remote url: http://swh-storage-mirror:5002/ timeout: 1 objstorage: cls: remote url: http://swh-objstorage:5003/ indexer_storage: cls: remote url: http://swh-idx-storage:5007/ scheduler: cls: remote url: http://swh-scheduler:5008/ vault: cls: remote args: url: http://swh-vault:5005/ deposit: private_api_url: http://swh-deposit:5006/1/private/ private_api_user: swhworker private_api_password: "" allowed_hosts: - "*" debug: yes serve_assets: yes development_db: /tmp/swh/web.sqlite3 production_db: host: swh-web-db port: 5432 name: swh-web user: postgres password: testpassword + +search: + cls: remote + url: http://swh-search:5010/ + +search_config: + metadata_backend: swh-search + instance_name: archive-docker.softwareheritage.org diff --git a/docker/conf/web-read-replica.yml b/docker/conf/web-read-replica.yml index 854061c..fef386b 100644 --- a/docker/conf/web-read-replica.yml +++ b/docker/conf/web-read-replica.yml @@ -1,70 +1,76 @@ storage: cls: remote url: http://swh-storage-read-replica:5002/ timeout: 1 indexer_storage: cls: remote url: http://swh-idx-storage:5007/ scheduler: cls: remote url: http://swh-scheduler:5008/ vault: cls: remote args: url: http://swh-vault:5005/ deposit: private_api_url: http://swh-deposit:5006/1/private/ private_api_user: swhworker private_api_password: "" allowed_hosts: - "*" debug: yes serve_assets: yes development_db: /tmp/swh/web.sqlite3 production_db: host: swh-web-db port: 5432 name: swh-web user: postgres password: testpassword throttling: cache_uri: memcache:11211 scopes: swh_api: limiter_rate: default: 120/h exempted_networks: - 0.0.0.0/0 swh_api_origin_search: limiter_rate: default: 70/m exempted_networks: - 0.0.0.0/0 swh_api_origin_visit_latest: limiter_rate: default: 700/m exempted_networks: - 0.0.0.0/0 swh_vault_cooking: limiter_rate: default: 120/h exempted_networks: - 0.0.0.0/0 swh_save_origin: limiter_rate: default: 120/h exempted_networks: - 0.0.0.0/0 -search: {} +search: + cls: remote + url: http://swh-search:5010/ + +search_config: + metadata_backend: swh-search + instance_name: archive-docker.softwareheritage.org diff --git a/docker/conf/web-search.yml b/docker/conf/web-search.yml deleted file mode 100644 index 08c6e3d..0000000 --- a/docker/conf/web-search.yml +++ /dev/null @@ -1,76 +0,0 @@ -storage: - cls: remote - url: http://swh-storage:5002/ - timeout: 5 - -indexer_storage: - cls: remote - url: http://swh-idx-storage:5007/ - -scheduler: - cls: remote - url: http://swh-scheduler:5008/ - -vault: - cls: remote - url: http://swh-vault:5005/ - -deposit: - private_api_url: http://swh-deposit:5006/1/private/ - private_api_user: swhworker - private_api_password: "" - -allowed_hosts: - - "*" - -debug: yes - -serve_assets: yes - -development_db: /tmp/swh/web.sqlite3 - -production_db: - name: postgresql:///?service=swh-web - -throttling: - cache_uri: memcache:11211 - scopes: - swh_api: - limiter_rate: - default: 120/h - exempted_networks: - - 0.0.0.0/0 - swh_api_origin_search: - limiter_rate: - default: 70/m - exempted_networks: - - 0.0.0.0/0 - swh_api_origin_visit_latest: - limiter_rate: - default: 700/m - exempted_networks: - - 0.0.0.0/0 - swh_vault_cooking: - limiter_rate: - default: 120/h - exempted_networks: - - 0.0.0.0/0 - swh_save_origin: - limiter_rate: - default: 120/h - exempted_networks: - - 0.0.0.0/0 -instance_name: archive-docker.softwareheritage.org - -search: - cls: remote - url: http://swh-search:5010/ - -search_config: - metadata_backend: swh-search - -counters: - cls: remote - url: http://swh-counters:5011/ -counters_backend: swh-counters -history_counters_url: http://swh-counters:5011/counters_history/history.json diff --git a/docker/conf/web.yml b/docker/conf/web.yml index 7450828..929000c 100644 --- a/docker/conf/web.yml +++ b/docker/conf/web.yml @@ -1,72 +1,77 @@ storage: cls: remote url: http://swh-storage:5002/ timeout: 5 indexer_storage: cls: remote url: http://swh-idx-storage:5007/ scheduler: cls: remote url: http://swh-scheduler:5008/ vault: cls: remote url: http://swh-vault:5005/ deposit: private_api_url: http://swh-deposit:5006/1/private/ private_api_user: swhworker private_api_password: "" allowed_hosts: - "*" debug: yes serve_assets: yes development_db: /tmp/swh/web.sqlite3 production_db: name: postgresql:///?service=swh-web throttling: cache_uri: memcache:11211 scopes: swh_api: limiter_rate: default: 120/h exempted_networks: - 0.0.0.0/0 swh_api_origin_search: limiter_rate: default: 70/m exempted_networks: - 0.0.0.0/0 swh_api_origin_visit_latest: limiter_rate: default: 700/m exempted_networks: - 0.0.0.0/0 swh_vault_cooking: limiter_rate: default: 120/h exempted_networks: - 0.0.0.0/0 swh_save_origin: limiter_rate: default: 120/h exempted_networks: - 0.0.0.0/0 instance_name: archive-docker.softwareheritage.org -search: {} +search: + cls: remote + url: http://swh-search:5010/ + +search_config: + metadata_backend: swh-search counters: cls: remote url: http://swh-counters:5011/ counters_backend: swh-counters history_counters_url: http://swh-counters:5011/counters_history/history.json diff --git a/docker/docker-compose.search.yml b/docker/docker-compose.search.yml index df19162..f6c94a7 100644 --- a/docker/docker-compose.search.yml +++ b/docker/docker-compose.search.yml @@ -1,67 +1,23 @@ version: "2.1" services: elasticsearch: env_file: - ./env/elasticsearch.env image: elastic/elasticsearch:7.15.2 environment: - ingest.geoip.downloader.enabled=false - "ES_JAVA_OPTS=-Xms512m -Xmx512m" ports: - 9200:9200 volumes: - elasticsearch-data:/usr/share/elasticsearch/data swh-search: - image: swh/stack - build: ./ - entrypoint: /entrypoint.sh - ports: - - 5010:5010 depends_on: - elasticsearch - environment: - SWH_CONFIG_FILENAME: /search.yml - env_file: - - ./env/common_python.env volumes: - "./conf/search.yml:/search.yml:ro" - - "./services/swh-search/entrypoint.sh:/entrypoint.sh:ro" - - swh-search-journal-client-objects: - image: swh/stack - build: ./ - entrypoint: /entrypoint.sh - depends_on: - kafka: - condition: service_healthy - swh-search: - condition: service_started - volumes: - - "./conf/search_journal_client_objects.yml:/etc/softwareheritage/search/journal_client.yml:ro" - - "./services/swh-search-journal-client/entrypoint.sh:/entrypoint.sh:ro" - - swh-search-journal-client-indexed: - image: swh/stack - build: ./ - entrypoint: /entrypoint.sh - depends_on: - kafka: - condition: service_healthy - swh-search: - condition: service_started - volumes: - - "./conf/search_journal_client_indexed.yml:/etc/softwareheritage/search/journal_client.yml:ro" - - "./services/swh-search-journal-client/entrypoint.sh:/entrypoint.sh:ro" - - swh-web: - depends_on: - - swh-search - environment: - SWH_CONFIG_FILENAME: /web-search.yml - volumes: - - "./conf/web-search.yml:/web-search.yml:ro" volumes: elasticsearch-data: diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 70d9b08..5a1df2d 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -1,664 +1,707 @@ version: "2.1" services: amqp: image: rabbitmq:3.6-management ports: - 5072:5672 zookeeper: image: wurstmeister/zookeeper restart: always ports: - "5081:2181" environment: ZOO_LOG4J_PROP: WARN,CONSOLE kafka: image: wurstmeister/kafka ports: - "5092:5092" env_file: ./env/kafka.env environment: # all other kafka config options are defined in env/kafka.env KAFKA_CREATE_TOPICS: swh.journal.objects.content:16:1:compact, swh.journal.objects.directory:16:1:compact, swh.journal.objects.extid:16:1:compact, swh.journal.objects.origin:16:1:compact, swh.journal.objects.origin_visit:16:1:compact, swh.journal.objects.origin_visit_status:16:1:compact, swh.journal.objects.raw_extrinsic_metadata:16:1:compact, swh.journal.objects.release:16:1:compact, swh.journal.objects_privileged.release:16:1:compact, swh.journal.objects.revision:16:1:compact, swh.journal.objects_privileged.revision:16:1:compact, swh.journal.objects.skipped_content:16:1:compact, swh.journal.objects.snapshot:16:1:compact, swh.journal.indexed.origin_intrinsic_metadata:16:1:compact depends_on: - zookeeper healthcheck: test: "[ `JMX_PORT= kafka-topics.sh --list --zookeeper zookeeper:2181 | wc -l` -ge `echo $$KAFKA_CREATE_TOPICS | tr ',' '\n' | wc -l` ]" interval: 10s timeout: 5s retries: 10 volumes: - kafka-data:/kafka cmak: # Note: CMAK does not work out of the box, you need to run this first: # $ docker-compose exec zookeeper ./bin/zkCli.sh # create /kafka-manager/mutex "" # create /kafka-manager/mutex/locks "" # create /kafka-manager/mutex/leases "" # See: https://github.com/yahoo/CMAK/issues/731#issuecomment-643880544 image: hlebalbau/kafka-manager:stable environment: ZK_HOSTS: "zookeeper:2181" prometheus: image: prom/prometheus depends_on: - prometheus-statsd-exporter command: # Needed for the reverse-proxy - "--web.external-url=/prometheus" - "--config.file=/etc/prometheus/prometheus.yml" volumes: - "./conf/prometheus.yml:/etc/prometheus/prometheus.yml:ro" restart: unless-stopped prometheus-statsd-exporter: image: prom/statsd-exporter command: - "--statsd.mapping-config=/etc/prometheus/statsd-mapping.yml" volumes: - "./conf/prometheus-statsd-mapping.yml:/etc/prometheus/statsd-mapping.yml:ro" restart: unless-stopped prometheus-rabbitmq-exporter: image: kbudde/rabbitmq-exporter restart: unless-stopped environment: SKIP_QUEUES: "RPC_.*" MAX_QUEUES: 5000 RABBIT_URL: http://amqp:15672 LOG_LEVEL: warning grafana: image: grafana/grafana restart: unless-stopped depends_on: - prometheus environment: GF_SERVER_ROOT_URL: http://localhost:5080/grafana volumes: - "./conf/grafana/provisioning:/etc/grafana/provisioning:ro" - "./conf/grafana/dashboards:/var/lib/grafana/dashboards" mailhog: image: mailhog/mailhog ports: - "1025:1025" - "8025:8025" memcache: image: memcached restart: unless-stopped redis: image: redis volumes: - redis-data:/data ports: - 6379 command: - "--save" - "60" - "1" # flush every minutes healthcheck: test: [ "CMD", "redis-cli", "PING" ] interval: 1s timeout: 5s retries: 10 nginx: image: nginx volumes: - "./conf/nginx.conf:/etc/nginx/nginx.conf:ro" ports: - 5080:5080 # Scheduler swh-scheduler-db: image: postgres:12 env_file: - ./env/common_python.env - ./env/scheduler-db.env volumes: - "./services/initdb.d:/docker-entrypoint-initdb.d" swh-scheduler: image: swh/stack build: ./ env_file: - ./env/common_python.env - ./env/scheduler-db.env - ./env/scheduler.env environment: SWH_CONFIG_FILENAME: /scheduler.yml SWH_SCHEDULER_CONFIG_FILE: /scheduler.yml entrypoint: /entrypoint.sh depends_on: - swh-scheduler-db ports: - 5008:5008 volumes: - "./conf/scheduler.yml:/scheduler.yml:ro" - "./services/swh-scheduler/entrypoint.sh:/entrypoint.sh:ro" swh-scheduler-listener: image: swh/stack build: ./ env_file: - ./env/common_python.env - ./env/scheduler-db.env - ./env/scheduler.env environment: SWH_CONFIG_FILENAME: /scheduler.yml SWH_SCHEDULER_CONFIG_FILE: /scheduler.yml entrypoint: /entrypoint.sh command: start-listener depends_on: - swh-scheduler - amqp volumes: - "./conf/scheduler.yml:/scheduler.yml:ro" - "./services/swh-scheduler-worker/entrypoint.sh:/entrypoint.sh:ro" swh-scheduler-runner: image: swh/stack build: ./ env_file: - ./env/common_python.env - ./env/scheduler-db.env - ./env/scheduler.env environment: SWH_CONFIG_FILENAME: /scheduler.yml SWH_SCHEDULER_CONFIG_FILE: /scheduler.yml entrypoint: /entrypoint.sh command: start-runner --period 10 depends_on: - swh-scheduler - amqp volumes: - "./conf/scheduler.yml:/scheduler.yml:ro" - "./services/swh-scheduler-worker/entrypoint.sh:/entrypoint.sh:ro" swh-scheduler-runner-priority: image: swh/stack build: ./ env_file: - ./env/common_python.env - ./env/scheduler-db.env - ./env/scheduler.env environment: SWH_CONFIG_FILENAME: /scheduler.yml SWH_SCHEDULER_CONFIG_FILE: /scheduler.yml entrypoint: /entrypoint.sh command: start-runner --period 10 --with-priority depends_on: - swh-scheduler - amqp volumes: - "./conf/scheduler.yml:/scheduler.yml:ro" - "./services/swh-scheduler-worker/entrypoint.sh:/entrypoint.sh:ro" swh-scheduler-schedule-recurrent: image: swh/stack build: ./ env_file: - ./env/common_python.env - ./env/scheduler-db.env - ./env/scheduler.env environment: SWH_CONFIG_FILENAME: /scheduler.yml SWH_SCHEDULER_CONFIG_FILE: /scheduler.yml LOGLEVEL: INFO entrypoint: /entrypoint.sh command: schedule-recurrent depends_on: - swh-scheduler - amqp volumes: - "./conf/scheduler.yml:/scheduler.yml:ro" - "./services/swh-scheduler-worker/entrypoint.sh:/entrypoint.sh:ro" swh-scheduler-update-metrics: image: swh/stack build: ./ env_file: - ./env/common_python.env - ./env/scheduler-db.env - ./env/scheduler.env environment: SWH_CONFIG_FILENAME: /scheduler.yml SWH_SCHEDULER_CONFIG_FILE: /scheduler.yml entrypoint: /entrypoint.sh command: update-metrics depends_on: - swh-scheduler-db volumes: - "./conf/scheduler.yml:/scheduler.yml:ro" - "./services/swh-scheduler/entrypoint.sh:/entrypoint.sh:ro" # Graph storage swh-storage-db: image: postgres:12 env_file: - ./env/storage-db.env volumes: - "./services/initdb.d:/docker-entrypoint-initdb.d" - storage-data:/var/lib/postgresql swh-storage: image: swh/stack build: ./ ports: - 5002:5002 depends_on: - swh-storage-db - swh-objstorage - kafka env_file: - ./env/common_python.env - ./env/storage.env environment: SWH_CONFIG_FILENAME: /storage.yml STORAGE_BACKEND: postgresql entrypoint: /entrypoint.sh volumes: - "./conf/storage.yml:/storage.yml:ro" - "./services/swh-storage/entrypoint.sh:/entrypoint.sh:ro" # Object storage swh-objstorage: build: ./ image: swh/stack ports: - 5003:5003 env_file: - ./env/common_python.env environment: SWH_CONFIG_FILENAME: /objstorage.yml entrypoint: /entrypoint.sh volumes: - "./conf/objstorage.yml:/objstorage.yml:ro" - "./services/swh-objstorage/entrypoint.sh:/entrypoint.sh:ro" - objstorage-data:/srv/softwareheritage # Indexer storage swh-idx-storage-db: image: postgres:12 env_file: - ./env/indexers-db.env volumes: - "./services/initdb.d:/docker-entrypoint-initdb.d" swh-idx-storage: image: swh/stack build: ./ ports: - 5007:5007 depends_on: - swh-idx-storage-db env_file: - ./env/common_python.env - ./env/indexers-db.env - ./env/indexers.env environment: SWH_CONFIG_FILENAME: /indexer_storage.yml entrypoint: /entrypoint.sh volumes: - "./conf/indexer_storage.yml:/indexer_storage.yml:ro" - "./services/swh-indexer-storage/entrypoint.sh:/entrypoint.sh:ro" # Web interface swh-web-db: image: postgres:12 env_file: - ./env/01-web-db.env volumes: - "./services/initdb.d:/docker-entrypoint-initdb.d" swh-web: build: ./ image: swh/stack ports: - 3000:3000 - 5004:5004 depends_on: - swh-idx-storage - swh-scheduler - swh-storage - swh-web-db + - swh-search - memcache env_file: - ./env/common_python.env - ./env/01-web-db.env - ./env/02-web-db.env environment: VERBOSITY: 3 DJANGO_SETTINGS_MODULE: swh.web.settings.production SWH_CONFIG_FILENAME: /web.yml entrypoint: /entrypoint.sh volumes: - "./conf/web.yml:/web.yml:ro" - "./services/swh-web/entrypoint.sh:/entrypoint.sh:ro" swh-web-cron: build: ./ image: swh/stack depends_on: - swh-web env_file: - ./env/common_python.env - ./env/01-web-db.env - ./env/02-web-db.env environment: VERBOSITY: 2 DJANGO_SETTINGS_MODULE: swh.web.settings.production SWH_CONFIG_FILENAME: /web.yml entrypoint: /entrypoint.sh command: cron volumes: - "./conf/web.yml:/web.yml:ro" - "./services/swh-web/entrypoint.sh:/entrypoint.sh:ro" swh-deposit-db: image: postgres:12 env_file: - ./env/deposit-db.env volumes: - "./services/initdb.d:/docker-entrypoint-initdb.d" swh-deposit: image: swh/stack build: ./ ports: - 5006:5006 depends_on: - swh-deposit-db - swh-scheduler env_file: - ./env/common_python.env - ./env/deposit-db.env - ./env/deposit.env environment: VERBOSITY: 3 SWH_CONFIG_FILENAME: /deposit.yml DJANGO_SETTINGS_MODULE: swh.deposit.settings.production entrypoint: /entrypoint.sh volumes: - "./conf/deposit.yml:/deposit.yml:ro" - "./services/swh-deposit/entrypoint.sh:/entrypoint.sh:ro" swh-vault-db: image: postgres:12 env_file: - ./env/vault-db.env volumes: - "./services/initdb.d:/docker-entrypoint-initdb.d" swh-vault: image: swh/stack build: ./ env_file: - ./env/common_python.env - ./env/vault-db.env - ./env/vault.env environment: SWH_CONFIG_FILENAME: /vault.yml command: server ports: - 5005:5005 depends_on: - swh-vault-db - swh-objstorage - swh-storage - swh-scheduler entrypoint: /entrypoint.sh volumes: - "./conf/vault.yml:/vault.yml:ro" - "./services/swh-vault/entrypoint.sh:/entrypoint.sh:ro" swh-vault-worker: image: swh/stack build: ./ command: worker env_file: - ./env/common_python.env - ./env/workers.env environment: SWH_CONFIG_FILENAME: /cooker.yml depends_on: - swh-vault - swh-storage entrypoint: /entrypoint.sh volumes: - "./conf/vault-worker.yml:/cooker.yml:ro" - "./services/swh-vault/entrypoint.sh:/entrypoint.sh:ro" # Lister Celery workers swh-lister: image: swh/stack build: ./ env_file: - ./env/common_python.env - ./env/listers.env - ./env/workers.env user: swh environment: SWH_WORKER_INSTANCE: listers SWH_CONFIG_FILENAME: /lister.yml depends_on: - swh-scheduler - swh-scheduler-runner - amqp - swh-lister-maven-nginx entrypoint: /entrypoint.sh volumes: - "./conf/lister.yml:/lister.yml:ro" - "./services/swh-listers-worker/entrypoint.sh:/entrypoint.sh:ro" swh-lister-maven-nginx: # Http server to host the maven extracted index for the maven lister image: nginx volumes: - "./conf/maven_index/:/usr/share/nginx/html:ro" ports: - 8880:80 # Loader + deposit checker Celery workers swh-loader: image: swh/stack build: ./ env_file: - ./env/common_python.env - ./env/workers.env user: swh environment: SWH_WORKER_INSTANCE: loader SWH_CONFIG_FILENAME: /loader.yml entrypoint: /entrypoint.sh depends_on: - swh-storage - swh-scheduler - amqp volumes: - "./conf/loader.yml:/loader.yml:ro" - "./services/swh-worker/entrypoint.sh:/entrypoint.sh:ro" swh-loader-deposit: image: swh/stack build: ./ env_file: - ./env/common_python.env - ./env/workers.env user: swh environment: SWH_WORKER_INSTANCE: loader-deposit SWH_CONFIG_FILENAME: /loader-deposit.yml entrypoint: /entrypoint.sh depends_on: - swh-storage - swh-scheduler - swh-deposit - amqp volumes: - "./conf/loader-deposit.yml:/loader-deposit.yml:ro" - "./services/swh-worker/entrypoint.sh:/entrypoint.sh:ro" swh-loader-opam: image: swh/stack build: ./ env_file: - ./env/common_python.env - ./env/workers.env user: swh environment: SWH_WORKER_INSTANCE: loader-opam SWH_CONFIG_FILENAME: /loader-opam.yml entrypoint: /entrypoint.sh depends_on: - swh-storage - swh-scheduler - swh-deposit - amqp volumes: - "./conf/loader-opam.yml:/loader-opam.yml:ro" - "./services/swh-worker/entrypoint.sh:/entrypoint.sh:ro" # Indexer workers swh-indexer-worker-celery: image: swh/stack build: ./ user: swh env_file: - ./env/common_python.env - ./env/indexers-db.env - ./env/indexers.env - ./env/workers.env environment: SWH_WORKER_INSTANCE: indexer SWH_CONFIG_FILENAME: /indexer.yml CONCURRENCY: 4 entrypoint: /entrypoint.sh depends_on: - swh-scheduler-runner - swh-idx-storage - swh-storage - swh-objstorage - amqp volumes: - "./conf/indexer.yml:/indexer.yml:ro" - "./services/swh-indexer-worker-celery/entrypoint.sh:/entrypoint.sh:ro" swh-indexer-worker-journal: image: swh/stack build: ./ user: swh env_file: - ./env/common_python.env - ./env/indexers-db.env - ./env/indexers.env - ./env/workers.env environment: SWH_WORKER_INSTANCE: indexer SWH_CONFIG_FILENAME: /indexer.yml CONCURRENCY: 4 entrypoint: /entrypoint.sh depends_on: kafka: condition: service_healthy swh-storage: condition: service_started swh-idx-storage: condition: service_started swh-objstorage: condition: service_started volumes: - "./conf/indexer.yml:/indexer.yml:ro" - "./services/swh-indexer-worker-journal/entrypoint.sh:/entrypoint.sh:ro" # Journal related swh-scheduler-journal-client: image: swh/stack build: ./ entrypoint: /entrypoint.sh env_file: - ./env/common_python.env depends_on: kafka: condition: service_healthy swh-scheduler: condition: service_started volumes: - "./conf/scheduler_journal_client.yml:/etc/softwareheritage/scheduler/journal_client.yml:ro" - "./services/swh-scheduler-journal-client/entrypoint.sh:/entrypoint.sh:ro" swh-counters: image: swh/stack build: ./ entrypoint: /entrypoint.sh environment: SWH_CONFIG_FILENAME: /etc/softwareheritage/counters/server.yml env_file: - ./env/common_python.env ports: - 5011:5011 depends_on: redis: condition: service_healthy volumes: - "./conf/counters.yml:/etc/softwareheritage/counters/server.yml:ro" - "./services/swh-counters/entrypoint.sh:/entrypoint.sh:ro" healthcheck: test: [ "CMD", "curl", "-f", "http://localhost:5011/" ] interval: 10s timeout: 5s retries: 10 swh-counters-journal-client: image: swh/stack build: ./ entrypoint: /entrypoint.sh env_file: - ./env/common_python.env depends_on: kafka: condition: service_healthy redis: condition: service_healthy swh-counters: condition: service_healthy volumes: - "./conf/counters_journal_client.yml:/etc/softwareheritage/counters/journal_client.yml:ro" - "./services/swh-counters-journal-client/entrypoint.sh:/entrypoint.sh:ro" + # Search related + + swh-search: + image: swh/stack + build: ./ + entrypoint: /entrypoint.sh + ports: + - 5010:5010 + environment: + SWH_CONFIG_FILENAME: /search.yml + env_file: + - ./env/common_python.env + volumes: + - "./conf/search-memory.yml:/search.yml:ro" + - "./services/swh-search/entrypoint.sh:/entrypoint.sh:ro" + + swh-search-journal-client-objects: + image: swh/stack + build: ./ + entrypoint: /entrypoint.sh + depends_on: + kafka: + condition: service_healthy + swh-search: + condition: service_started + volumes: + - "./conf/search_journal_client_objects.yml:/etc/softwareheritage/search/journal_client.yml:ro" + - "./services/swh-search-journal-client/entrypoint.sh:/entrypoint.sh:ro" + + swh-search-journal-client-indexed: + image: swh/stack + build: ./ + entrypoint: /entrypoint.sh + depends_on: + kafka: + condition: service_healthy + swh-search: + condition: service_started + volumes: + - "./conf/search_journal_client_indexed.yml:/etc/softwareheritage/search/journal_client.yml:ro" + - "./services/swh-search-journal-client/entrypoint.sh:/entrypoint.sh:ro" + volumes: redis-data: storage-data: objstorage-data: kafka-data: diff --git a/docker/services/swh-search/entrypoint.sh b/docker/services/swh-search/entrypoint.sh index 551a578..8e0b06a 100755 --- a/docker/services/swh-search/entrypoint.sh +++ b/docker/services/swh-search/entrypoint.sh @@ -1,32 +1,35 @@ #!/bin/bash set -e source /srv/softwareheritage/utils/pyutils.sh setup_pip case "$1" in "shell") exec bash -i ;; *) echo Starting the swh-search API server - wait-for-it elasticsearch:9200 -s --timeout=0 - echo "Waiting for ElasticSearch cluster to be up" - cat << EOF | python3 + if grep -q elasticsearch $SWH_CONFIG_FILENAME; + then + wait-for-it elasticsearch:9200 -s --timeout=0 + echo "Waiting for ElasticSearch cluster to be up" + cat << EOF | python3 import elasticsearch es = elasticsearch.Elasticsearch(['elasticsearch:9200']) es.cluster.health(wait_for_status='yellow') EOF - echo "ElasticSearch cluster is up" + echo "ElasticSearch cluster is up" + fi swh search -C $SWH_CONFIG_FILENAME initialize exec gunicorn --bind 0.0.0.0:5010 \ --reload \ --threads 4 \ --workers 2 \ --log-level DEBUG \ --timeout 3600 \ --config 'python:swh.core.api.gunicorn_config' \ 'swh.search.api.server:make_app_from_configfile()' ;; esac