Patch summary: replaces the five per-task indexer services in docker-compose.yml with a single swh-indexer service, ships one indexer.yml config in the swh-indexer-worker image, and selects it through SWH_CONFIG_FILENAME in indexers.env.
Review fixes folded in: (1) entrypoint.sh is copied to /home/swh/ so that it exists at the path named by the new ENTRYPOINT; (2) the depends_on list left over from swh-indexer-revision-metadata is removed so that swh-indexer carries a single depends_on key (docker-compose.yml hunk header adjusted from +1,397 to +1,390).
diff --git a/docker-compose.yml b/docker-compose.yml index 07f21cb..ad88a4e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,450 +1,390 @@ version: '2' services: amqp: image: rabbitmq:3.6-management ports: - 5072:5672 flower: image: mher/flower command: --broker=amqp://guest:guest@amqp:5672// --url_prefix=flower ports: - 5055:5555 depends_on: - amqp zookeeper: image: wurstmeister/zookeeper kafka: image: wurstmeister/kafka ports: - 5092:9092 env_file: ./kafka.env depends_on: - zookeeper prometheus: image: prom/prometheus command: # Needed for the reverse-proxy - "--web.external-url=/prometheus" volumes: - "./prometheus.yml:/etc/prometheus/prometheus.yml:ro" restart: unless-stopped prometheus-statsd-exporter: image: prom/statsd-exporter command: - "--statsd.mapping-config=/etc/prometheus/statsd-mapping.yml" volumes: - "./prometheus-statsd-mapping.yml:/etc/prometheus/statsd-mapping.yml:ro" restart: unless-stopped nginx: image: nginx volumes: - "./nginx.conf:/etc/nginx/nginx.conf:ro" ports: - 5080:5080 # Scheduler swh-scheduler-db: image: postgres:10 environment: POSTGRES_PASSWORD: testpassword POSTGRES_DB: swh-scheduler swh-scheduler-api: image: swh/scheduler-api build: ./dockerfiles/swh-scheduler-api env_file: ./scheduler.env depends_on: - swh-scheduler-db ports: - 5008:5008 swh-scheduler-listener: image: swh/scheduler-worker build: ./dockerfiles/swh-scheduler-worker env_file: ./scheduler.env command: listener depends_on: - swh-scheduler-api - amqp swh-scheduler-runner: image: swh/scheduler-worker build: ./dockerfiles/swh-scheduler-worker env_file: ./scheduler.env command: runner -p 10 depends_on: - swh-scheduler-api - amqp # Graph storage swh-storage-db: image: postgres:10 environment: POSTGRES_PASSWORD: testpassword POSTGRES_DB: swh-storage swh-storage: build: ./dockerfiles/swh-storage image: swh/storage ports: - 5002:5002 depends_on: - swh-storage-db - swh-objstorage env_file: ./storage.env # Object storage swh-objstorage: build: 
./dockerfiles/swh-objstorage image: swh/objstorage ports: - 5003:5003 # Indexer storage swh-idx-storage-db: image: postgres:10 environment: POSTGRES_PASSWORD: testpassword POSTGRES_DB: swh-idx-storage swh-idx-storage: build: ./dockerfiles/swh-indexer-storage image: swh/indexer-storage ports: - 5007:5007 depends_on: - swh-idx-storage-db environment: POSTGRES_PASSWORD: testpassword POSTGRES_DB: swh-idx-storage PGHOST: swh-idx-storage-db PGUSER: postgres # Web interface swh-web: build: ./dockerfiles/swh-web image: swh/web ports: - 5004:5004 depends_on: - swh-objstorage - swh-storage - swh-idx-storage swh-deposit-db: image: postgres:10 env_file: ./deposit.env swh-deposit: build: ./dockerfiles/swh-deposit image: swh/deposit ports: - 5006:5006 depends_on: - swh-deposit-db - swh-scheduler-api env_file: ./deposit.env environment: PGHOST: swh-deposit-db # Lister Celery workers swh-listers-db: image: postgres:10 environment: POSTGRES_PASSWORD: testpassword swh-lister-debian: image: swh/listers-worker build: ./dockerfiles/swh-listers-worker env_file: ./listers.env environment: STATSD_HOST: prometheus-statsd-exporter STATSD_PORT: 9125 SWH_WORKER_INSTANCE: debian depends_on: - swh-listers-db - swh-scheduler-api - swh-scheduler-runner - swh-storage - amqp swh-lister-bitbucket: image: swh/listers-worker build: ./dockerfiles/swh-listers-worker env_file: ./listers.env environment: STATSD_HOST: prometheus-statsd-exporter STATSD_PORT: 9125 SWH_WORKER_INSTANCE: bitbucket depends_on: - swh-listers-db - swh-scheduler-api - swh-scheduler-runner - swh-storage - amqp swh-lister-github: image: swh/listers-worker build: ./dockerfiles/swh-listers-worker env_file: ./listers.env environment: STATSD_HOST: prometheus-statsd-exporter STATSD_PORT: 9125 SWH_WORKER_INSTANCE: github depends_on: - swh-listers-db - swh-scheduler-api - swh-scheduler-runner - swh-storage - amqp swh-lister-gitlab: image: swh/listers-worker build: ./dockerfiles/swh-listers-worker env_file: ./listers.env environment: 
STATSD_HOST: prometheus-statsd-exporter STATSD_PORT: 9125 SWH_WORKER_INSTANCE: gitlab depends_on: - swh-listers-db - swh-scheduler-api - swh-scheduler-runner - swh-storage - amqp swh-lister-npm: image: swh/listers-worker build: ./dockerfiles/swh-listers-worker env_file: ./listers.env environment: STATSD_HOST: prometheus-statsd-exporter STATSD_PORT: 9125 SWH_WORKER_INSTANCE: npm depends_on: - swh-listers-db - swh-scheduler-api - swh-scheduler-runner - swh-storage - amqp swh-lister-pypi: image: swh/listers-worker build: ./dockerfiles/swh-listers-worker env_file: ./listers.env environment: STATSD_HOST: prometheus-statsd-exporter STATSD_PORT: 9125 SWH_WORKER_INSTANCE: pypi depends_on: - swh-listers-db - swh-scheduler-api - swh-scheduler-runner - swh-storage - amqp # Indexer Celery workers - swh-indexer-mimetype: + swh-indexer: image: swh/indexer-worker build: ./dockerfiles/swh-indexer-worker env_file: ./indexers.env environment: STATSD_HOST: prometheus-statsd-exporter STATSD_PORT: 9125 - SWH_WORKER_INSTANCE: content_mimetype depends_on: - swh-scheduler-runner - swh-idx-storage - swh-storage - swh-objstorage - amqp - swh-indexer-license: - image: swh/indexer-worker - build: ./dockerfiles/swh-indexer-worker - env_file: ./indexers.env - environment: - STATSD_HOST: prometheus-statsd-exporter - STATSD_PORT: 9125 - SWH_WORKER_INSTANCE: content_fossology_license - depends_on: - - swh-scheduler-runner - - swh-idx-storage - - swh-storage - - swh-objstorage - - amqp - - swh-indexer-origin-head: - image: swh/indexer-worker - build: ./dockerfiles/swh-indexer-worker - env_file: ./indexers.env - environment: - STATSD_HOST: prometheus-statsd-exporter - STATSD_PORT: 9125 - SWH_WORKER_INSTANCE: origin_head - depends_on: - - swh-scheduler-api - - swh-scheduler-runner - - swh-idx-storage - - swh-storage - - amqp - - swh-indexer-revision-metadata: - image: swh/indexer-worker - build: ./dockerfiles/swh-indexer-worker - env_file: ./indexers.env - environment: - STATSD_HOST: 
prometheus-statsd-exporter - STATSD_PORT: 9125 - SWH_WORKER_INSTANCE: revision_metadata - depends_on: - - swh-scheduler-runner - - swh-idx-storage - - swh-storage - - swh-objstorage - - amqp - - swh-indexer-origin-intrinsic-metadata: - image: swh/indexer-worker - build: ./dockerfiles/swh-indexer-worker - env_file: ./indexers.env - environment: - STATSD_HOST: prometheus-statsd-exporter - STATSD_PORT: 9125 - SWH_WORKER_INSTANCE: origin_intrinsic_metadata - depends_on: - - swh-scheduler-runner - - swh-idx-storage - - swh-storage - - amqp - swh-indexer-journal-client: image: swh/indexer-journal-client build: ./dockerfiles/swh-indexer-journal-client depends_on: - swh-journal-publisher - swh-scheduler-api # Journal related swh-storage-listener: image: swh/storage-listener build: ./dockerfiles/swh-storage-listener env_file: ./storage.env depends_on: - swh-storage-db - kafka swh-journal-publisher: image: swh/journal-publisher build: ./dockerfiles/swh-journal-publisher depends_on: - kafka - swh-storage-listener swh-journal-client: image: swh/journal-client build: ./dockerfiles/swh-journal-client depends_on: - swh-journal-publisher # Loader Celery workers swh-loader-debian: image: swh/loaders-worker build: ./dockerfiles/swh-loaders-worker env_file: ./listers.env environment: STATSD_HOST: prometheus-statsd-exporter STATSD_PORT: 9125 SWH_WORKER_INSTANCE: debian depends_on: - swh-storage - amqp swh-loader-dir: image: swh/loaders-worker build: ./dockerfiles/swh-loaders-worker env_file: ./listers.env environment: STATSD_HOST: prometheus-statsd-exporter STATSD_PORT: 9125 SWH_WORKER_INSTANCE: dir depends_on: - swh-storage - amqp swh-loader-git: image: swh/loaders-worker build: ./dockerfiles/swh-loaders-worker env_file: ./listers.env environment: STATSD_HOST: prometheus-statsd-exporter STATSD_PORT: 9125 SWH_WORKER_INSTANCE: git depends_on: - swh-storage - amqp swh-loader-mercurial: image: swh/loaders-worker build: ./dockerfiles/swh-loaders-worker env_file: ./listers.env environment: STATSD_HOST: 
prometheus-statsd-exporter STATSD_PORT: 9125 SWH_WORKER_INSTANCE: mercurial depends_on: - swh-storage - amqp swh-loader-pypi: image: swh/loaders-worker build: ./dockerfiles/swh-loaders-worker env_file: ./listers.env environment: STATSD_HOST: prometheus-statsd-exporter STATSD_PORT: 9125 SWH_WORKER_INSTANCE: pypi depends_on: - swh-storage - amqp swh-loader-svn: image: swh/loaders-worker build: ./dockerfiles/swh-loaders-worker env_file: ./listers.env environment: STATSD_HOST: prometheus-statsd-exporter STATSD_PORT: 9125 SWH_WORKER_INSTANCE: svn depends_on: - swh-storage - amqp swh-loader-tar: image: swh/loaders-worker build: ./dockerfiles/swh-loaders-worker env_file: ./listers.env environment: STATSD_HOST: prometheus-statsd-exporter STATSD_PORT: 9125 SWH_WORKER_INSTANCE: tar depends_on: - swh-storage - amqp diff --git a/dockerfiles/swh-indexer-worker/Dockerfile b/dockerfiles/swh-indexer-worker/Dockerfile index 254f725..77a07b3 100644 --- a/dockerfiles/swh-indexer-worker/Dockerfile +++ b/dockerfiles/swh-indexer-worker/Dockerfile @@ -1,20 +1,16 @@ FROM python:3.6 RUN export DEBIAN_FRONTEND=noninteractive && \ apt-get update && \ apt-get install -y \ libsystemd-dev postgresql-client +RUN pip install --upgrade pip RUN pip install swh-indexer RUN useradd -ms /bin/bash swh -COPY entrypoint.sh / - -COPY mimetype.yml /home/swh/.config/swh/indexer/ -COPY fossology_license.yml /home/swh/.config/swh/indexer/ -COPY origin_head.yml /home/swh/.config/swh/indexer/ -COPY revision_metadata.yml /home/swh/.config/swh/indexer/ -COPY origin_intrinsic_metadata.yml /home/swh/.config/swh/indexer/ -RUN chown -R swh: /home/swh/.config/ USER swh -ENTRYPOINT ["/entrypoint.sh"] +COPY entrypoint.sh /home/swh/ +COPY indexer.yml /home/swh/ + +ENTRYPOINT ["/home/swh/entrypoint.sh"] diff --git a/dockerfiles/swh-indexer-worker/fossology_license.yml b/dockerfiles/swh-indexer-worker/fossology_license.yml deleted file mode 100644 index 55ab356..0000000 --- a/dockerfiles/swh-indexer-worker/fossology_license.yml +++ 
/dev/null @@ -1,12 +0,0 @@ -storage: - cls: remote - args: - url: http://swh-storage:5002/ -objstorage: - cls: remote - args: - url: http://swh-objstorage:5003/ -indexer_storage: - cls: remote - args: - url: http://swh-idx-storage:5007/ diff --git a/dockerfiles/swh-indexer-worker/revision_metadata.yml b/dockerfiles/swh-indexer-worker/indexer.yml similarity index 98% rename from dockerfiles/swh-indexer-worker/revision_metadata.yml rename to dockerfiles/swh-indexer-worker/indexer.yml index 78654ed..22303d4 100644 --- a/dockerfiles/swh-indexer-worker/revision_metadata.yml +++ b/dockerfiles/swh-indexer-worker/indexer.yml @@ -1,19 +1,16 @@ storage: cls: remote args: url: http://swh-storage:5002/ - objstorage: cls: remote args: url: http://swh-objstorage:5003/ - indexer_storage: cls: remote args: url: http://swh-idx-storage:5007/ - scheduler: cls: remote args: url: http://swh-scheduler-api:5008/ diff --git a/dockerfiles/swh-indexer-worker/mimetype.yml b/dockerfiles/swh-indexer-worker/mimetype.yml deleted file mode 100644 index 55ab356..0000000 --- a/dockerfiles/swh-indexer-worker/mimetype.yml +++ /dev/null @@ -1,12 +0,0 @@ -storage: - cls: remote - args: - url: http://swh-storage:5002/ -objstorage: - cls: remote - args: - url: http://swh-objstorage:5003/ -indexer_storage: - cls: remote - args: - url: http://swh-idx-storage:5007/ diff --git a/dockerfiles/swh-indexer-worker/origin_head.yml b/dockerfiles/swh-indexer-worker/origin_head.yml deleted file mode 100644 index 72724af..0000000 --- a/dockerfiles/swh-indexer-worker/origin_head.yml +++ /dev/null @@ -1,19 +0,0 @@ -storage: - cls: remote - args: - url: http://swh-storage:5002/ - -indexer_storage: - cls: remote - args: - url: http://swh-idx-storage:5007/ - -scheduler: - cls: remote - args: - url: http://swh-scheduler-api:5008/ - -tasks: - revision_metadata: indexer_revision_metadata - origin_intrinsic_metadata: indexer_origin_metadata - diff --git a/dockerfiles/swh-indexer-worker/origin_intrinsic_metadata.yml 
b/dockerfiles/swh-indexer-worker/origin_intrinsic_metadata.yml deleted file mode 100644 index 33e933c..0000000 --- a/dockerfiles/swh-indexer-worker/origin_intrinsic_metadata.yml +++ /dev/null @@ -1,9 +0,0 @@ -storage: - cls: remote - args: - url: http://swh-storage:5002/ - -indexer_storage: - cls: remote - args: - url: http://swh-idx-storage:5007/ diff --git a/indexers.env b/indexers.env index 0f02e61..3e15cd5 100644 --- a/indexers.env +++ b/indexers.env @@ -1,3 +1,5 @@ -CONCURRENCY=1 +CONCURRENCY=4 MAX_TASKS_PER_CHILD=10 LOGLEVEL=DEBUG +SWH_WORKER_INSTANCE=indexer +SWH_CONFIG_FILENAME=/home/swh/indexer.yml