# Patch summary (free-text preamble; everything from the first "diff --git" header onward is the unmodified patch).
#
# Purpose: add three more indexer Celery workers next to the existing swh-indexer-mimetype service.
#
# docker-compose.yml (hunk -1,221 +1,258):
#   adds services swh-indexer-origin-head, swh-indexer-revision-metadata and
#   swh-indexer-origin-intrinsic-metadata. Each uses image swh/indexer-worker built from
#   ./dockerfiles/swh-indexer-worker, reads ./indexers.env, and sets SWH_WORKER_INSTANCE to
#   origin_head / revision_metadata / origin_intrinsic_metadata respectively. All three depend on
#   swh-scheduler-api, swh-idx-storage, swh-storage and amqp; only swh-indexer-revision-metadata
#   additionally depends on swh-objstorage.
# dockerfiles/swh-indexer-worker/Dockerfile (hunk -1,16 +1,19):
#   COPYs origin_head.yml, revision_metadata.yml and origin_intrinsic_metadata.yml into
#   /home/swh/.config/swh/indexer/, before the existing chown of /home/swh/.config/.
# New config files under dockerfiles/swh-indexer-worker/ (all endpoints use "cls: remote"):
#   origin_head.yml               - storage (:5002), indexer_storage (:5007), scheduler (:5008), plus a
#                                   "tasks" mapping: revision_metadata -> indexer_revision_metadata,
#                                   origin_intrinsic_metadata -> indexer_origin_metadata.
#   origin_intrinsic_metadata.yml - storage (:5002) and indexer_storage (:5007) only.
#   revision_metadata.yml         - storage (:5002), objstorage (:5003), indexer_storage (:5007), scheduler (:5008).
#
# NOTE(review): in this copy the patch's line breaks and indentation are collapsed into single spaces,
#   so blank context lines and YAML nesting are not recoverable from the text and the patch will not
#   apply as-is. Regenerate it from the commit instead of re-wrapping it by hand.
# NOTE(review): origin_head.yml appears to end with an added empty line (a bare "+" precedes the next
#   file header); the other two new configs do not - confirm whether that is intended.
diff --git a/docker-compose.yml b/docker-compose.yml index 46ec11a..c942cc8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,221 +1,258 @@ version: '2' services: amqp: image: rabbitmq:3.6-management ports: - 5018:15672 zookeeper: image: wurstmeister/zookeeper ports: - "2181:2181" kafka: image: wurstmeister/kafka ports: - "9092:9092" env_file: ./kafka.env depends_on: - zookeeper # Scheduler swh-scheduler-db: image: postgres:10 environment: POSTGRES_PASSWORD: testpassword POSTGRES_DB: swh-scheduler swh-scheduler-api: image: swh/scheduler-api build: ./dockerfiles/swh-scheduler-api env_file: ./scheduler.env depends_on: - swh-scheduler-db ports: - 5008:5008 swh-scheduler-listener: image: swh/scheduler-worker build: ./dockerfiles/swh-scheduler-worker env_file: ./scheduler.env command: listener depends_on: - swh-scheduler-api - amqp swh-scheduler-runner: image: swh/scheduler-worker build: ./dockerfiles/swh-scheduler-worker env_file: ./scheduler.env command: runner depends_on: - swh-scheduler-api - amqp # Graph storage swh-storage-db: image: postgres:10 environment: POSTGRES_PASSWORD: testpassword POSTGRES_DB: swh-storage swh-storage: build: ./dockerfiles/swh-storage image: swh/storage ports: - 5002:5002 depends_on: - swh-storage-db - swh-objstorage env_file: ./storage.env # Object storage swh-objstorage: build: ./dockerfiles/swh-objstorage image: swh/objstorage ports: - 5003:5003 # Indexer storage swh-idx-storage-db: image: postgres:10 environment: POSTGRES_PASSWORD: testpassword POSTGRES_DB: swh-idx-storage swh-idx-storage: build: ./dockerfiles/swh-indexer-storage image: swh/indexer-storage ports: - 5007:5007 depends_on: - swh-idx-storage-db environment: POSTGRES_PASSWORD: testpassword POSTGRES_DB: swh-idx-storage PGHOST: swh-idx-storage-db PGUSER: postgres # Web interface swh-web: build: ./dockerfiles/swh-web image: swh/web ports: - 8080:5004 depends_on: - swh-objstorage - swh-storage - swh-idx-storage # Lister Celery workers swh-listers-db: image: 
postgres:10 environment: POSTGRES_PASSWORD: testpassword swh-lister-debian: image: swh/listers-worker build: ./dockerfiles/swh-listers-worker env_file: ./listers.env environment: SWH_WORKER_INSTANCE: debian depends_on: - swh-listers-db - swh-scheduler-api - swh-storage - amqp swh-lister-github: image: swh/listers-worker build: ./dockerfiles/swh-listers-worker env_file: ./listers.env environment: SWH_WORKER_INSTANCE: github depends_on: - swh-listers-db - swh-scheduler-api - swh-storage - amqp swh-lister-gitlab: image: swh/listers-worker build: ./dockerfiles/swh-listers-worker env_file: ./listers.env environment: SWH_WORKER_INSTANCE: gitlab depends_on: - swh-listers-db - swh-scheduler-api - swh-storage - amqp swh-lister-npm: image: swh/listers-worker build: ./dockerfiles/swh-listers-worker env_file: ./listers.env environment: SWH_WORKER_INSTANCE: npm depends_on: - swh-listers-db - swh-scheduler-api - swh-storage - amqp swh-lister-pypi: image: swh/listers-worker build: ./dockerfiles/swh-listers-worker env_file: ./listers.env environment: SWH_WORKER_INSTANCE: pypi depends_on: - swh-listers-db - swh-scheduler-api - swh-storage - amqp # Indexer Celery workers swh-indexer-mimetype: image: swh/indexer-worker build: ./dockerfiles/swh-indexer-worker env_file: ./indexers.env environment: SWH_WORKER_INSTANCE: content_mimetype depends_on: - swh-scheduler-api - swh-idx-storage - swh-storage - swh-objstorage - amqp + swh-indexer-origin-head: + image: swh/indexer-worker + build: ./dockerfiles/swh-indexer-worker + env_file: ./indexers.env + environment: + SWH_WORKER_INSTANCE: origin_head + depends_on: + - swh-scheduler-api + - swh-idx-storage + - swh-storage + - amqp + + swh-indexer-revision-metadata: + image: swh/indexer-worker + build: ./dockerfiles/swh-indexer-worker + env_file: ./indexers.env + environment: + SWH_WORKER_INSTANCE: revision_metadata + depends_on: + - swh-scheduler-api + - swh-idx-storage + - swh-storage + - swh-objstorage + - amqp + + 
swh-indexer-origin-intrinsic-metadata: + image: swh/indexer-worker + build: ./dockerfiles/swh-indexer-worker + env_file: ./indexers.env + environment: + SWH_WORKER_INSTANCE: origin_intrinsic_metadata + depends_on: + - swh-scheduler-api + - swh-idx-storage + - swh-storage + - amqp + # Journal related swh-storage-listener: image: swh/storage-listener build: ./dockerfiles/swh-storage-listener env_file: ./storage.env depends_on: - swh-storage-db - kafka swh-journal-publisher: image: swh/journal-publisher build: ./dockerfiles/swh-journal-publisher depends_on: - kafka - swh-storage-listener swh-journal-client: image: swh/journal-client build: ./dockerfiles/swh-journal-client depends_on: - swh-journal-publisher diff --git a/dockerfiles/swh-indexer-worker/Dockerfile b/dockerfiles/swh-indexer-worker/Dockerfile index b13b71d..b6f33d7 100644 --- a/dockerfiles/swh-indexer-worker/Dockerfile +++ b/dockerfiles/swh-indexer-worker/Dockerfile @@ -1,16 +1,19 @@ FROM python:3 RUN export DEBIAN_FRONTEND=noninteractive && \ apt-get update && \ apt-get install -y \ libsystemd-dev postgresql-client RUN pip install swh-indexer RUN useradd -ms /bin/bash swh COPY entrypoint.sh / COPY mimetype.yml /home/swh/.config/swh/indexer/ +COPY origin_head.yml /home/swh/.config/swh/indexer/ +COPY revision_metadata.yml /home/swh/.config/swh/indexer/ +COPY origin_intrinsic_metadata.yml /home/swh/.config/swh/indexer/ RUN chown -R swh: /home/swh/.config/ USER swh ENTRYPOINT ["/entrypoint.sh"] diff --git a/dockerfiles/swh-indexer-worker/origin_head.yml b/dockerfiles/swh-indexer-worker/origin_head.yml new file mode 100644 index 0000000..72724af --- /dev/null +++ b/dockerfiles/swh-indexer-worker/origin_head.yml @@ -0,0 +1,19 @@ +storage: + cls: remote + args: + url: http://swh-storage:5002/ + +indexer_storage: + cls: remote + args: + url: http://swh-idx-storage:5007/ + +scheduler: + cls: remote + args: + url: http://swh-scheduler-api:5008/ + +tasks: + revision_metadata: indexer_revision_metadata + 
origin_intrinsic_metadata: indexer_origin_metadata + diff --git a/dockerfiles/swh-indexer-worker/origin_intrinsic_metadata.yml b/dockerfiles/swh-indexer-worker/origin_intrinsic_metadata.yml new file mode 100644 index 0000000..33e933c --- /dev/null +++ b/dockerfiles/swh-indexer-worker/origin_intrinsic_metadata.yml @@ -0,0 +1,9 @@ +storage: + cls: remote + args: + url: http://swh-storage:5002/ + +indexer_storage: + cls: remote + args: + url: http://swh-idx-storage:5007/ diff --git a/dockerfiles/swh-indexer-worker/revision_metadata.yml b/dockerfiles/swh-indexer-worker/revision_metadata.yml new file mode 100644 index 0000000..78654ed --- /dev/null +++ b/dockerfiles/swh-indexer-worker/revision_metadata.yml @@ -0,0 +1,19 @@ +storage: + cls: remote + args: + url: http://swh-storage:5002/ + +objstorage: + cls: remote + args: + url: http://swh-objstorage:5003/ + +indexer_storage: + cls: remote + args: + url: http://swh-idx-storage:5007/ + +scheduler: + cls: remote + args: + url: http://swh-scheduler-api:5008/