diff --git a/docker-compose.yml b/docker-compose.yml index e6b34fa..593904a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,282 +1,353 @@ version: '2' services: amqp: image: rabbitmq:3.6-management ports: - 5018:15672 zookeeper: image: wurstmeister/zookeeper ports: - "2181:2181" kafka: image: wurstmeister/kafka ports: - "9092:9092" env_file: ./kafka.env depends_on: - zookeeper flower: image: mher/flower command: --broker=amqp://guest:guest@amqp:5672// ports: - 5555:5555 depends_on: - amqp # Scheduler swh-scheduler-db: image: postgres:10 environment: POSTGRES_PASSWORD: testpassword POSTGRES_DB: swh-scheduler swh-scheduler-api: image: swh/scheduler-api build: ./dockerfiles/swh-scheduler-api env_file: ./scheduler.env depends_on: - swh-scheduler-db ports: - 5008:5008 swh-scheduler-listener: image: swh/scheduler-worker build: ./dockerfiles/swh-scheduler-worker env_file: ./scheduler.env command: listener depends_on: - swh-scheduler-api - amqp swh-scheduler-runner: image: swh/scheduler-worker build: ./dockerfiles/swh-scheduler-worker env_file: ./scheduler.env - command: runner + command: runner -p 10 depends_on: - swh-scheduler-api - amqp # Graph storage swh-storage-db: image: postgres:10 environment: POSTGRES_PASSWORD: testpassword POSTGRES_DB: swh-storage swh-storage: build: ./dockerfiles/swh-storage image: swh/storage ports: - 5002:5002 depends_on: - swh-storage-db - swh-objstorage env_file: ./storage.env # Object storage swh-objstorage: build: ./dockerfiles/swh-objstorage image: swh/objstorage ports: - 5003:5003 # Indexer storage swh-idx-storage-db: image: postgres:10 environment: POSTGRES_PASSWORD: testpassword POSTGRES_DB: swh-idx-storage swh-idx-storage: build: ./dockerfiles/swh-indexer-storage image: swh/indexer-storage ports: - 5007:5007 depends_on: - swh-idx-storage-db environment: POSTGRES_PASSWORD: testpassword POSTGRES_DB: swh-idx-storage PGHOST: swh-idx-storage-db PGUSER: postgres # Web interface swh-web: build: ./dockerfiles/swh-web image: swh/web ports: - 8080:5004 depends_on: - swh-objstorage - swh-storage - swh-idx-storage swh-deposit-db: image: postgres:10 env_file: ./deposit.env swh-deposit: build: ./dockerfiles/swh-deposit image: swh/deposit ports: - 5006:5006 depends_on: - swh-deposit-db - swh-scheduler-api env_file: ./deposit.env environment: PGHOST: swh-deposit-db # Lister Celery workers swh-listers-db: image: postgres:10 environment: POSTGRES_PASSWORD: testpassword swh-lister-debian: image: swh/listers-worker build: ./dockerfiles/swh-listers-worker env_file: ./listers.env environment: SWH_WORKER_INSTANCE: debian depends_on: - swh-listers-db - swh-scheduler-api - swh-storage - amqp swh-lister-github: image: swh/listers-worker build: ./dockerfiles/swh-listers-worker env_file: ./listers.env environment: SWH_WORKER_INSTANCE: github depends_on: - swh-listers-db - swh-scheduler-api - swh-storage - amqp swh-lister-gitlab: image: swh/listers-worker build: ./dockerfiles/swh-listers-worker env_file: ./listers.env environment: SWH_WORKER_INSTANCE: gitlab depends_on: - swh-listers-db - swh-scheduler-api - swh-storage - amqp swh-lister-npm: image: swh/listers-worker build: ./dockerfiles/swh-listers-worker env_file: ./listers.env environment: SWH_WORKER_INSTANCE: npm depends_on: - swh-listers-db - swh-scheduler-api - swh-storage - amqp swh-lister-pypi: image: swh/listers-worker build: ./dockerfiles/swh-listers-worker env_file: ./listers.env environment: SWH_WORKER_INSTANCE: pypi depends_on: - swh-listers-db - swh-scheduler-api - swh-storage - amqp # Indexer Celery workers swh-indexer-mimetype: image: swh/indexer-worker build: ./dockerfiles/swh-indexer-worker env_file: ./indexers.env environment: SWH_WORKER_INSTANCE: content_mimetype depends_on: - swh-scheduler-api - swh-idx-storage - swh-storage - swh-objstorage - amqp swh-indexer-origin-head: image: swh/indexer-worker build: ./dockerfiles/swh-indexer-worker env_file: ./indexers.env environment: SWH_WORKER_INSTANCE: origin_head depends_on: - swh-scheduler-api - swh-idx-storage - swh-storage - amqp swh-indexer-revision-metadata: image: swh/indexer-worker build: ./dockerfiles/swh-indexer-worker env_file: ./indexers.env environment: SWH_WORKER_INSTANCE: revision_metadata depends_on: - swh-scheduler-api - swh-idx-storage - swh-storage - swh-objstorage - amqp swh-indexer-origin-intrinsic-metadata: image: swh/indexer-worker build: ./dockerfiles/swh-indexer-worker env_file: ./indexers.env environment: SWH_WORKER_INSTANCE: origin_intrinsic_metadata depends_on: - swh-scheduler-api - swh-idx-storage - swh-storage - amqp # Journal related swh-storage-listener: image: swh/storage-listener build: ./dockerfiles/swh-storage-listener env_file: ./storage.env depends_on: - swh-storage-db - kafka swh-journal-publisher: image: swh/journal-publisher build: ./dockerfiles/swh-journal-publisher depends_on: - kafka - swh-storage-listener swh-journal-client: image: swh/journal-client build: ./dockerfiles/swh-journal-client depends_on: - swh-journal-publisher +# Loader Celery workers + + swh-loader-debian: + image: swh/loaders-worker + build: ./dockerfiles/swh-loaders-worker + env_file: ./listers.env + environment: + SWH_WORKER_INSTANCE: debian + depends_on: + - swh-storage + - amqp + + swh-loader-dir: + image: swh/loaders-worker + build: ./dockerfiles/swh-loaders-worker + env_file: ./listers.env + environment: + SWH_WORKER_INSTANCE: dir + depends_on: + - swh-storage + - amqp + + swh-loader-git: + image: swh/loaders-worker + build: ./dockerfiles/swh-loaders-worker + env_file: ./listers.env + environment: + SWH_WORKER_INSTANCE: git + depends_on: + - swh-storage + - amqp + + swh-loader-mercurial: + image: swh/loaders-worker + build: ./dockerfiles/swh-loaders-worker + env_file: ./listers.env + environment: + SWH_WORKER_INSTANCE: mercurial + depends_on: + - swh-storage + - amqp + + swh-loader-pypi: + image: swh/loaders-worker + build: ./dockerfiles/swh-loaders-worker + env_file: ./listers.env + environment: + SWH_WORKER_INSTANCE: pypi + depends_on: + - swh-storage + - amqp + + swh-loader-svn: + image: swh/loaders-worker + build: ./dockerfiles/swh-loaders-worker + env_file: ./listers.env + environment: + SWH_WORKER_INSTANCE: svn + depends_on: + - swh-storage + - amqp + + swh-loader-tar: + image: swh/loaders-worker + build: ./dockerfiles/swh-loaders-worker + env_file: ./listers.env + environment: + SWH_WORKER_INSTANCE: tar + depends_on: + - swh-storage + - amqp diff --git a/dockerfiles/swh-listers-worker/lister.yml b/dockerfiles/swh-listers-worker/lister.yml index cc896ac..e4d8619 100644 --- a/dockerfiles/swh-listers-worker/lister.yml +++ b/dockerfiles/swh-listers-worker/lister.yml @@ -1,9 +1,9 @@ storage: cls: remote args: url: http://swh-storage:5002/ scheduler: cls: remote args: url: http://swh-scheduler-api:5008/ -lister_db_url: service=swh +lister_db_url: postgresql:///?service=swh diff --git a/dockerfiles/swh-loaders-worker/Dockerfile b/dockerfiles/swh-loaders-worker/Dockerfile new file mode 100644 index 0000000..1c8a450 --- /dev/null +++ b/dockerfiles/swh-loaders-worker/Dockerfile @@ -0,0 +1,25 @@ +FROM python:3 + +RUN export DEBIAN_FRONTEND=noninteractive && \ + apt-get update && \ + apt-get install -y \ + libsystemd-dev libapr1-dev libaprutil1-dev libsvn-dev + +RUN pip install \ + swh-loader-debian \ + swh-loader-dir \ + swh-loader-git \ + swh-loader-mercurial \ + swh-loader-pypi \ + swh-loader-svn \ + swh-loader-tar + + +RUN useradd -ms /bin/bash swh +COPY entrypoint.sh / + +COPY loader.yml /home/swh/.config/swh/ +RUN chown -R swh: /home/swh/.config/ + +USER swh +ENTRYPOINT ["/entrypoint.sh"] diff --git a/dockerfiles/swh-loaders-worker/entrypoint.sh b/dockerfiles/swh-loaders-worker/entrypoint.sh new file mode 100755 index 0000000..82e80df --- /dev/null +++ b/dockerfiles/swh-loaders-worker/entrypoint.sh @@ -0,0 +1,46 @@ +#!/bin/bash + +set -e + +if [[ -d /src ]] ; then + for srcrepo in /src/swh-* ; do + pushd $srcrepo + echo "WARNING: $srcrepo will NOT be pip installed in dev mode" + echo " due to permission limitations." + pip install --user . + popd + done +fi + +echo Installed Python packages: +pip list + +mkdir -p ~/.config/swh/worker + +cat > ~/.config/swh/worker/${SWH_WORKER_INSTANCE}.ini < ~/.pgpass cat > ~/.pg_service.conf <