Page Menu
Home
Software Heritage
Search
Configure Global Search
Log In
Files
F9312101
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Award Token
Flag For Later
Size
12 KB
Subscribers
None
View Options
diff --git a/docker-compose.yml b/docker-compose.yml
index e6b34fa..593904a 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,282 +1,353 @@
version: '2'
services:
amqp:
image: rabbitmq:3.6-management
ports:
- 5018:15672
zookeeper:
image: wurstmeister/zookeeper
ports:
- "2181:2181"
kafka:
image: wurstmeister/kafka
ports:
- "9092:9092"
env_file: ./kafka.env
depends_on:
- zookeeper
flower:
image: mher/flower
command: --broker=amqp://guest:guest@amqp:5672//
ports:
- 5555:5555
depends_on:
- amqp
# Scheduler
swh-scheduler-db:
image: postgres:10
environment:
POSTGRES_PASSWORD: testpassword
POSTGRES_DB: swh-scheduler
swh-scheduler-api:
image: swh/scheduler-api
build: ./dockerfiles/swh-scheduler-api
env_file: ./scheduler.env
depends_on:
- swh-scheduler-db
ports:
- 5008:5008
swh-scheduler-listener:
image: swh/scheduler-worker
build: ./dockerfiles/swh-scheduler-worker
env_file: ./scheduler.env
command: listener
depends_on:
- swh-scheduler-api
- amqp
swh-scheduler-runner:
image: swh/scheduler-worker
build: ./dockerfiles/swh-scheduler-worker
env_file: ./scheduler.env
- command: runner
+ command: runner -p 10
depends_on:
- swh-scheduler-api
- amqp
# Graph storage
swh-storage-db:
image: postgres:10
environment:
POSTGRES_PASSWORD: testpassword
POSTGRES_DB: swh-storage
swh-storage:
build: ./dockerfiles/swh-storage
image: swh/storage
ports:
- 5002:5002
depends_on:
- swh-storage-db
- swh-objstorage
env_file: ./storage.env
# Object storage
swh-objstorage:
build: ./dockerfiles/swh-objstorage
image: swh/objstorage
ports:
- 5003:5003
# Indexer storage
swh-idx-storage-db:
image: postgres:10
environment:
POSTGRES_PASSWORD: testpassword
POSTGRES_DB: swh-idx-storage
swh-idx-storage:
build: ./dockerfiles/swh-indexer-storage
image: swh/indexer-storage
ports:
- 5007:5007
depends_on:
- swh-idx-storage-db
environment:
POSTGRES_PASSWORD: testpassword
POSTGRES_DB: swh-idx-storage
PGHOST: swh-idx-storage-db
PGUSER: postgres
# Web interface
swh-web:
build: ./dockerfiles/swh-web
image: swh/web
ports:
- 8080:5004
depends_on:
- swh-objstorage
- swh-storage
- swh-idx-storage
swh-deposit-db:
image: postgres:10
env_file: ./deposit.env
swh-deposit:
build: ./dockerfiles/swh-deposit
image: swh/deposit
ports:
- 5006:5006
depends_on:
- swh-deposit-db
- swh-scheduler-api
env_file: ./deposit.env
environment:
PGHOST: swh-deposit-db
# Lister Celery workers
swh-listers-db:
image: postgres:10
environment:
POSTGRES_PASSWORD: testpassword
swh-lister-debian:
image: swh/listers-worker
build: ./dockerfiles/swh-listers-worker
env_file: ./listers.env
environment:
SWH_WORKER_INSTANCE: debian
depends_on:
- swh-listers-db
- swh-scheduler-api
- swh-storage
- amqp
swh-lister-github:
image: swh/listers-worker
build: ./dockerfiles/swh-listers-worker
env_file: ./listers.env
environment:
SWH_WORKER_INSTANCE: github
depends_on:
- swh-listers-db
- swh-scheduler-api
- swh-storage
- amqp
swh-lister-gitlab:
image: swh/listers-worker
build: ./dockerfiles/swh-listers-worker
env_file: ./listers.env
environment:
SWH_WORKER_INSTANCE: gitlab
depends_on:
- swh-listers-db
- swh-scheduler-api
- swh-storage
- amqp
swh-lister-npm:
image: swh/listers-worker
build: ./dockerfiles/swh-listers-worker
env_file: ./listers.env
environment:
SWH_WORKER_INSTANCE: npm
depends_on:
- swh-listers-db
- swh-scheduler-api
- swh-storage
- amqp
swh-lister-pypi:
image: swh/listers-worker
build: ./dockerfiles/swh-listers-worker
env_file: ./listers.env
environment:
SWH_WORKER_INSTANCE: pypi
depends_on:
- swh-listers-db
- swh-scheduler-api
- swh-storage
- amqp
# Indexer Celery workers
swh-indexer-mimetype:
image: swh/indexer-worker
build: ./dockerfiles/swh-indexer-worker
env_file: ./indexers.env
environment:
SWH_WORKER_INSTANCE: content_mimetype
depends_on:
- swh-scheduler-api
- swh-idx-storage
- swh-storage
- swh-objstorage
- amqp
swh-indexer-origin-head:
image: swh/indexer-worker
build: ./dockerfiles/swh-indexer-worker
env_file: ./indexers.env
environment:
SWH_WORKER_INSTANCE: origin_head
depends_on:
- swh-scheduler-api
- swh-idx-storage
- swh-storage
- amqp
swh-indexer-revision-metadata:
image: swh/indexer-worker
build: ./dockerfiles/swh-indexer-worker
env_file: ./indexers.env
environment:
SWH_WORKER_INSTANCE: revision_metadata
depends_on:
- swh-scheduler-api
- swh-idx-storage
- swh-storage
- swh-objstorage
- amqp
swh-indexer-origin-intrinsic-metadata:
image: swh/indexer-worker
build: ./dockerfiles/swh-indexer-worker
env_file: ./indexers.env
environment:
SWH_WORKER_INSTANCE: origin_intrinsic_metadata
depends_on:
- swh-scheduler-api
- swh-idx-storage
- swh-storage
- amqp
# Journal related
swh-storage-listener:
image: swh/storage-listener
build: ./dockerfiles/swh-storage-listener
env_file: ./storage.env
depends_on:
- swh-storage-db
- kafka
swh-journal-publisher:
image: swh/journal-publisher
build: ./dockerfiles/swh-journal-publisher
depends_on:
- kafka
- swh-storage-listener
swh-journal-client:
image: swh/journal-client
build: ./dockerfiles/swh-journal-client
depends_on:
- swh-journal-publisher
+# Loader Celery workers
+
+ swh-loader-debian:
+ image: swh/loaders-worker
+ build: ./dockerfiles/swh-loaders-worker
+ env_file: ./listers.env
+ environment:
+ SWH_WORKER_INSTANCE: debian
+ depends_on:
+ - swh-storage
+ - amqp
+
+ swh-loader-dir:
+ image: swh/loaders-worker
+ build: ./dockerfiles/swh-loaders-worker
+ env_file: ./listers.env
+ environment:
+ SWH_WORKER_INSTANCE: dir
+ depends_on:
+ - swh-storage
+ - amqp
+
+ swh-loader-git:
+ image: swh/loaders-worker
+ build: ./dockerfiles/swh-loaders-worker
+ env_file: ./listers.env
+ environment:
+ SWH_WORKER_INSTANCE: git
+ depends_on:
+ - swh-storage
+ - amqp
+
+ swh-loader-mercurial:
+ image: swh/loaders-worker
+ build: ./dockerfiles/swh-loaders-worker
+ env_file: ./listers.env
+ environment:
+ SWH_WORKER_INSTANCE: mercurial
+ depends_on:
+ - swh-storage
+ - amqp
+
+ swh-loader-pypi:
+ image: swh/loaders-worker
+ build: ./dockerfiles/swh-loaders-worker
+ env_file: ./listers.env
+ environment:
+ SWH_WORKER_INSTANCE: pypi
+ depends_on:
+ - swh-storage
+ - amqp
+
+ swh-loader-svn:
+ image: swh/loaders-worker
+ build: ./dockerfiles/swh-loaders-worker
+ env_file: ./listers.env
+ environment:
+ SWH_WORKER_INSTANCE: svn
+ depends_on:
+ - swh-storage
+ - amqp
+
+ swh-loader-tar:
+ image: swh/loaders-worker
+ build: ./dockerfiles/swh-loaders-worker
+ env_file: ./listers.env
+ environment:
+ SWH_WORKER_INSTANCE: tar
+ depends_on:
+ - swh-storage
+ - amqp
diff --git a/dockerfiles/swh-listers-worker/lister.yml b/dockerfiles/swh-listers-worker/lister.yml
index cc896ac..e4d8619 100644
--- a/dockerfiles/swh-listers-worker/lister.yml
+++ b/dockerfiles/swh-listers-worker/lister.yml
@@ -1,9 +1,9 @@
storage:
cls: remote
args:
url: http://swh-storage:5002/
scheduler:
cls: remote
args:
url: http://swh-scheduler-api:5008/
-lister_db_url: service=swh
+lister_db_url: postgresql:///?service=swh
diff --git a/dockerfiles/swh-loaders-worker/Dockerfile b/dockerfiles/swh-loaders-worker/Dockerfile
new file mode 100644
index 0000000..1c8a450
--- /dev/null
+++ b/dockerfiles/swh-loaders-worker/Dockerfile
@@ -0,0 +1,25 @@
+FROM python:3
+# Development headers, presumably needed to build the loaders' native deps.
+RUN export DEBIAN_FRONTEND=noninteractive && \
+    apt-get update && \
+    apt-get install -y \
+      libsystemd-dev libapr1-dev libaprutil1-dev libsvn-dev && \
+    rm -rf /var/lib/apt/lists/*
+
+RUN pip install --no-cache-dir \
+    swh-loader-debian \
+    swh-loader-dir \
+    swh-loader-git \
+    swh-loader-mercurial \
+    swh-loader-pypi \
+    swh-loader-svn \
+    swh-loader-tar
+
+# The worker runs as the unprivileged "swh" user, which owns its own config.
+RUN useradd -ms /bin/bash swh
+COPY entrypoint.sh /
+COPY loader.yml /home/swh/.config/swh/
+RUN chown -R swh: /home/swh/.config/
+
+USER swh
+ENTRYPOINT ["/entrypoint.sh"]
diff --git a/dockerfiles/swh-loaders-worker/entrypoint.sh b/dockerfiles/swh-loaders-worker/entrypoint.sh
new file mode 100755
index 0000000..82e80df
--- /dev/null
+++ b/dockerfiles/swh-loaders-worker/entrypoint.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+# Entrypoint of the swh loaders worker image: "shell" or a Celery worker.
+set -e
+
+if [[ -d /src ]] ; then
+    for srcrepo in /src/swh-* ; do
+        pushd "$srcrepo"
+        echo "WARNING: $srcrepo will NOT be pip installed in dev mode"
+        echo " due to permission limitations."
+        pip install --user .
+        popd
+    done
+fi
+
+echo Installed Python packages:
+pip list
+
+mkdir -p ~/.config/swh/worker
+# Per-instance worker settings: broker URL, task module and task queue.
+cat > ~/.config/swh/worker/"${SWH_WORKER_INSTANCE}".ini <<EOF
+[main]
+task_broker = amqp://guest@amqp//
+task_modules = swh.loader.${SWH_WORKER_INSTANCE}.tasks
+task_queues = swh_loader_${SWH_WORKER_INSTANCE}
+task_soft_time_limit = 0
+EOF
+# -f: the link outlives a container restart; "ln -s" would then fail under "set -e".
+ln -sf ~/.config/swh/loader.yml ~/.config/swh/"loader-${SWH_WORKER_INSTANCE}.yml"
+
+# CONCURRENCY, MAX_TASKS_PER_CHILD and LOGLEVEL must come from the environment.
+case "$1" in
+    "shell")
+        exec bash -i
+        ;;
+    *)
+        echo Starting the swh-loader Celery worker for ${SWH_WORKER_INSTANCE}
+        exec python -m celery worker \
+            --app=swh.scheduler.celery_backend.config.app \
+            --pool=prefork --events \
+            --concurrency="${CONCURRENCY}" \
+            --maxtasksperchild="${MAX_TASKS_PER_CHILD}" \
+            -Ofair --loglevel="${LOGLEVEL}" --without-gossip \
+            --without-mingle --without-heartbeat \
+            --hostname "${SWH_WORKER_INSTANCE}.${HOSTNAME}"
+        ;;
+esac
diff --git a/dockerfiles/swh-listers-worker/lister.yml b/dockerfiles/swh-loaders-worker/loader.yml
similarity index 55%
copy from dockerfiles/swh-listers-worker/lister.yml
copy to dockerfiles/swh-loaders-worker/loader.yml
index cc896ac..f977304 100644
--- a/dockerfiles/swh-listers-worker/lister.yml
+++ b/dockerfiles/swh-loaders-worker/loader.yml
@@ -1,9 +1,5 @@
storage:
cls: remote
args:
url: http://swh-storage:5002/
-scheduler:
- cls: remote
- args:
- url: http://swh-scheduler-api:5008/
lister_db_url: service=swh
diff --git a/dockerfiles/swh-scheduler-worker/entrypoint.sh b/dockerfiles/swh-scheduler-worker/entrypoint.sh
index bb0abf0..ccb7a24 100755
--- a/dockerfiles/swh-scheduler-worker/entrypoint.sh
+++ b/dockerfiles/swh-scheduler-worker/entrypoint.sh
@@ -1,47 +1,35 @@
#!/bin/bash
set -e
if [[ -d /src ]] ; then
for srcrepo in /src/swh-* ; do
pushd $srcrepo
pip install -e .
popd
done
fi
echo Installed Python packages:
pip list
echo "${PGHOST}:5432:${POSTGRES_DB}:${PGUSER}:${POSTGRES_PASSWORD}" > ~/.pgpass
cat > ~/.pg_service.conf <<EOF
[swh-scheduler]
dbname=${POSTGRES_DB}
host=${PGHOST}
port=5432
user=${PGUSER}
EOF
chmod 0600 ~/.pgpass
case "$1" in
"shell")
exec bash -i
;;
- "listener")
- echo Starting the swh-scheduler listener
- exec python -m swh.scheduler.celery_backend.listener
- ;;
- "runner")
- echo Starting the swh-scheduler runner
- exec sh -c 'while true; do
- echo running pending tasks at `/bin/date`;
- python -m swh.scheduler.celery_backend.runner;
- sleep 10;
- done' # beuark
- ;;
*)
- echo "Provide a command (shell|listener|runner)"
- exit 1
+ echo Starting the swh-scheduler $1  # $1 is the swh-scheduler subcommand
+ exec swh-scheduler --log-level "${LOGLEVEL:-INFO}" "$@"  # quoted: args pass through unsplit
;;
esac
diff --git a/scheduler.env b/scheduler.env
index ea489c8..941fb5c 100644
--- a/scheduler.env
+++ b/scheduler.env
@@ -1,5 +1,6 @@
POSTGRES_PASSWORD=testpassword
POSTGRES_DB=swh-scheduler
PGHOST=swh-scheduler-db
PGUSER=postgres
SWH_WORKER_INSTANCE=scheduler
+LOGLEVEL=INFO
File Metadata
Details
Attached
Mime Type
text/x-diff
Expires
Thu, Jul 3, 10:42 AM (2 w, 5 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3268177
Attached To
rDENV Development environment
Event Timeline
Log In to Comment