Page Menu · Home · Software Heritage

No OneTemporary

diff --git a/docker-compose.yml b/docker-compose.yml
index e6b34fa..593904a 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,282 +1,353 @@
version: '2'
services:
amqp:
image: rabbitmq:3.6-management
ports:
- 5018:15672
zookeeper:
image: wurstmeister/zookeeper
ports:
- "2181:2181"
kafka:
image: wurstmeister/kafka
ports:
- "9092:9092"
env_file: ./kafka.env
depends_on:
- zookeeper
flower:
image: mher/flower
command: --broker=amqp://guest:guest@amqp:5672//
ports:
- 5555:5555
depends_on:
- amqp
# Scheduler
swh-scheduler-db:
image: postgres:10
environment:
POSTGRES_PASSWORD: testpassword
POSTGRES_DB: swh-scheduler
swh-scheduler-api:
image: swh/scheduler-api
build: ./dockerfiles/swh-scheduler-api
env_file: ./scheduler.env
depends_on:
- swh-scheduler-db
ports:
- 5008:5008
swh-scheduler-listener:
image: swh/scheduler-worker
build: ./dockerfiles/swh-scheduler-worker
env_file: ./scheduler.env
command: listener
depends_on:
- swh-scheduler-api
- amqp
swh-scheduler-runner:
image: swh/scheduler-worker
build: ./dockerfiles/swh-scheduler-worker
env_file: ./scheduler.env
- command: runner
+ command: runner -p 10
depends_on:
- swh-scheduler-api
- amqp
# Graph storage
swh-storage-db:
image: postgres:10
environment:
POSTGRES_PASSWORD: testpassword
POSTGRES_DB: swh-storage
swh-storage:
build: ./dockerfiles/swh-storage
image: swh/storage
ports:
- 5002:5002
depends_on:
- swh-storage-db
- swh-objstorage
env_file: ./storage.env
# Object storage
swh-objstorage:
build: ./dockerfiles/swh-objstorage
image: swh/objstorage
ports:
- 5003:5003
# Indexer storage
swh-idx-storage-db:
image: postgres:10
environment:
POSTGRES_PASSWORD: testpassword
POSTGRES_DB: swh-idx-storage
swh-idx-storage:
build: ./dockerfiles/swh-indexer-storage
image: swh/indexer-storage
ports:
- 5007:5007
depends_on:
- swh-idx-storage-db
environment:
POSTGRES_PASSWORD: testpassword
POSTGRES_DB: swh-idx-storage
PGHOST: swh-idx-storage-db
PGUSER: postgres
# Web interface
swh-web:
build: ./dockerfiles/swh-web
image: swh/web
ports:
- 8080:5004
depends_on:
- swh-objstorage
- swh-storage
- swh-idx-storage
swh-deposit-db:
image: postgres:10
env_file: ./deposit.env
swh-deposit:
build: ./dockerfiles/swh-deposit
image: swh/deposit
ports:
- 5006:5006
depends_on:
- swh-deposit-db
- swh-scheduler-api
env_file: ./deposit.env
environment:
PGHOST: swh-deposit-db
# Lister Celery workers
swh-listers-db:
image: postgres:10
environment:
POSTGRES_PASSWORD: testpassword
swh-lister-debian:
image: swh/listers-worker
build: ./dockerfiles/swh-listers-worker
env_file: ./listers.env
environment:
SWH_WORKER_INSTANCE: debian
depends_on:
- swh-listers-db
- swh-scheduler-api
- swh-storage
- amqp
swh-lister-github:
image: swh/listers-worker
build: ./dockerfiles/swh-listers-worker
env_file: ./listers.env
environment:
SWH_WORKER_INSTANCE: github
depends_on:
- swh-listers-db
- swh-scheduler-api
- swh-storage
- amqp
swh-lister-gitlab:
image: swh/listers-worker
build: ./dockerfiles/swh-listers-worker
env_file: ./listers.env
environment:
SWH_WORKER_INSTANCE: gitlab
depends_on:
- swh-listers-db
- swh-scheduler-api
- swh-storage
- amqp
swh-lister-npm:
image: swh/listers-worker
build: ./dockerfiles/swh-listers-worker
env_file: ./listers.env
environment:
SWH_WORKER_INSTANCE: npm
depends_on:
- swh-listers-db
- swh-scheduler-api
- swh-storage
- amqp
swh-lister-pypi:
image: swh/listers-worker
build: ./dockerfiles/swh-listers-worker
env_file: ./listers.env
environment:
SWH_WORKER_INSTANCE: pypi
depends_on:
- swh-listers-db
- swh-scheduler-api
- swh-storage
- amqp
# Indexer Celery workers
swh-indexer-mimetype:
image: swh/indexer-worker
build: ./dockerfiles/swh-indexer-worker
env_file: ./indexers.env
environment:
SWH_WORKER_INSTANCE: content_mimetype
depends_on:
- swh-scheduler-api
- swh-idx-storage
- swh-storage
- swh-objstorage
- amqp
swh-indexer-origin-head:
image: swh/indexer-worker
build: ./dockerfiles/swh-indexer-worker
env_file: ./indexers.env
environment:
SWH_WORKER_INSTANCE: origin_head
depends_on:
- swh-scheduler-api
- swh-idx-storage
- swh-storage
- amqp
swh-indexer-revision-metadata:
image: swh/indexer-worker
build: ./dockerfiles/swh-indexer-worker
env_file: ./indexers.env
environment:
SWH_WORKER_INSTANCE: revision_metadata
depends_on:
- swh-scheduler-api
- swh-idx-storage
- swh-storage
- swh-objstorage
- amqp
swh-indexer-origin-intrinsic-metadata:
image: swh/indexer-worker
build: ./dockerfiles/swh-indexer-worker
env_file: ./indexers.env
environment:
SWH_WORKER_INSTANCE: origin_intrinsic_metadata
depends_on:
- swh-scheduler-api
- swh-idx-storage
- swh-storage
- amqp
# Journal related
swh-storage-listener:
image: swh/storage-listener
build: ./dockerfiles/swh-storage-listener
env_file: ./storage.env
depends_on:
- swh-storage-db
- kafka
swh-journal-publisher:
image: swh/journal-publisher
build: ./dockerfiles/swh-journal-publisher
depends_on:
- kafka
- swh-storage-listener
swh-journal-client:
image: swh/journal-client
build: ./dockerfiles/swh-journal-client
depends_on:
- swh-journal-publisher
+# Loader Celery workers: one service per loader type, all sharing the swh/loaders-worker image.
+# SWH_WORKER_INSTANCE names the loader; the image entrypoint derives the Celery task module and queue from it.
+ swh-loader-debian:
+ image: swh/loaders-worker
+ build: ./dockerfiles/swh-loaders-worker
+ env_file: ./listers.env
+ environment:
+ SWH_WORKER_INSTANCE: debian
+ depends_on:
+ - swh-storage
+ - amqp
+
+ swh-loader-dir:
+ image: swh/loaders-worker
+ build: ./dockerfiles/swh-loaders-worker
+ env_file: ./listers.env
+ environment:
+ SWH_WORKER_INSTANCE: dir
+ depends_on:
+ - swh-storage
+ - amqp
+
+ swh-loader-git:
+ image: swh/loaders-worker
+ build: ./dockerfiles/swh-loaders-worker
+ env_file: ./listers.env
+ environment:
+ SWH_WORKER_INSTANCE: git
+ depends_on:
+ - swh-storage
+ - amqp
+
+ swh-loader-mercurial:
+ image: swh/loaders-worker
+ build: ./dockerfiles/swh-loaders-worker
+ env_file: ./listers.env
+ environment:
+ SWH_WORKER_INSTANCE: mercurial
+ depends_on:
+ - swh-storage
+ - amqp
+
+ swh-loader-pypi:
+ image: swh/loaders-worker
+ build: ./dockerfiles/swh-loaders-worker
+ env_file: ./listers.env
+ environment:
+ SWH_WORKER_INSTANCE: pypi
+ depends_on:
+ - swh-storage
+ - amqp
+
+ swh-loader-svn:
+ image: swh/loaders-worker
+ build: ./dockerfiles/swh-loaders-worker
+ env_file: ./listers.env
+ environment:
+ SWH_WORKER_INSTANCE: svn
+ depends_on:
+ - swh-storage
+ - amqp
+
+ swh-loader-tar:
+ image: swh/loaders-worker
+ build: ./dockerfiles/swh-loaders-worker
+ env_file: ./listers.env
+ environment:
+ SWH_WORKER_INSTANCE: tar
+ depends_on:
+ - swh-storage
+ - amqp
diff --git a/dockerfiles/swh-listers-worker/lister.yml b/dockerfiles/swh-listers-worker/lister.yml
index cc896ac..e4d8619 100644
--- a/dockerfiles/swh-listers-worker/lister.yml
+++ b/dockerfiles/swh-listers-worker/lister.yml
@@ -1,9 +1,9 @@
storage:
cls: remote
args:
url: http://swh-storage:5002/
scheduler:
cls: remote
args:
url: http://swh-scheduler-api:5008/
-lister_db_url: service=swh
+lister_db_url: postgresql:///?service=swh
diff --git a/dockerfiles/swh-loaders-worker/Dockerfile b/dockerfiles/swh-loaders-worker/Dockerfile
new file mode 100644
index 0000000..1c8a450
--- /dev/null
+++ b/dockerfiles/swh-loaders-worker/Dockerfile
@@ -0,0 +1,33 @@
+# Image for the swh loader Celery workers (started by /entrypoint.sh).
+FROM python:3
+
+# System headers installed ahead of the pip step below (presumably needed to
+# build the loaders' native dependencies -- confirm). The apt package lists
+# are removed in the same layer so they do not end up in the image.
+RUN export DEBIAN_FRONTEND=noninteractive && \
+    apt-get update && \
+    apt-get install -y \
+        libsystemd-dev libapr1-dev libaprutil1-dev libsvn-dev && \
+    rm -rf /var/lib/apt/lists/*
+
+# One loader package per worker instance declared in docker-compose.yml;
+# --no-cache-dir keeps pip's download cache out of the image layer.
+RUN pip install --no-cache-dir \
+    swh-loader-debian \
+    swh-loader-dir \
+    swh-loader-git \
+    swh-loader-mercurial \
+    swh-loader-pypi \
+    swh-loader-svn \
+    swh-loader-tar
+
+# Unprivileged account the worker runs as.
+RUN useradd -ms /bin/bash swh
+COPY entrypoint.sh /
+
+# Loader configuration; the entrypoint also writes under ~/.config/swh, so
+# the directory must belong to the swh user.
+COPY loader.yml /home/swh/.config/swh/
+RUN chown -R swh: /home/swh/.config/
+
+USER swh
+ENTRYPOINT ["/entrypoint.sh"]
diff --git a/dockerfiles/swh-loaders-worker/entrypoint.sh b/dockerfiles/swh-loaders-worker/entrypoint.sh
new file mode 100755
index 0000000..82e80df
--- /dev/null
+++ b/dockerfiles/swh-loaders-worker/entrypoint.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+set -e
+
+if [[ -d /src ]] ; then
+ for srcrepo in /src/swh-* ; do
+ pushd $srcrepo
+ echo "WARNING: $srcrepo will NOT be pip installed in dev mode"
+ echo " due to permission limitations."
+ pip install --user .
+ popd
+ done
+fi
+
+echo Installed Python packages:
+pip list
+
+mkdir -p ~/.config/swh/worker
+
+cat > ~/.config/swh/worker/${SWH_WORKER_INSTANCE}.ini <<EOF
+[main]
+task_broker = amqp://guest@amqp//
+task_modules = swh.loader.${SWH_WORKER_INSTANCE}.tasks
+task_queues = swh_loader_${SWH_WORKER_INSTANCE}
+task_soft_time_limit = 0
+EOF
+
+ln -s ~/.config/swh/loader.yml ~/.config/swh/loader-${SWH_WORKER_INSTANCE}.yml
+
+
+case "$1" in
+ "shell")
+ exec bash -i
+ ;;
+ *)
+ echo Starting the swh-loader Celery worker for ${SWH_WORKER_INSTANCE}
+ exec python -m celery worker \
+ --app=swh.scheduler.celery_backend.config.app \
+ --pool=prefork --events \
+ --concurrency=${CONCURRENCY} \
+ --maxtasksperchild=${MAX_TASKS_PER_CHILD} \
+ -Ofair --loglevel=${LOGLEVEL} --without-gossip \
+ --without-mingle --without-heartbeat \
+ --hostname ${SWH_WORKER_INSTANCE}.${HOSTNAME}
+ ;;
+esac
diff --git a/dockerfiles/swh-listers-worker/lister.yml b/dockerfiles/swh-loaders-worker/loader.yml
similarity index 55%
copy from dockerfiles/swh-listers-worker/lister.yml
copy to dockerfiles/swh-loaders-worker/loader.yml
index cc896ac..f977304 100644
--- a/dockerfiles/swh-listers-worker/lister.yml
+++ b/dockerfiles/swh-loaders-worker/loader.yml
@@ -1,9 +1,5 @@
storage:
cls: remote
args:
url: http://swh-storage:5002/
-scheduler:
- cls: remote
- args:
- url: http://swh-scheduler-api:5008/
lister_db_url: service=swh
diff --git a/dockerfiles/swh-scheduler-worker/entrypoint.sh b/dockerfiles/swh-scheduler-worker/entrypoint.sh
index bb0abf0..ccb7a24 100755
--- a/dockerfiles/swh-scheduler-worker/entrypoint.sh
+++ b/dockerfiles/swh-scheduler-worker/entrypoint.sh
@@ -1,47 +1,35 @@
#!/bin/bash
set -e
if [[ -d /src ]] ; then
for srcrepo in /src/swh-* ; do
pushd $srcrepo
pip install -e .
popd
done
fi
echo Installed Python packages:
pip list
echo "${PGHOST}:5432:${POSTGRES_DB}:${PGUSER}:${POSTGRES_PASSWORD}" > ~/.pgpass
cat > ~/.pg_service.conf <<EOF
[swh-scheduler]
dbname=${POSTGRES_DB}
host=${PGHOST}
port=5432
user=${PGUSER}
EOF
chmod 0600 ~/.pgpass
case "$1" in
"shell")
exec bash -i
;;
- "listener")
- echo Starting the swh-scheduler listener
- exec python -m swh.scheduler.celery_backend.listener
- ;;
- "runner")
- echo Starting the swh-scheduler runner
- exec sh -c 'while true; do
- echo running pending tasks at `/bin/date`;
- python -m swh.scheduler.celery_backend.runner;
- sleep 10;
- done' # beuark
- ;;
*)
- echo "Provide a command (shell|listener|runner)"
- exit 1
+ echo "Starting the swh-scheduler $1"
+ exec swh-scheduler --log-level "${LOGLEVEL:-INFO}" "$@"  # "$@" passes each argument through intact; INFO matches scheduler.env when LOGLEVEL is unset
;;
esac
diff --git a/scheduler.env b/scheduler.env
index ea489c8..941fb5c 100644
--- a/scheduler.env
+++ b/scheduler.env
@@ -1,5 +1,6 @@
POSTGRES_PASSWORD=testpassword
POSTGRES_DB=swh-scheduler
PGHOST=swh-scheduler-db
PGUSER=postgres
SWH_WORKER_INSTANCE=scheduler
+LOGLEVEL=INFO

File Metadata

Mime Type
text/x-diff
Expires
Thu, Jul 3, 10:42 AM (2 w, 5 h ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3268177

Event Timeline