Page MenuHomeSoftware Heritage

D837.id2681.diff
No OneTemporary

D837.id2681.diff

diff --git a/docker-compose.yml b/docker-compose.yml
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -104,3 +104,70 @@
- swh-objstorage
- swh-storage
- swh-idx-storage
+
+# Lister Celery workers: one service per lister, all built from ./dockerfiles/swh-listers-worker and configured through ./listers.env
+
+ swh-listers-db: # PostgreSQL server the lister workers connect to (PGHOST in listers.env)
+ image: postgres:10
+ environment:
+ POSTGRES_PASSWORD: testpassword # same value as POSTGRES_PASSWORD in listers.env, which the worker entrypoint writes to ~/.pgpass
+
+ swh-lister-debian:
+ image: swh/listers-worker
+ build: ./dockerfiles/swh-listers-worker
+ env_file: ./listers.env
+ environment:
+ SWH_WORKER_INSTANCE: debian # entrypoint.sh derives the task module (swh.lister.debian.tasks), queue name and database name (swh-lister-debian) from this
+ depends_on:
+ - swh-listers-db
+ - swh-scheduler-api
+ - swh-storage
+ - amqp
+
+ swh-lister-github:
+ image: swh/listers-worker
+ build: ./dockerfiles/swh-listers-worker
+ env_file: ./listers.env
+ environment:
+ SWH_WORKER_INSTANCE: github # lister name consumed by entrypoint.sh
+ depends_on:
+ - swh-listers-db
+ - swh-scheduler-api
+ - swh-storage
+ - amqp
+
+ swh-lister-gitlab:
+ image: swh/listers-worker
+ build: ./dockerfiles/swh-listers-worker
+ env_file: ./listers.env
+ environment:
+ SWH_WORKER_INSTANCE: gitlab # lister name consumed by entrypoint.sh
+ depends_on:
+ - swh-listers-db
+ - swh-scheduler-api
+ - swh-storage
+ - amqp
+
+ swh-lister-npm:
+ image: swh/listers-worker
+ build: ./dockerfiles/swh-listers-worker
+ env_file: ./listers.env
+ environment:
+ SWH_WORKER_INSTANCE: npm # lister name consumed by entrypoint.sh
+ depends_on:
+ - swh-listers-db
+ - swh-scheduler-api
+ - swh-storage
+ - amqp
+
+ swh-lister-pypi:
+ image: swh/listers-worker
+ build: ./dockerfiles/swh-listers-worker
+ env_file: ./listers.env
+ environment:
+ SWH_WORKER_INSTANCE: pypi # lister name consumed by entrypoint.sh
+ depends_on:
+ - swh-listers-db
+ - swh-scheduler-api
+ - swh-storage
+ - amqp
diff --git a/dockerfiles/swh-listers-worker/Dockerfile b/dockerfiles/swh-listers-worker/Dockerfile
new file mode 100644
--- /dev/null
+++ b/dockerfiles/swh-listers-worker/Dockerfile
@@ -0,0 +1,27 @@
+# Image for the swh-lister Celery workers; every swh-lister-* service in
+# docker-compose.yml runs it with a different SWH_WORKER_INSTANCE.
+# NOTE(review): python:3 is a floating tag -- consider pinning a specific version.
+FROM python:3
+
+# OS packages: libsystemd-dev and the PostgreSQL client tools (entrypoint.sh
+# calls psql and createdb). The apt lists are removed in the same layer so
+# they do not end up in the image.
+RUN export DEBIAN_FRONTEND=noninteractive && \
+    apt-get update && \
+    apt-get install -y --no-install-recommends \
+      libsystemd-dev postgresql-client && \
+    rm -rf /var/lib/apt/lists/*
+
+# --no-cache-dir keeps pip's download cache out of the layer.
+RUN pip install --no-cache-dir swh-lister
+RUN useradd -ms /bin/bash swh
+COPY entrypoint.sh /
+
+# Shared lister configuration; entrypoint.sh symlinks it under a per-instance name.
+COPY lister.yml /home/swh/.config/swh/
+# The copied tree is owned by root; hand it to swh so the entrypoint, which
+# runs as swh, can create files below ~/.config.
+RUN chown -R swh: /home/swh/.config/
+
+USER swh
+ENTRYPOINT ["/entrypoint.sh"]
diff --git a/dockerfiles/swh-listers-worker/entrypoint.sh b/dockerfiles/swh-listers-worker/entrypoint.sh
new file mode 100755
--- /dev/null
+++ b/dockerfiles/swh-listers-worker/entrypoint.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+set -e
+
+if [[ -d /src ]] ; then
+ for srcrepo in /src/swh-* ; do
+ pushd $srcrepo
+ echo "WARNING: $srcrepo wil NOT be pip installed in dev mode"
+ echo " due to permission limitations."
+ pip install --user .
+ popd
+ done
+fi
+
+export POSTGRES_DB=swh-lister-${SWH_WORKER_INSTANCE}
+
+echo "${PGHOST}:5432:postgres:${PGUSER}:${POSTGRES_PASSWORD}" > ~/.pgpass
+echo "${PGHOST}:5432:${POSTGRES_DB}:${PGUSER}:${POSTGRES_PASSWORD}" >> ~/.pgpass
+cat > ~/.pg_service.conf <<EOF
+[swh]
+dbname=${POSTGRES_DB}
+host=${PGHOST}
+port=5432
+user=${PGUSER}
+EOF
+
+chmod 0400 ~/.pgpass
+
+mkdir -p ~/.config/swh/worker
+
+cat > ~/.config/swh/worker/${SWH_WORKER_INSTANCE}.ini <<EOF
+[main]
+task_broker = amqp://guest@amqp//
+task_modules = swh.lister.${SWH_WORKER_INSTANCE}.tasks
+task_queues = swh_lister_${SWH_WORKER_INSTANCE}
+task_soft_time_limit = 0
+EOF
+
+ln -s ~/.config/swh/lister.yml ~/.config/swh/lister-${SWH_WORKER_INSTANCE}.yml
+
+
+case "$1" in
+ "shell")
+ exec bash -i
+ ;;
+ *)
+ echo Setup ${POSTGRES_DB} database for ${SWH_WORKER_INSTANCE}
+
+ if psql -lqt | cut -d \| -f 1 | grep -qw ${POSTGRES_DB}; then
+ echo Database already exists, nothing to do
+ else
+ echo Creating database
+ createdb ${POSTGRES_DB}
+ echo Initialize database
+ python -m swh.lister.cli --create-tables --with-data \
+ --db-url postgres://${PGUSER}@${PGHOST}/${POSTGRES_DB} \
+ --lister ${SWH_WORKER_INSTANCE}
+ fi
+ echo Starting the swh-lister Celery worker for ${SWH_WORKER_INSTANCE}
+ exec python -m celery worker \
+ --app=swh.scheduler.celery_backend.config.app \
+ --pool=prefork --events \
+ --concurrency=${CONCURRENCY} \
+ --maxtasksperchild=${MAX_TASKS_PER_CHILD} \
+ -Ofair --loglevel=${LOGLEVEL} --without-gossip \
+ --without-mingle --without-heartbeat \
+ --hostname ${SWH_WORKER_INSTANCE}.${HOSTNAME}
+ ;;
+esac
diff --git a/dockerfiles/swh-listers-worker/lister.yml b/dockerfiles/swh-listers-worker/lister.yml
new file mode 100644
--- /dev/null
+++ b/dockerfiles/swh-listers-worker/lister.yml
@@ -0,0 +1,9 @@
+storage: # remote storage client configuration
+ cls: remote
+ args:
+ url: http://swh-storage:5002/ # host name is the swh-storage service of docker-compose.yml
+scheduler: # remote scheduler client configuration
+ cls: remote
+ args:
+ url: http://swh-scheduler-api:5008/ # host name is the swh-scheduler-api service of docker-compose.yml
+lister_db_url: service=swh # names the [swh] section that entrypoint.sh writes to ~/.pg_service.conf
diff --git a/listers.env b/listers.env
new file mode 100644
--- /dev/null
+++ b/listers.env
@@ -0,0 +1,6 @@
+CONCURRENCY=1
+MAX_TASKS_PER_CHILD=10
+LOGLEVEL=DEBUG
+POSTGRES_PASSWORD=testpassword
+PGHOST=swh-listers-db
+PGUSER=postgres

File Metadata

Mime Type
text/plain
Expires
Thu, Jul 3, 3:26 PM (1 w, 1 d ago)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
3219721

Event Timeline