diff --git a/apps/swh-loader-mercurial/Dockerfile b/apps/swh-loader-mercurial/Dockerfile
new file mode 100644
--- /dev/null
+++ b/apps/swh-loader-mercurial/Dockerfile
@@ -0,0 +1,48 @@
+# Deeply inspired from the Dockerfile of the swh-graph project
+FROM python:3.10-bullseye
+
+# Install the system packages and create the unprivileged swh user
+# (uid/gid 1000). The apt package lists are removed in the same layer so
+# they do not end up in the image.
+RUN apt-get -y update && \
+    apt-get -y upgrade && \
+    apt-get install -y --no-install-recommends \
+        libcmph-dev \
+        librdkafka-dev \
+        mercurial && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/* && \
+    addgroup --gid 1000 swh && \
+    useradd --gid 1000 --uid 1000 -m -d /opt/swh swh && \
+    mkdir /etc/swh
+
+# Everything below runs as the unprivileged user.
+USER swh
+WORKDIR /opt/swh
+
+# NOTE(review): requirements-frozen.txt must exist in the build context; it
+# is presumably generated from requirements.txt -- confirm with the build tooling.
+COPY --chown=swh:swh requirements-frozen.txt /opt/swh
+COPY --chown=swh:swh entrypoint.sh /opt/swh
+
+ENV PYTHONPATH=/opt/swh
+# Put the .local/bin directory of the swh home (/opt/swh) on PATH.
+ENV PATH=/opt/swh/.local/bin:$PATH
+
+# --no-cache-dir on every pip call keeps pip's download cache out of the layer.
+RUN chmod u+x /opt/swh/entrypoint.sh && \
+    /usr/local/bin/python -m pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r requirements-frozen.txt && \
+    pip install --no-cache-dir gunicorn
+
+# Runtime defaults. SWH_WORKER_INSTANCE, CONCURRENCY, MAX_TASKS_PER_CHILD and
+# LOGLEVEL are read by entrypoint.sh; all of them can be overridden at run time.
+ENV SWH_CONFIG_FILENAME=/etc/swh/config.yml
+ENV SWH_WORKER_INSTANCE=loader
+ENV CONCURRENCY=1
+ENV MAX_TASKS_PER_CHILD=5
+ENV LOGLEVEL=INFO
+
+# Exec form is required here: with the shell form, CMD and "docker run"
+# arguments are dropped, so entrypoint.sh would never see "shell" as $1.
+ENTRYPOINT ["/opt/swh/entrypoint.sh"]
diff --git a/apps/swh-loader-mercurial/entrypoint.sh b/apps/swh-loader-mercurial/entrypoint.sh
new file mode 100644
--- /dev/null
+++ b/apps/swh-loader-mercurial/entrypoint.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+# Container entrypoint.
+#   shell [ARGS...]  start an interactive bash, forwarding ARGS to it
+#   anything else    start the swh Celery worker, configured through the
+#                    SWH_WORKER_INSTANCE, CONCURRENCY, MAX_TASKS_PER_CHILD
+#                    and LOGLEVEL environment variables
+
+set -e
+
+case "${1:-}" in
+  "shell")
+    shift
+    # $* (not $@) so the arguments are joined into the single echoed string.
+    echo "Running command $*"
+    exec bash -i "$@"
+    ;;
+  *)
+    echo "Starting the swh Celery worker for ${SWH_WORKER_INSTANCE}"
+    # exec replaces this shell with the worker process.
+    exec python -m celery \
+         --app=swh.scheduler.celery_backend.config.app \
+         worker \
+         --pool=prefork --events \
+         --concurrency="${CONCURRENCY}" \
+         --max-tasks-per-child="${MAX_TASKS_PER_CHILD}" \
+         -Ofair --loglevel="${LOGLEVEL}" \
+         --without-gossip --without-mingle --without-heartbeat \
+         --hostname "${SWH_WORKER_INSTANCE}@%h"
+    ;;
+esac
diff --git a/apps/swh-loader-mercurial/requirements.txt b/apps/swh-loader-mercurial/requirements.txt
new file mode 100644
--- /dev/null
+++ b/apps/swh-loader-mercurial/requirements.txt
@@ -0,0 +1,2 @@
+swh-loader-mercurial
+