diff --git a/apps/swh-loader-git/Dockerfile b/apps/swh-loader-git/Dockerfile
new file mode 100644
index 0000000..3bd6bd7
--- /dev/null
+++ b/apps/swh-loader-git/Dockerfile
@@ -0,0 +1,45 @@
+# Deeply inspired from the Dockerfile of the swh-graph project
+FROM python:3.10-bullseye
+
+# Install system libraries and create the unprivileged "swh" account
+# (uid/gid 1000, home /opt/swh). The apt package lists are deleted in the
+# same layer that downloaded them so they do not persist in the image.
+RUN apt-get -y update && \
+    apt-get -y upgrade && \
+    apt-get install -y --no-install-recommends libcmph-dev librdkafka-dev && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/* && \
+    addgroup --gid 1000 swh && \
+    useradd --gid 1000 --uid 1000 -m -d /opt/swh swh && \
+    mkdir /etc/swh
+
+# Everything from here on runs as the unprivileged user.
+USER swh
+WORKDIR /opt/swh
+
+COPY --chown=swh:swh requirements-frozen.txt /opt/swh
+COPY --chown=swh:swh entrypoint.sh /opt/swh
+
+ENV PYTHONPATH=/opt/swh
+# /opt/swh/.local/bin is where per-user pip installs for "swh" are expected
+# to place console scripts.
+ENV PATH=/opt/swh/.local/bin:$PATH
+
+# gunicorn is already pinned in requirements-frozen.txt, so there is no
+# separate, unpinned "pip install gunicorn" step.
+RUN chmod u+x /opt/swh/entrypoint.sh && \
+    /usr/local/bin/python -m pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir -r requirements-frozen.txt
+
+# Runtime defaults. entrypoint.sh reads SWH_WORKER_INSTANCE, CONCURRENCY,
+# MAX_TASKS_PER_CHILD and LOGLEVEL; SWH_CONFIG_FILENAME is presumably read by
+# the swh code itself (TODO confirm).
+ENV SWH_CONFIG_FILENAME=/etc/swh/config.yml
+ENV SWH_WORKER_INSTANCE=loader
+ENV CONCURRENCY=1
+ENV MAX_TASKS_PER_CHILD=1
+ENV LOGLEVEL=INFO
+
+# Exec form, so that arguments given to "docker run" reach the script (its
+# "shell" mode depends on $1) and no intermediate /bin/sh wraps it.
+ENTRYPOINT ["/opt/swh/entrypoint.sh"]
diff --git a/apps/swh-loader-git/entrypoint.sh b/apps/swh-loader-git/entrypoint.sh
new file mode 100644
index 0000000..7b9542b
--- /dev/null
+++ b/apps/swh-loader-git/entrypoint.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+# Container entrypoint for the swh git loader image.
+#
+#   entrypoint.sh shell [ARGS...]   exec an interactive bash, forwarding ARGS
+#   entrypoint.sh [anything else]   exec the swh Celery worker
+#
+# The worker is configured through the SWH_WORKER_INSTANCE, CONCURRENCY,
+# MAX_TASKS_PER_CHILD and LOGLEVEL environment variables.
+
+set -e
+
+# NOTE(review): the Dockerfile of this image does not install pyutils.sh, so
+# it is presumably only available when mounted from outside. Load it when
+# present rather than aborting at startup: a failing "source" is fatal under
+# "set -e".
+if [ -f /srv/softwareheritage/utils/pyutils.sh ]; then
+    source /srv/softwareheritage/utils/pyutils.sh
+    setup_pip
+fi
+
+case "$1" in
+    "shell")
+        shift
+        # "$*" joins the remaining arguments into one string for display.
+        echo "Running command $*"
+        exec bash -i "$@"
+        ;;
+    *)
+        echo "Starting the swh Celery worker for ${SWH_WORKER_INSTANCE}"
+        # exec replaces this shell with the worker process.
+        exec python -m celery \
+            --app=swh.scheduler.celery_backend.config.app \
+            worker \
+            --pool=prefork --events \
+            --concurrency="${CONCURRENCY}" \
+            --max-tasks-per-child="${MAX_TASKS_PER_CHILD}" \
+            -Ofair --loglevel="${LOGLEVEL}" \
+            --hostname "${SWH_WORKER_INSTANCE}@%h"
+        ;;
+esac
diff --git a/apps/swh-loader-git/requirements-frozen.txt b/apps/swh-loader-git/requirements-frozen.txt
new file mode 100644
index 0000000..782803c
--- /dev/null
+++ b/apps/swh-loader-git/requirements-frozen.txt
@@ -0,0 +1,87 @@
+aiohttp==3.8.1
+aiohttp-utils==3.1.1
+aiosignal==1.2.0
+amqp==5.1.1
+async-timeout==4.0.2
+attrs==22.1.0
+attrs-strict==1.0.0
+billiard==3.6.4.0
+blinker==1.5
+cassandra-driver==3.25.0
+celery==5.2.7
+certifi==2022.6.15
+cffi==1.15.1
+chardet==5.0.0
+charset-normalizer==2.1.1
+click==8.1.3
+click-didyoumean==0.3.0
+click-plugins==1.1.1
+click-repl==0.2.0
+confluent-kafka==1.9.2
+Deprecated==1.2.13
+dulwich==0.20.45
+exceptiongroup==1.0.0rc8
+Flask==2.2.2
+frozenlist==1.3.1
+geomet==0.2.1.post1
+gunicorn==20.1.0
+humanize==4.3.0
+hypothesis==6.54.4
+idna==3.3
+importlib-metadata==4.12.0
+iniconfig==1.1.1
+iso8601==1.0.2
+itsdangerous==2.1.2
+Jinja2==3.1.2
+kombu==5.2.4
+MarkupSafe==2.1.1
+mirakuru==2.4.2
+msgpack==1.0.4
+multidict==6.0.2
+mypy-extensions==0.4.3
+packaging==21.3
+pika==1.3.0
+pkginfo==1.8.3
+pluggy==1.0.0
+port-for==0.6.2
+prompt-toolkit==3.0.30
+psutil==5.9.1
+psycopg2==2.9.3
+py==1.11.0
+pycparser==2.21
+pyparsing==3.0.9
+pytest==7.1.2
+pytest-postgresql==3.1.3
+python-dateutil==2.8.2
+python-debian==0.1.44
+python-magic==0.4.27
+python-mimeparse==1.6.0
+pytz==2022.2.1
+PyYAML==6.0
+redis==4.3.4
+requests==2.28.1
+retrying==1.3.3
+sentry-sdk==1.9.5
+six==1.16.0
+sortedcontainers==2.4.0
+swh.core==2.14.0
+swh.counters==0.9.1
+swh.journal==1.1.0
+swh.loader.core==3.5.0
+swh.loader.git==1.9.1
+swh.model==6.4.0
+swh.objstorage==2.0.1
+swh.perfecthash==0.1.2
+swh.scheduler==1.2.1
+swh.storage==1.6.0
+tenacity==8.0.1
+toml==0.10.2
+tomli==2.0.1
+typing_extensions==4.3.0
+urllib3==1.26.12
+vine==5.0.0
+wcwidth==0.2.5
+Werkzeug==2.2.2
+wrapt==1.14.1
+yarl==1.8.1
+zipp==3.8.1
diff --git a/apps/swh-loader-git/requirements.txt b/apps/swh-loader-git/requirements.txt
new file mode 100644
index 0000000..ff835ca
--- /dev/null
+++ b/apps/swh-loader-git/requirements.txt
@@ -0,0 +1 @@
+swh-loader-git