diff --git a/apps/swh-lister/Dockerfile b/apps/swh-lister/Dockerfile
new file mode 100644
--- /dev/null
+++ b/apps/swh-lister/Dockerfile
@@ -0,0 +1,45 @@
+# Container image for the Software Heritage (swh) lister Celery worker.
+FROM python:3.10-bullseye
+
+# Install system packages, then create the unprivileged "swh" account
+# (uid/gid 1000, home /opt/swh) and the /etc/swh directory.
+RUN apt-get -y update && \
+    apt-get -y upgrade && \
+    apt-get install -y libcmph-dev librdkafka-dev \
+    opam git r-base-core r-cran-jsonlite && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/* && \
+    addgroup --gid 1000 swh && \
+    useradd --gid 1000 --uid 1000 -m -d /opt/swh swh && \
+    mkdir /etc/swh
+
+# Everything below runs as the unprivileged user.
+USER swh
+WORKDIR /opt/swh
+
+COPY --chown=swh:swh requirements-frozen.txt /opt/swh/
+COPY --chown=swh:swh entrypoint.sh /opt/swh/
+
+ENV PYTHONPATH=/opt/swh
+# Presumably where pip, run as swh, places console scripts; TODO confirm.
+ENV PATH=/opt/swh/.local/bin:$PATH
+
+# gunicorn is pinned in requirements-frozen.txt, so it is not installed
+# separately here.
+RUN chmod u+x /opt/swh/entrypoint.sh && \
+  /usr/local/bin/python -m pip install --no-cache-dir --upgrade pip && \
+  pip install --no-cache-dir -r requirements-frozen.txt
+
+# Runtime defaults. entrypoint.sh reads SWH_WORKER_INSTANCE, CONCURRENCY,
+# MAX_TASKS_PER_CHILD and LOGLEVEL; the other two are presumably read by
+# the swh Python code (verify).
+ENV SWH_CONFIG_FILENAME=/etc/swh/config.yml
+ENV SWH_WORKER_INSTANCE=lister
+ENV CONCURRENCY=1
+ENV MAX_TASKS_PER_CHILD=1
+ENV LOGLEVEL=INFO
+ENV SWH_LOG_TARGET=journal
+
+# Exec form so that run-time arguments (e.g. "shell") are passed to the
+# script; the shell form ignores them and wraps the script in /bin/sh -c.
+ENTRYPOINT ["/opt/swh/entrypoint.sh"]
diff --git a/apps/swh-lister/entrypoint.sh b/apps/swh-lister/entrypoint.sh
new file mode 100644
--- /dev/null
+++ b/apps/swh-lister/entrypoint.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+#
+# Container entrypoint: "shell [ARGS...]" starts an interactive bash;
+# any other invocation starts the swh Celery worker.
+
+set -e
+
+# NOTE(review): the Dockerfile in this change does not add this helper to
+# the image, so source it only when present (e.g. mounted at run time)
+# instead of letting "set -e" abort on a missing file. Confirm intent.
+pyutils=/srv/softwareheritage/utils/pyutils.sh
+if [ -f "${pyutils}" ]; then
+    # shellcheck source=/dev/null
+    source "${pyutils}"
+    setup_pip
+fi
+
+case "$1" in
+    "shell")
+      shift
+      # "$*" joins the remaining arguments into one string for the message.
+      echo "Running command $*"
+      exec bash -i "$@"
+      ;;
+    *)
+      echo "Starting the swh Celery worker for ${SWH_WORKER_INSTANCE}"
+      # exec replaces this shell with the celery process.
+      exec python -m celery \
+          --app=swh.scheduler.celery_backend.config.app \
+          worker \
+          --pool=prefork --events \
+          --concurrency="${CONCURRENCY}" \
+          --maxtasksperchild="${MAX_TASKS_PER_CHILD}" \
+          -Ofair --loglevel="${LOGLEVEL}" \
+          --without-gossip --without-mingle --without-heartbeat \
+          --hostname "${SWH_WORKER_INSTANCE}@%h"
+      ;;
+esac
diff --git a/apps/swh-lister/requirements-frozen.txt b/apps/swh-lister/requirements-frozen.txt
new file mode 100644
--- /dev/null
+++ b/apps/swh-lister/requirements-frozen.txt
@@ -0,0 +1,99 @@
+aiohttp==3.8.1
+aiohttp-utils==3.1.1 +aiosignal==1.2.0 +amqp==5.1.1 +async-timeout==4.0.2 +attrs==22.1.0 +attrs-strict==1.0.0 +beautifulsoup4==4.11.1 +billiard==3.6.4.0 +blinker==1.5 +cassandra-driver==3.25.0 +celery==5.2.7 +certifi==2022.6.15 +cffi==1.15.1 +chardet==5.0.0 +charset-normalizer==2.1.1 +click==8.1.3 +click-didyoumean==0.3.0 +click-plugins==1.1.1 +click-repl==0.2.0 +confluent-kafka==1.9.2 +cryptography==37.0.4 +Deprecated==1.2.13 +distro==1.7.0 +exceptiongroup==1.0.0rc9 +Flask==2.2.2 +frozenlist==1.3.1 +geomet==0.2.1.post1 +gunicorn==20.1.0 +httplib2==0.20.4 +humanize==4.3.0 +hypothesis==6.54.5 +idna==3.3 +importlib-metadata==4.12.0 +iniconfig==1.1.1 +iso8601==1.0.2 +itsdangerous==2.1.2 +jaraco.classes==3.2.2 +jeepney==0.8.0 +Jinja2==3.1.2 +keyring==23.9.1 +kombu==5.2.4 +launchpadlib==1.10.16 +lazr.restfulclient==0.14.4 +lazr.uri==1.0.6 +lxml==4.9.1 +MarkupSafe==2.1.1 +mirakuru==2.4.2 +more-itertools==8.14.0 +msgpack==1.0.4 +multidict==6.0.2 +mypy-extensions==0.4.3 +oauthlib==3.2.0 +packaging==21.3 +pika==1.3.0 +pluggy==1.0.0 +port-for==0.6.2 +prompt-toolkit==3.0.31 +psutil==5.9.2 +psycopg2==2.9.3 +py==1.11.0 +pycparser==2.21 +pyparsing==3.0.9 +pytest==7.1.3 +pytest-postgresql==3.1.3 +python-dateutil==2.8.2 +python-debian==0.1.47 +python-magic==0.4.27 +python-mimeparse==1.6.0 +pytz==2022.2.1 +PyYAML==6.0 +redis==4.3.4 +requests==2.28.1 +SecretStorage==3.3.3 +sentry-sdk==1.9.8 +six==1.16.0 +sortedcontainers==2.4.0 +soupsieve==2.3.2.post1 +swh.core==2.14.0 +swh.counters==0.9.1 +swh.journal==1.1.0 +swh.lister==2.9.3 +swh.model==6.4.1 +swh.objstorage==2.0.1 +swh.perfecthash==0.1.2 +swh.scheduler==1.2.1 +swh.storage==1.6.0 +tenacity==8.0.1 +tomli==2.0.1 +typing_extensions==4.3.0 +urllib3==1.26.12 +vine==5.0.0 +wadllib==1.3.6 +wcwidth==0.2.5 +Werkzeug==2.2.2 +wrapt==1.14.1 +xmltodict==0.13.0 +yarl==1.8.1 +zipp==3.8.1 diff --git a/apps/swh-lister/requirements.txt b/apps/swh-lister/requirements.txt new file mode 100644 --- /dev/null +++ b/apps/swh-lister/requirements.txt @@ 
-0,0 +1,2 @@ +swh-lister +