diff --git a/Dockerfile b/Dockerfile --- a/Dockerfile +++ b/Dockerfile @@ -14,29 +14,31 @@ apt-get clean && \ rm -rf /var/lib/apt/lists/* -RUN pip install --upgrade pip setuptools wheel -RUN pip install swh-deposit swh-indexer swh-journal swh-lister swh-loader-debian \ - swh-loader-dir swh-loader-git swh-loader-mercurial swh-loader-pypi \ - swh-loader-svn swh-loader-tar swh-storage swh-objstorage \ - swh-scheduler swh-vault swh-web +RUN useradd -md /srv/softwareheritage -s /bin/bash swh +USER swh +RUN python3 -m venv /srv/softwareheritage/venv +ENV PATH="/srv/softwareheritage/venv/bin:${PATH}" +RUN pip install --upgrade pip setuptools wheel RUN pip install gunicorn -COPY services/swh-deposit/entrypoint.sh /swh-deposit/entrypoint.sh -COPY services/swh-indexer-journal-client/entrypoint.sh /swh-indexer-journal-client/entrypoint.sh -COPY services/swh-indexer-storage/entrypoint.sh /swh-indexer-storage/entrypoint.sh -COPY services/swh-indexer-worker/entrypoint.sh /swh-indexer-worker/entrypoint.sh -COPY services/swh-listers-worker/entrypoint.sh /swh-listers-worker/entrypoint.sh -COPY services/swh-loaders-worker/entrypoint.sh /swh-loaders-worker/entrypoint.sh -COPY services/swh-objstorage/entrypoint.sh /swh-objstorage/entrypoint.sh -COPY services/swh-scheduler-api/entrypoint.sh /swh-scheduler-api/entrypoint.sh -COPY services/swh-scheduler-worker/entrypoint.sh /swh-scheduler-worker/entrypoint.sh -COPY services/swh-storage/entrypoint.sh /swh-storage/entrypoint.sh -COPY services/swh-vault/entrypoint.sh /swh-vault/entrypoint.sh -COPY services/swh-web/entrypoint.sh /swh-web/entrypoint.sh - -COPY utils/pgsql.sh /swh-utils/pgsql.sh - -RUN useradd -ms /bin/bash swh +RUN pip install \ + swh-deposit \ + swh-indexer \ + swh-journal \ + swh-lister \ + swh-loader-debian \ + swh-loader-dir \ + swh-loader-git \ + swh-loader-mercurial \ + swh-loader-pypi \ + swh-loader-svn \ + swh-loader-tar \ + swh-storage \ + swh-objstorage \ + swh-scheduler \ + swh-vault \ + swh-web +COPY utils/*.sh /srv/softwareheritage/utils/ RUN mkdir -p /srv/softwareheritage/objects diff --git a/docker-compose.storage-replica.yml b/docker-compose.storage-replica.yml --- a/docker-compose.storage-replica.yml +++ b/docker-compose.storage-replica.yml @@ -1,15 +1,12 @@ version: '2' services: - # override web app to use the replica swh-web: environment: SWH_CONFIG_FILENAME: /web-replica.yml volumes: - "./conf/web-replica.yml:/web-replica.yml:ro" - depends_on: - - swh-storage-replica # create a dedicated db for the replica swh-storage-replica-db: @@ -22,8 +19,8 @@ # and an RPC server swh-storage-replica: - build: ./ image: swh/stack + build: ./ depends_on: - swh-storage-replica-db - swh-objstorage @@ -31,6 +28,7 @@ - ./env/storage-db-replica.env environment: SWH_CONFIG_FILENAME: /storage-replica.yml + entrypoint: /entrypoint.sh volumes: - "./conf/storage-replica.yml:/storage-replica.yml:ro" - "./services/swh-storage/entrypoint.sh:/entrypoint.sh:ro" @@ -38,8 +36,8 @@ # and the background process that keeps the replica in sync with the # main graph swh-storage-replica-replayer: - build: ./ image: swh/stack + build: ./ depends_on: - swh-storage-replica-db - swh-objstorage @@ -47,6 +45,7 @@ - ./env/storage-db-replica.env environment: SWH_CONFIG_FILENAME: /storage-replica.yml + entrypoint: /entrypoint.sh volumes: - "./conf/storage-replica.yml:/storage-replica.yml:ro" - "./services/swh-storage-replayer/entrypoint.sh:/entrypoint.sh:ro" diff --git a/docker-compose.yml b/docker-compose.yml --- a/docker-compose.yml +++ b/docker-compose.yml @@ -85,50 +85,53 @@ env_file: - ./env/scheduler-db.env - ./env/scheduler.env - entrypoint: /swh-scheduler-api/entrypoint.sh environment: SWH_CONFIG_FILENAME: /scheduler.yml SWH_SCHEDULER_CONFIG_FILE: /scheduler.yml + entrypoint: /entrypoint.sh depends_on: - swh-scheduler-db ports: - 5008:5008 volumes: - "./conf/scheduler.yml:/scheduler.yml:ro" + - "./services/swh-scheduler-api/entrypoint.sh:/entrypoint.sh:ro" swh-scheduler-listener: image: swh/stack build: ./ - entrypoint: /swh-scheduler-worker/entrypoint.sh env_file: - ./env/scheduler-db.env - ./env/scheduler.env environment: SWH_CONFIG_FILENAME: /scheduler.yml SWH_SCHEDULER_CONFIG_FILE: /scheduler.yml + entrypoint: /entrypoint.sh command: listener depends_on: - swh-scheduler-api - amqp volumes: - "./conf/scheduler.yml:/scheduler.yml:ro" + - "./services/swh-scheduler-worker/entrypoint.sh:/entrypoint.sh:ro" swh-scheduler-runner: image: swh/stack build: ./ - entrypoint: /swh-scheduler-worker/entrypoint.sh env_file: - ./env/scheduler-db.env - ./env/scheduler.env environment: SWH_CONFIG_FILENAME: /scheduler.yml SWH_SCHEDULER_CONFIG_FILE: /scheduler.yml + entrypoint: /entrypoint.sh command: runner -p 10 depends_on: - swh-scheduler-api - amqp volumes: - "./conf/scheduler.yml:/scheduler.yml:ro" + - "./services/swh-scheduler-worker/entrypoint.sh:/entrypoint.sh:ro" # Graph storage @@ -141,9 +144,8 @@ PGHOST: swh-storage: - build: ./ image: swh/stack - entrypoint: /swh-storage/entrypoint.sh + build: ./ ports: - 5002:5002 depends_on: @@ -154,21 +156,24 @@ - ./env/storage-db.env environment: SWH_CONFIG_FILENAME: /storage.yml + entrypoint: /entrypoint.sh volumes: - "./conf/storage.yml:/storage.yml:ro" + - "./services/swh-storage/entrypoint.sh:/entrypoint.sh:ro" # Object storage swh-objstorage: build: ./ image: swh/stack - entrypoint: /swh-objstorage/entrypoint.sh ports: - 5003:5003 environment: SWH_CONFIG_FILENAME: /objstorage.yml + entrypoint: /entrypoint.sh volumes: - "./conf/objstorage.yml:/objstorage.yml:ro" + - "./services/swh-objstorage/entrypoint.sh:/entrypoint.sh:ro" # Indexer storage @@ -181,9 +186,8 @@ PGHOST: swh-idx-storage: - build: ./ image: swh/stack - entrypoint: /swh-indexer-storage/entrypoint.sh + build: ./ ports: - 5007:5007 depends_on: @@ -192,15 +196,16 @@ - ./env/indexers-db.env environment: SWH_CONFIG_FILENAME: /indexer_storage.yml + entrypoint: /entrypoint.sh volumes: - "./conf/indexer_storage.yml:/indexer_storage.yml:ro" + - "./services/swh-indexer-storage/entrypoint.sh:/entrypoint.sh:ro" # Web interface swh-web: build: ./ image: swh/stack - entrypoint: /swh-web/entrypoint.sh ports: - 5004:5004 depends_on: @@ -215,6 +220,7 @@ entrypoint: /entrypoint.sh volumes: - "./conf/web.yml:/web.yml:ro" + - "./services/swh-web/entrypoint.sh:/entrypoint.sh:ro" - "./services/swh-web/settings.py:/srv/softwareheritage/localsettings.py:ro" swh-deposit-db: @@ -226,9 +232,8 @@ PGHOST: swh-deposit: - build: ./ - entrypoint: /swh-deposit/entrypoint.sh image: swh/stack + build: ./ ports: - 5006:5006 depends_on: @@ -237,8 +242,10 @@ env_file: - ./env/deposit-db.env - ./env/deposit.env + entrypoint: /entrypoint.sh volumes: - "./conf/deposit.yml:/deposit.yml:ro" + - "./services/swh-deposit/entrypoint.sh:/entrypoint.sh:ro" swh-vault-db: image: postgres:11 @@ -249,9 +256,8 @@ PGHOST: swh-vault-api: - build: ./ image: swh/stack - entrypoint: /swh-vault/entrypoint.sh + build: ./ env_file: - ./env/vault-db.env environment: @@ -264,21 +270,24 @@ - swh-objstorage - swh-storage - swh-scheduler-api + entrypoint: /entrypoint.sh volumes: - "./conf/vault-api.yml:/vault-api.yml:ro" + - "./services/swh-vault/entrypoint.sh:/entrypoint.sh:ro" swh-vault-worker: - build: ./ image: swh/stack - entrypoint: /swh-vault/entrypoint.sh + build: ./ command: worker environment: SWH_CONFIG_FILENAME: /cooker.yml depends_on: - swh-vault-api - swh-storage + entrypoint: /entrypoint.sh volumes: - "./conf/vault-worker.yml:/cooker.yml:ro" + - "./services/swh-vault/entrypoint.sh:/entrypoint.sh:ro" # Lister Celery workers @@ -294,7 +303,6 @@ swh-lister: image: swh/stack build: ./ - entrypoint: /swh-listers-worker/entrypoint.sh env_file: - ./env/listers-db.env - ./env/listers.env @@ -310,15 +318,16 @@ - swh-scheduler-runner - swh-storage - amqp + entrypoint: /entrypoint.sh volumes: - "./conf/lister.yml:/lister.yml:ro" + - "./services/swh-listers-worker/entrypoint.sh:/entrypoint.sh:ro" # Loader Celery workers swh-loader: image: swh/stack build: ./ - entrypoint: /swh-loaders-worker/entrypoint.sh env_file: - ./env/listers.env user: swh @@ -327,11 +336,13 @@ STATSD_PORT: 9125 SWH_WORKER_INSTANCE: loader SWH_CONFIG_FILENAME: /loader.yml + entrypoint: /entrypoint.sh depends_on: - swh-storage - amqp volumes: - "./conf/loader.yml:/loader.yml:ro" + - "./services/swh-loaders-worker/entrypoint.sh:/entrypoint.sh:ro" # Indexer Celery workers @@ -339,13 +350,13 @@ image: swh/stack build: ./ user: swh - entrypoint: /swh-indexer-worker/entrypoint.sh env_file: - ./env/indexers-db.env - ./env/indexers.env environment: STATSD_HOST: prometheus-statsd-exporter STATSD_PORT: 9125 + entrypoint: /entrypoint.sh depends_on: - swh-scheduler-runner - swh-idx-storage @@ -354,16 +365,18 @@ - amqp volumes: - "./conf/indexer.yml:/indexer.yml:ro" + - "./services/swh-indexer-worker/entrypoint.sh:/entrypoint.sh:ro" # Journal related swh-indexer-journal-client: image: swh/stack build: ./ - entrypoint: /swh-indexer-journal-client/entrypoint.sh + entrypoint: /entrypoint.sh depends_on: - kafka - swh-storage - swh-scheduler-api volumes: - "./conf/indexer_journal_client.yml:/etc/softwareheritage/indexer/journal_client.yml:ro" + - "./services/swh-indexer-journal-client/entrypoint.sh:/entrypoint.sh:ro" diff --git a/services/swh-deposit/entrypoint.sh b/services/swh-deposit/entrypoint.sh --- a/services/swh-deposit/entrypoint.sh +++ b/services/swh-deposit/entrypoint.sh @@ -2,24 +2,16 @@ set -ex -if [[ -d /src ]] ; then - pwd - for src_repo in /src/swh-* ; do - pushd $src_repo - echo "Installing ${src_repo}" - pip install -e . - popd - done -fi - -source /swh-utils/pgsql.sh +source /srv/softwareheritage/utils/pyutils.sh +setup_pip +source /srv/softwareheritage/utils/pgsql.sh setup_pgsql if [ "$1" = 'shell' ] ; then exec bash -i else - + wait_pgsql echo "Migrating db" diff --git a/services/swh-indexer-storage/entrypoint.sh b/services/swh-indexer-storage/entrypoint.sh --- a/services/swh-indexer-storage/entrypoint.sh +++ b/services/swh-indexer-storage/entrypoint.sh @@ -1,23 +1,11 @@ #!/bin/bash set -e -export PATH=${HOME}/.local/bin:${PATH} -if [[ -d /src ]] ; then - for srcrepo in /src/swh-* ; do - pushd $srcrepo - echo "WARNING: $srcrepo will NOT be pip installed in dev mode" - echo " due to permission limitations." - pip install --user . - popd - done -fi - -echo Installed Python packages: -pip list - -source /swh-utils/pgsql.sh +source /srv/softwareheritage/utils/pyutils.sh +setup_pip +source /srv/softwareheritage/utils/pgsql.sh setup_pgsql case "$1" in diff --git a/services/swh-indexer-worker/entrypoint.sh b/services/swh-indexer-worker/entrypoint.sh --- a/services/swh-indexer-worker/entrypoint.sh +++ b/services/swh-indexer-worker/entrypoint.sh @@ -1,21 +1,11 @@ #!/bin/bash set -e -export PATH=${HOME}/.local/bin:${PATH} -if [[ -d /src ]] ; then - for srcrepo in /src/swh-* ; do - pushd $srcrepo - echo "WARNING: $srcrepo will NOT be pip installed in dev mode" - echo " due to permission limitations." - pip install --user . - popd - done -fi - - -source /swh-utils/pgsql.sh +source /srv/softwareheritage/utils/pyutils.sh +setup_pip +source /srv/softwareheritage/utils/pgsql.sh setup_pgsql case "$1" in diff --git a/services/swh-listers-worker/entrypoint.sh b/services/swh-listers-worker/entrypoint.sh --- a/services/swh-listers-worker/entrypoint.sh +++ b/services/swh-listers-worker/entrypoint.sh @@ -1,23 +1,11 @@ #!/bin/bash set -e -export PATH=${HOME}/.local/bin:${PATH} -if [[ -d /src ]] ; then - for srcrepo in /src/swh-* ; do - pushd $srcrepo - echo "WARNING: $srcrepo wil NOT be pip installed in dev mode" - echo " due to permission limitations." - pip install --user . - popd - done -fi - -echo Installed Python packages: -pip list - -source /swh-utils/pgsql.sh +source /srv/softwareheritage/utils/pyutils.sh +setup_pip +source /srv/softwareheritage/utils/pgsql.sh setup_pgsql case "$1" in @@ -34,12 +22,13 @@ echo Creating database createdb ${POSTGRES_DB} - echo Initialize database - python -m swh.lister.cli \ - --db-url postgres://${PGUSER}@${PGHOST}/${POSTGRES_DB} \ - all fi + echo Initialize database + python -m swh.lister.cli \ + --db-url postgres://${PGUSER}@${PGHOST}/${POSTGRES_DB} \ + all + echo Waiting for RabbitMQ to start wait-for-it amqp:5672 -s --timeout=0 diff --git a/services/swh-loaders-worker/entrypoint.sh b/services/swh-loaders-worker/entrypoint.sh --- a/services/swh-loaders-worker/entrypoint.sh +++ b/services/swh-loaders-worker/entrypoint.sh @@ -1,21 +1,9 @@ #!/bin/bash set -e -export PATH=${HOME}/.local/bin:${PATH} - -if [[ -d /src ]] ; then - for srcrepo in /src/swh-* ; do - pushd $srcrepo - echo "WARNING: $srcrepo will NOT be pip installed in dev mode" - echo " due to permission limitations." - pip install --user . - popd - done -fi - -echo Installed Python packages: -pip list +source /srv/softwareheritage/utils/pyutils.sh +setup_pip case "$1" in "shell") diff --git a/services/swh-objstorage/entrypoint.sh b/services/swh-objstorage/entrypoint.sh --- a/services/swh-objstorage/entrypoint.sh +++ b/services/swh-objstorage/entrypoint.sh @@ -2,16 +2,8 @@ set -e -echo Step 1 -if [[ -d /src ]] ; then - echo Yes - for srcrepo in /src/swh-* ; do - echo installing $srcrepo - pushd $srcrepo - pip install -e . - popd - done -fi +source /srv/softwareheritage/utils/pyutils.sh +setup_pip echo Installed Python packages: pip list diff --git a/services/swh-scheduler-api/entrypoint.sh b/services/swh-scheduler-api/entrypoint.sh --- a/services/swh-scheduler-api/entrypoint.sh +++ b/services/swh-scheduler-api/entrypoint.sh @@ -2,19 +2,10 @@ set -e -if [[ -d /src ]] ; then - for srcrepo in /src/swh-* ; do - pushd $srcrepo - pip install -e . - popd - done -fi - -echo Installed Python packages: -pip list - -source /swh-utils/pgsql.sh +source /srv/softwareheritage/utils/pyutils.sh +setup_pip +source /srv/softwareheritage/utils/pgsql.sh setup_pgsql case "$1" in diff --git a/services/swh-scheduler-worker/entrypoint.sh b/services/swh-scheduler-worker/entrypoint.sh --- a/services/swh-scheduler-worker/entrypoint.sh +++ b/services/swh-scheduler-worker/entrypoint.sh @@ -2,19 +2,10 @@ set -e -if [[ -d /src ]] ; then - for srcrepo in /src/swh-* ; do - pushd $srcrepo - pip install -e . - popd - done -fi - -echo Installed Python packages: -pip list - -source /swh-utils/pgsql.sh +source /srv/softwareheritage/utils/pyutils.sh +setup_pip +source /srv/softwareheritage/utils/pgsql.sh setup_pgsql case "$1" in diff --git a/services/swh-storage-listener/entrypoint.sh b/services/swh-storage-listener/entrypoint.sh --- a/services/swh-storage-listener/entrypoint.sh +++ b/services/swh-storage-listener/entrypoint.sh @@ -2,19 +2,10 @@ set -e -if [[ -d /src ]] ; then - for srcrepo in /src/swh-* ; do - pushd $srcrepo - pip install -e . - popd - done -fi - -echo Installed Python packages: -pip list - -source /swh-utils/pgsql.sh +source /srv/softwareheritage/utils/pyutils.sh +setup_pip +source /srv/softwareheritage/utils/pgsql.sh setup_pgsql case "$1" in diff --git a/services/swh-storage-replayer/entrypoint.sh b/services/swh-storage-replayer/entrypoint.sh --- a/services/swh-storage-replayer/entrypoint.sh +++ b/services/swh-storage-replayer/entrypoint.sh @@ -2,19 +2,10 @@ set -e -if [[ -d /src ]] ; then - for srcrepo in /src/swh-* ; do - pushd $srcrepo - pip install -e . - popd - done -fi - -echo Installed Python packages: -pip list - -source /swh-utils/pgsql.sh +source /srv/softwareheritage/utils/pyutils.sh +setup_pip +source /srv/softwareheritage/utils/pgsql.sh setup_pgsql case "$1" in @@ -29,7 +20,9 @@ --db-name ${POSTGRES_DB} echo Starting the swh-storage Kafka storage replayer - exec swh-journal replay --broker kafka --prefix swh.journal.objects \ + exec swh-journal replay \ + --broker kafka \ + --prefix swh.journal.objects \ --consumer-id swh.storage.replica ;; esac diff --git a/services/swh-storage/entrypoint.sh b/services/swh-storage/entrypoint.sh --- a/services/swh-storage/entrypoint.sh +++ b/services/swh-storage/entrypoint.sh @@ -2,19 +2,10 @@ set -e -if [[ -d /src ]] ; then - for srcrepo in /src/swh-* ; do - pushd $srcrepo - pip install -e . - popd - done -fi - -echo Installed Python packages: -pip list - -source /swh-utils/pgsql.sh +source /srv/softwareheritage/utils/pyutils.sh +setup_pip +source /srv/softwareheritage/utils/pgsql.sh setup_pgsql case "$1" in diff --git a/services/swh-vault/entrypoint.sh b/services/swh-vault/entrypoint.sh --- a/services/swh-vault/entrypoint.sh +++ b/services/swh-vault/entrypoint.sh @@ -2,19 +2,10 @@ set -e -if [[ -d /src ]] ; then - for srcrepo in /src/swh-* ; do - pushd $srcrepo - pip install -e . - popd - done -fi - -echo Installed Python packages: -pip list - -source /swh-utils/pgsql.sh +source /srv/softwareheritage/utils/pyutils.sh +setup_pip +source /srv/softwareheritage/utils/pgsql.sh setup_pgsql case "$1" in diff --git a/services/swh-web/entrypoint.sh b/services/swh-web/entrypoint.sh --- a/services/swh-web/entrypoint.sh +++ b/services/swh-web/entrypoint.sh @@ -2,28 +2,19 @@ set -e -if [[ -d /src ]] ; then - for srcrepo in /src/swh-* ; do - pushd $srcrepo - echo installing $srcrepo - pip install -e . - popd - done -fi - -echo Installed Python packages: -pip list +source /srv/softwareheritage/utils/pyutils.sh +setup_pip case "$1" in "shell") exec bash -i ;; *) - echo "Migrating db" + echo "Migrating db using ${DJANGO_SETTINGS_MODULE}" django-admin migrate --settings=${DJANGO_SETTINGS_MODULE} echo "Creating admin user" - echo "from django.contrib.auth import get_user_model; User = get_user_model(); User.objects.create_superuser('admin', 'admin@swh-web.org', 'admin')" | python3 -m swh.web.manage shell + echo "from django.contrib.auth import get_user_model; User = get_user_model(); User.objects.create_superuser('admin', 'admin@swh-web.org', 'admin')" | python3 -m swh.web.manage shell || true echo "starting the swh-web server" exec gunicorn --bind 0.0.0.0:5004 \ diff --git a/utils/pyutils.sh b/utils/pyutils.sh new file mode 100755 --- /dev/null +++ b/utils/pyutils.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +setup_pip () { + echo Using pip from $(which pip) + + if [[ -d /src ]] ; then + for srcrepo in /src/swh-* ; do + pip install $srcrepo + done + fi + + echo Installed Python packages: + pip list +}