diff --git a/docker-compose.yml b/docker-compose.yml --- a/docker-compose.yml +++ b/docker-compose.yml @@ -31,6 +31,7 @@ command: # Needed for the reverse-proxy - "--web.external-url=/prometheus" + - "--config.file=/etc/prometheus/prometheus.yml" volumes: - "./prometheus.yml:/etc/prometheus/prometheus.yml:ro" restart: unless-stopped @@ -259,14 +260,13 @@ # Indexer Celery workers - swh-indexer-mimetype: + swh-indexer: image: swh/indexer-worker build: ./dockerfiles/swh-indexer-worker env_file: ./indexers.env environment: STATSD_HOST: prometheus-statsd-exporter STATSD_PORT: 9125 - SWH_WORKER_INSTANCE: content_mimetype depends_on: - swh-scheduler-runner - swh-idx-storage @@ -274,44 +274,6 @@ - swh-objstorage - amqp - swh-indexer-license: - image: swh/indexer-worker - build: ./dockerfiles/swh-indexer-worker - env_file: ./indexers.env - environment: - STATSD_HOST: prometheus-statsd-exporter - STATSD_PORT: 9125 - SWH_WORKER_INSTANCE: content_fossology_license - depends_on: - - swh-scheduler-runner - - swh-idx-storage - - swh-storage - - swh-objstorage - - amqp - - swh-indexer-origin-head: - image: swh/indexer-worker - build: ./dockerfiles/swh-indexer-worker - env_file: ./indexers.env - environment: - STATSD_HOST: prometheus-statsd-exporter - STATSD_PORT: 9125 - SWH_WORKER_INSTANCE: origin_head - depends_on: - - swh-scheduler-api - - swh-scheduler-runner - - swh-idx-storage - - swh-storage - - amqp - - swh-indexer-revision-metadata: - image: swh/indexer-worker - build: ./dockerfiles/swh-indexer-worker - env_file: ./indexers.env - environment: - STATSD_HOST: prometheus-statsd-exporter - STATSD_PORT: 9125 - SWH_WORKER_INSTANCE: revision_metadata depends_on: - swh-scheduler-runner - swh-idx-storage @@ -319,20 +281,6 @@ - swh-objstorage - amqp - swh-indexer-origin-intrinsic-metadata: - image: swh/indexer-worker - build: ./dockerfiles/swh-indexer-worker - env_file: ./indexers.env - environment: - STATSD_HOST: prometheus-statsd-exporter - STATSD_PORT: 9125 - SWH_WORKER_INSTANCE: origin_intrinsic_metadata - depends_on: - - swh-scheduler-runner - - swh-idx-storage - - swh-storage - - amqp - swh-indexer-journal-client: image: swh/indexer-journal-client build: ./dockerfiles/swh-indexer-journal-client diff --git a/dockerfiles/swh-indexer-storage/Dockerfile b/dockerfiles/swh-indexer-storage/Dockerfile --- a/dockerfiles/swh-indexer-storage/Dockerfile +++ b/dockerfiles/swh-indexer-storage/Dockerfile @@ -5,7 +5,9 @@ apt-get install -y \ libsystemd-dev postgresql-client +RUN pip install --upgrade pip setuptools wheel RUN pip install swh-indexer + COPY indexer_storage.yml / COPY entrypoint.sh / diff --git a/dockerfiles/swh-indexer-worker/Dockerfile b/dockerfiles/swh-indexer-worker/Dockerfile --- a/dockerfiles/swh-indexer-worker/Dockerfile +++ b/dockerfiles/swh-indexer-worker/Dockerfile @@ -5,16 +5,13 @@ apt-get install -y \ libsystemd-dev postgresql-client +RUN pip install --upgrade pip setuptools wheel RUN pip install swh-indexer + RUN useradd -ms /bin/bash swh -COPY entrypoint.sh / +USER swh -COPY mimetype.yml /home/swh/.config/swh/indexer/ -COPY fossology_license.yml /home/swh/.config/swh/indexer/ -COPY origin_head.yml /home/swh/.config/swh/indexer/ -COPY revision_metadata.yml /home/swh/.config/swh/indexer/ -COPY origin_intrinsic_metadata.yml /home/swh/.config/swh/indexer/ -RUN chown -R swh: /home/swh/.config/ +COPY entrypoint.sh / +COPY indexer.yml /home/swh/ -USER swh ENTRYPOINT ["/entrypoint.sh"] diff --git a/dockerfiles/swh-indexer-worker/entrypoint.sh b/dockerfiles/swh-indexer-worker/entrypoint.sh --- a/dockerfiles/swh-indexer-worker/entrypoint.sh +++ b/dockerfiles/swh-indexer-worker/entrypoint.sh @@ -36,6 +36,6 @@ --maxtasksperchild=${MAX_TASKS_PER_CHILD} \ -Ofair --loglevel=${LOGLEVEL} --without-gossip \ --without-mingle --without-heartbeat \ - --hostname "${SWH_WORKER_INSTANCE}.%h" + --hostname "${SWH_WORKER_INSTANCE}@%h" ;; esac diff --git a/dockerfiles/swh-indexer-worker/fossology_license.yml b/dockerfiles/swh-indexer-worker/fossology_license.yml deleted file mode 100644 --- a/dockerfiles/swh-indexer-worker/fossology_license.yml +++ /dev/null @@ -1,12 +0,0 @@ -storage: - cls: remote - args: - url: http://swh-storage:5002/ -objstorage: - cls: remote - args: - url: http://swh-objstorage:5003/ -indexer_storage: - cls: remote - args: - url: http://swh-idx-storage:5007/ diff --git a/dockerfiles/swh-indexer-worker/revision_metadata.yml b/dockerfiles/swh-indexer-worker/indexer.yml rename from dockerfiles/swh-indexer-worker/revision_metadata.yml rename to dockerfiles/swh-indexer-worker/indexer.yml --- a/dockerfiles/swh-indexer-worker/revision_metadata.yml +++ b/dockerfiles/swh-indexer-worker/indexer.yml @@ -2,18 +2,17 @@ cls: remote args: url: http://swh-storage:5002/ - objstorage: cls: remote args: url: http://swh-objstorage:5003/ - indexer_storage: cls: remote args: url: http://swh-idx-storage:5007/ - scheduler: cls: remote args: url: http://swh-scheduler-api:5008/ +celery: + task_broker: amqp://guest:guest@amqp// diff --git a/dockerfiles/swh-indexer-worker/mimetype.yml b/dockerfiles/swh-indexer-worker/mimetype.yml deleted file mode 100644 --- a/dockerfiles/swh-indexer-worker/mimetype.yml +++ /dev/null @@ -1,12 +0,0 @@ -storage: - cls: remote - args: - url: http://swh-storage:5002/ -objstorage: - cls: remote - args: - url: http://swh-objstorage:5003/ -indexer_storage: - cls: remote - args: - url: http://swh-idx-storage:5007/ diff --git a/dockerfiles/swh-indexer-worker/origin_head.yml b/dockerfiles/swh-indexer-worker/origin_head.yml deleted file mode 100644 --- a/dockerfiles/swh-indexer-worker/origin_head.yml +++ /dev/null @@ -1,19 +0,0 @@ -storage: - cls: remote - args: - url: http://swh-storage:5002/ - -indexer_storage: - cls: remote - args: - url: http://swh-idx-storage:5007/ - -scheduler: - cls: remote - args: - url: http://swh-scheduler-api:5008/ - -tasks: - revision_metadata: indexer_revision_metadata - origin_intrinsic_metadata: indexer_origin_metadata - diff --git a/dockerfiles/swh-indexer-worker/origin_intrinsic_metadata.yml b/dockerfiles/swh-indexer-worker/origin_intrinsic_metadata.yml deleted file mode 100644 --- a/dockerfiles/swh-indexer-worker/origin_intrinsic_metadata.yml +++ /dev/null @@ -1,9 +0,0 @@ -storage: - cls: remote - args: - url: http://swh-storage:5002/ - -indexer_storage: - cls: remote - args: - url: http://swh-idx-storage:5007/ diff --git a/dockerfiles/swh-listers-worker/Dockerfile b/dockerfiles/swh-listers-worker/Dockerfile --- a/dockerfiles/swh-listers-worker/Dockerfile +++ b/dockerfiles/swh-listers-worker/Dockerfile @@ -5,10 +5,12 @@ apt-get install -y \ libsystemd-dev postgresql-client +RUN pip install --upgrade pip setuptools wheel RUN pip install swh-lister + RUN useradd -ms /bin/bash swh -COPY entrypoint.sh / +COPY entrypoint.sh / COPY lister.yml /home/swh/.config/swh/ RUN chown -R swh: /home/swh/.config/ diff --git a/dockerfiles/swh-listers-worker/entrypoint.sh b/dockerfiles/swh-listers-worker/entrypoint.sh --- a/dockerfiles/swh-listers-worker/entrypoint.sh +++ b/dockerfiles/swh-listers-worker/entrypoint.sh @@ -66,6 +66,6 @@ --maxtasksperchild=${MAX_TASKS_PER_CHILD} \ -Ofair --loglevel=${LOGLEVEL} --without-gossip \ --without-mingle --without-heartbeat \ - --hostname "${SWH_WORKER_INSTANCE}@%h" + --hostname "lister-${SWH_WORKER_INSTANCE}@%h" ;; esac diff --git a/dockerfiles/swh-listers-worker/lister.yml b/dockerfiles/swh-listers-worker/lister.yml --- a/dockerfiles/swh-listers-worker/lister.yml +++ b/dockerfiles/swh-listers-worker/lister.yml @@ -2,8 +2,13 @@ cls: remote args: url: http://swh-storage:5002/ + scheduler: cls: remote args: url: http://swh-scheduler-api:5008/ + +celery: + task_broker: amqp://guest:guest@amqp// + lister_db_url: postgresql:///?service=swh diff --git a/dockerfiles/swh-loaders-worker/entrypoint.sh b/dockerfiles/swh-loaders-worker/entrypoint.sh --- a/dockerfiles/swh-loaders-worker/entrypoint.sh +++ b/dockerfiles/swh-loaders-worker/entrypoint.sh @@ -41,6 +41,6 @@ --maxtasksperchild=${MAX_TASKS_PER_CHILD} \ -Ofair --loglevel=${LOGLEVEL} --without-gossip \ --without-mingle --without-heartbeat \ - --hostname "${SWH_WORKER_INSTANCE}@%h" + --hostname "loader-${SWH_WORKER_INSTANCE}@%h" ;; esac diff --git a/dockerfiles/swh-loaders-worker/loader.yml b/dockerfiles/swh-loaders-worker/loader.yml --- a/dockerfiles/swh-loaders-worker/loader.yml +++ b/dockerfiles/swh-loaders-worker/loader.yml @@ -3,3 +3,5 @@ args: url: http://swh-storage:5002/ lister_db_url: postgresql:///?service=swh +celery: + task_broker: amqp://guest:guest@amqp// diff --git a/dockerfiles/swh-objstorage/Dockerfile b/dockerfiles/swh-objstorage/Dockerfile --- a/dockerfiles/swh-objstorage/Dockerfile +++ b/dockerfiles/swh-objstorage/Dockerfile @@ -5,9 +5,11 @@ apt-get install -y \ libsystemd-dev +RUN pip install -U pip setuptools wheel RUN pip install swh-objstorage -COPY objstorage.yml / + COPY entrypoint.sh / +COPY objstorage.yml / RUN mkdir -p /srv/softwareheritage/objects diff --git a/dockerfiles/swh-objstorage/objstorage.yml b/dockerfiles/swh-objstorage/objstorage.yml --- a/dockerfiles/swh-objstorage/objstorage.yml +++ b/dockerfiles/swh-objstorage/objstorage.yml @@ -2,4 +2,4 @@ cls: pathslicing args: root: /srv/softwareheritage/objects - slicing: 0:2/2:4/4:6 + slicing: 0:5 diff --git a/dockerfiles/swh-scheduler-api/Dockerfile b/dockerfiles/swh-scheduler-api/Dockerfile --- a/dockerfiles/swh-scheduler-api/Dockerfile +++ b/dockerfiles/swh-scheduler-api/Dockerfile @@ -5,7 +5,9 @@ apt-get install -y \ libsystemd-dev postgresql-client +RUN pip install --upgrade pip setuptools wheel RUN pip install swh-scheduler + COPY scheduler.yml / COPY entrypoint.sh / diff --git a/dockerfiles/swh-scheduler-api/entrypoint.sh b/dockerfiles/swh-scheduler-api/entrypoint.sh --- a/dockerfiles/swh-scheduler-api/entrypoint.sh +++ b/dockerfiles/swh-scheduler-api/entrypoint.sh @@ -40,5 +40,5 @@ --db-name ${POSTGRES_DB} echo Starting the swh-scheduler API server - exec swh-scheduler api-server /scheduler.yml + exec swh-scheduler --log-level ${LOGLEVEL} -C /scheduler.yml api-server esac diff --git a/dockerfiles/swh-scheduler-api/scheduler.yml b/dockerfiles/swh-scheduler-api/scheduler.yml --- a/dockerfiles/swh-scheduler-api/scheduler.yml +++ b/dockerfiles/swh-scheduler-api/scheduler.yml @@ -1,4 +1,4 @@ scheduler: cls: local args: - scheduling_db: service=swh-scheduler + db: service=swh-scheduler diff --git a/dockerfiles/swh-scheduler-worker/Dockerfile b/dockerfiles/swh-scheduler-worker/Dockerfile --- a/dockerfiles/swh-scheduler-worker/Dockerfile +++ b/dockerfiles/swh-scheduler-worker/Dockerfile @@ -5,12 +5,10 @@ apt-get install -y \ libsystemd-dev postgresql-client libpq-dev +RUN pip install --upgrade pip setuptools wheel RUN pip install swh-scheduler --no-binary psycopg2 -RUN mkdir -p /etc/softwareheritage/worker/ -COPY scheduler.yml /etc/softwareheritage/ -# XXX this should NOT be needed. -# Some cleanup work in config files handling seems necessary... -RUN ln -s /etc/softwareheritage/scheduler.yml /etc/softwareheritage/worker/ + +COPY scheduler.yml / COPY entrypoint.sh / ENTRYPOINT ["/entrypoint.sh"] diff --git a/dockerfiles/swh-scheduler-worker/entrypoint.sh b/dockerfiles/swh-scheduler-worker/entrypoint.sh --- a/dockerfiles/swh-scheduler-worker/entrypoint.sh +++ b/dockerfiles/swh-scheduler-worker/entrypoint.sh @@ -30,6 +30,6 @@ ;; *) echo Starting the swh-scheduler $1 - exec swh-scheduler --log-level ${LOGLEVEL} $@ + exec swh-scheduler --log-level ${LOGLEVEL} -C /scheduler.yml $@ ;; esac diff --git a/dockerfiles/swh-scheduler-worker/scheduler.yml b/dockerfiles/swh-scheduler-worker/scheduler.yml --- a/dockerfiles/swh-scheduler-worker/scheduler.yml +++ b/dockerfiles/swh-scheduler-worker/scheduler.yml @@ -1,2 +1,8 @@ -task_broker: amqp://guest:guest@amqp// -scheduling_db: postgresql:///?service=swh-scheduler +scheduler: + cls: local + args: + db: postgresql:///?service=swh-scheduler +celery: + task_broker: amqp://guest:guest@amqp// + broker_transport_options: + max_retries: 1 diff --git a/dockerfiles/swh-storage/Dockerfile b/dockerfiles/swh-storage/Dockerfile --- a/dockerfiles/swh-storage/Dockerfile +++ b/dockerfiles/swh-storage/Dockerfile @@ -5,7 +5,9 @@ apt-get install -y \ libsystemd-dev postgresql-client +RUN pip install --upgrade pip setuptools wheel RUN pip install swh-storage + COPY storage.yml / COPY entrypoint.sh / diff --git a/indexers.env b/indexers.env --- a/indexers.env +++ b/indexers.env @@ -1,3 +1,5 @@ -CONCURRENCY=1 +CONCURRENCY=4 MAX_TASKS_PER_CHILD=10 LOGLEVEL=DEBUG +SWH_WORKER_INSTANCE=indexer +SWH_CONFIG_FILENAME=/home/swh/indexer.yml