diff --git a/Dockerfile.loaders b/Dockerfile.loaders
new file mode 100644
index 0000000..173ac7e
--- /dev/null
+++ b/Dockerfile.loaders
@@ -0,0 +1,18 @@
+ARG BASE
+
+FROM $BASE
+
+# Package sources copied from the build context; installed by the RUN step below.
+COPY swh-scheduler /app/swh-scheduler
+COPY swh-loader-core /app/swh-loader-core
+COPY swh-loader-git /app/swh-loader-git
+# COPY swh-loader-svn /app/swh-loader-svn
+# COPY swh-loader-mercurial /app/swh-loader-mercurial
+
+# --no-cache-dir keeps pip's download/wheel cache out of the image layer.
+RUN pip install --no-cache-dir /app/swh-scheduler \
+ && pip install --no-cache-dir /app/swh-loader-core \
+ && pip install --no-cache-dir /app/swh-loader-git
+# To add the svn/mercurial loaders, uncomment their COPY lines above and append:
+#   && pip install --no-cache-dir /app/swh-loader-svn
+#   && pip install --no-cache-dir /app/swh-loader-mercurial
diff --git a/kubernetes/40-loaders.yml b/kubernetes/40-loaders.yml
new file mode 100644
index 0000000..b8a2ad2
--- /dev/null
+++ b/kubernetes/40-loaders.yml
@@ -0,0 +1,141 @@
+---
+# Configuration and entrypoint scripts shared by the loaders init container
+# and worker container (mounted file-by-file via subPath in the Deployment).
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: loaders
+data:
+  config.yml: |
+    storage:
+      cls: pipeline
+      steps:
+      - cls: buffer
+        min_batch_size:
+          content: 10000
+          content_bytes: 104857600
+          directory: 1000
+          revision: 1000
+      - cls: filter
+      - cls: retry
+      - cls: remote
+        url: http://storage:5002/
+
+    celery:
+      task_broker: amqp://guest:guest@amqp//
+      task_queues:
+        - swh.loader.dir.tasks.LoadDirRepository
+        - swh.loader.git.tasks.LoadDiskGitRepository
+        - swh.loader.git.tasks.UncompressAndLoadDiskGitRepository
+        - swh.loader.git.tasks.UpdateGitRepository
+        - swh.loader.package.archive.tasks.LoadArchive
+        - swh.loader.package.cran.tasks.LoadCRAN
+        - swh.loader.package.debian.tasks.LoadDebian
+        - swh.loader.package.npm.tasks.LoadNpm
+        - swh.loader.package.pypi.tasks.LoadPyPI
+
+        # - swh.loader.mercurial.tasks.LoadArchiveMercurial
+        # - swh.loader.mercurial.tasks.LoadMercurial
+        # - swh.loader.svn.tasks.DumpMountAndLoadSvnRepository
+        # - swh.loader.svn.tasks.LoadSvnRepository
+        # - swh.loader.svn.tasks.MountAndLoadSvnRepository
+  entrypoint-init.sh: |
+    #!/bin/bash
+
+    set -e
+
+    # Wait for RabbitMQ to accept connections.
+    wait-for-it amqp:5672 -s --timeout=0
+
+    # Wait for the scheduler API, then register the task types with it.
+    wait-for-it scheduler:5008 -s --timeout=0
+
+    swh scheduler --url http://scheduler:5008 task-type register
+  entrypoint.sh: |
+    #!/bin/bash
+
+    set -e
+
+    echo Starting the swh Celery worker
+    # %h is expanded by Celery to the host name, so each pod gets its own
+    # node name instead of every replica sharing a single hard-coded one.
+    exec python -m celery \
+      --app=swh.scheduler.celery_backend.config.app \
+      worker \
+      --pool=prefork --events \
+      --concurrency="${CONCURRENCY}" \
+      --max-tasks-per-child="${MAX_TASKS_PER_CHILD}" \
+      -Ofair --loglevel="${LOGLEVEL}" \
+      --hostname "swh-worker@%h"
+
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: loaders
+  labels:
+    app: loaders
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: loaders
+  strategy:
+    type: RollingUpdate
+    rollingUpdate:
+      maxSurge: 1
+  template:
+    metadata:
+      labels:
+        app: loaders
+    spec:
+      initContainers:
+        - name: loaders-init
+          image: swh/loaders:latest
+          imagePullPolicy: Always
+          command:
+            - /entrypoint.sh
+          volumeMounts:
+            - name: config
+              mountPath: /etc/softwareheritage/config.yml
+              subPath: config.yml
+              readOnly: true
+            - name: config
+              mountPath: /entrypoint.sh
+              subPath: entrypoint-init.sh
+              readOnly: true
+      containers:
+        - name: loaders
+          image: swh/loaders:latest
+          imagePullPolicy: Always
+          command:
+            - /entrypoint.sh
+          # NOTE(review): 5002 is the storage RPC port used in config.yml; the
+          # Celery worker does not appear to listen on it - confirm it is needed.
+          ports:
+            - containerPort: 5002
+          env:
+            - name: CONCURRENCY
+              value: "1"
+            - name: MAX_TASKS_PER_CHILD
+              value: "5"
+            - name: LOGLEVEL
+              value: "DEBUG"
+            - name: SWH_CONFIG_FILENAME
+              value: /etc/softwareheritage/config.yml
+          volumeMounts:
+            - name: config
+              mountPath: /etc/softwareheritage/config.yml
+              subPath: config.yml
+              readOnly: true
+            - name: config
+              mountPath: /entrypoint.sh
+              subPath: entrypoint.sh
+              readOnly: true
+      volumes:
+        - name: config
+          configMap:
+            name: loaders
+            # Read + execute for everyone is enough to run the scripts; the
+            # files do not need to be writable.
+            defaultMode: 0555
diff --git a/skaffold.yaml b/skaffold.yaml
index 13fc4bb..69d05c8 100644
--- a/skaffold.yaml
+++ b/skaffold.yaml
@@ -1,44 +1,51 @@
 apiVersion: skaffold/v2beta13
 kind: Config
 metadata:
   name: swh-environment
 build:
   artifacts:
   - image: swh/stack-base
     docker:
       dockerfile: Dockerfile
   - image: swh/objstorage
     docker:
       dockerfile: Dockerfile.objstorage
     requires:
     - image: swh/stack-base
       alias: BASE
   - image: swh/storage
     docker:
       dockerfile: Dockerfile.storage
     requires:
     - image: swh/stack-base
       alias: BASE
   - image: swh/scheduler
     docker:
       dockerfile: Dockerfile.scheduler
     requires:
     - image: swh/stack-base
       alias: BASE
   - image: swh/webapp
     docker:
       dockerfile: Dockerfile.webapp
     requires:
     - image: swh/stack-base
       alias: BASE
+  - image: swh/loaders
+    docker:
+      dockerfile: Dockerfile.loaders
+    requires:
+    - image: swh/stack-base
+      alias: BASE
 deploy:
   kubectl:
     manifests:
     - kubernetes/01-journal.yml
     - kubernetes/05-storage-db.yml
     - kubernetes/10-objstorage.yml
     - kubernetes/11-storage.yml
     - kubernetes/15-scheduler-db.yml
     - kubernetes/20-scheduler.yml
     - kubernetes/25-rabbitmq.yml
     - kubernetes/30-webapp.yml
+    - kubernetes/40-loaders.yml