# Patch summary: add a preStop lifecycle hook to the swh "loaders" and
# "listers" worker Deployments.
#
# Files touched (sizes taken from the hunk headers below):
#   kubernetes/40-loaders.yml   @@ -1,140 +1,144 @@
#   kubernetes/45-listers.yml   @@ -1,137 +1,141 @@
#
# In each file the only added lines (the ones marked "+") sit in the main
# container of the Deployment, after `resources:` and before `env:`:
#   lifecycle -> preStop -> exec -> command: ["kill", "1"]
# Everything else is unchanged context: the ConfigMap carrying config.yml,
# entrypoint-init.sh and entrypoint.sh, and the rest of the Deployment.
#
# Why `kill 1`: the container command is /entrypoint.sh, and that script ends
# with `exec python -m celery ... worker`, so PID 1 is presumably the Celery
# worker process itself -- TODO confirm inside the running image.
# NOTE(review): an exec hook is not run through a shell, so this relies on a
# standalone `kill` executable being present in the swh/loaders and
# swh/listers images -- confirm.
# NOTE(review): no terminationGracePeriodSeconds appears in either Deployment
# here, so the cluster default would bound how long a worker has to finish
# after the hook fires -- confirm that is long enough for a running task.
#
# NOTE(review): as stored here the patch has had its line breaks collapsed
# (diff header, hunk header and YAML content share the same physical lines),
# so it will not apply as-is; the payload below is kept byte-for-byte.
diff --git a/kubernetes/40-loaders.yml b/kubernetes/40-loaders.yml index 30c3f19..ef09437 100644 --- a/kubernetes/40-loaders.yml +++ b/kubernetes/40-loaders.yml @@ -1,140 +1,144 @@ --- apiVersion: v1 kind: ConfigMap metadata: name: loaders data: config.yml: | storage: cls: pipeline steps: - cls: buffer min_batch_size: content: 10000 content_bytes: 104857600 directory: 1000 revision: 1000 - cls: filter - cls: retry - cls: remote url: http://storage:5002/ celery: task_broker: amqp://guest:guest@amqp// task_queues: - swh.loader.dir.tasks.LoadDirRepository - swh.loader.git.tasks.LoadDiskGitRepository - swh.loader.git.tasks.UncompressAndLoadDiskGitRepository - swh.loader.git.tasks.UpdateGitRepository - swh.loader.package.archive.tasks.LoadArchive - swh.loader.package.cran.tasks.LoadCRAN - swh.loader.package.debian.tasks.LoadDebian - swh.loader.package.npm.tasks.LoadNpm - swh.loader.package.pypi.tasks.LoadPyPI # - swh.loader.mercurial.tasks.LoadArchiveMercurial # - swh.loader.mercurial.tasks.LoadMercurial # - swh.loader.svn.tasks.DumpMountAndLoadSvnRepository # - swh.loader.svn.tasks.LoadSvnRepository # - swh.loader.svn.tasks.MountAndLoadSvnRepository entrypoint-init.sh: | #!/bin/bash set -e # echo Waiting for RabbitMQ to start wait-for-it amqp:5672 -s --timeout=0 # echo Register task types in scheduler database wait-for-it scheduler:5008 -s --timeout=0 swh scheduler --url http://scheduler:5008 task-type register entrypoint.sh: | #!/bin/bash set -e echo Starting the swh Celery worker exec python -m celery \ --app=swh.scheduler.celery_backend.config.app \ worker \ --pool=prefork --events \ --concurrency=${CONCURRENCY} \ --max-tasks-per-child=${MAX_TASKS_PER_CHILD} \ -Ofair --loglevel=${LOGLEVEL} \ --hostname "swh-worker@foobar" --- apiVersion: apps/v1 kind: Deployment metadata: name: loaders labels: app: loaders spec: replicas: 1 selector: matchLabels: app: loaders strategy: type: RollingUpdate rollingUpdate: maxSurge: 1 template: metadata: labels: app: loaders spec: 
initContainers: - name: loaders-init image: swh/loaders:latest imagePullPolicy: Always command: - /entrypoint.sh volumeMounts: - name: config mountPath: /etc/softwareheritage/config.yml subPath: config.yml readOnly: true - name: config mountPath: /entrypoint.sh subPath: entrypoint-init.sh readOnly: true containers: - name: loaders image: swh/loaders:latest imagePullPolicy: Always command: - /entrypoint.sh ports: - containerPort: 5002 resources: requests: memory: "256Mi" cpu: "200m" limits: memory: "3000Mi" cpu: "1200m" + lifecycle: + preStop: + exec: + command: ["kill", "1"] env: - name: CONCURRENCY value: "1" - name: MAX_TASKS_PER_CHILD value: "5" - name: LOGLEVEL value: "INFO" - name: SWH_CONFIG_FILENAME value: /etc/softwareheritage/config.yml volumeMounts: - name: config mountPath: /etc/softwareheritage/config.yml subPath: config.yml readOnly: true - name: config mountPath: /entrypoint.sh subPath: entrypoint.sh readOnly: true volumes: - name: config configMap: name: loaders defaultMode: 0777 diff --git a/kubernetes/45-listers.yml b/kubernetes/45-listers.yml index 374d169..17a9f0d 100644 --- a/kubernetes/45-listers.yml +++ b/kubernetes/45-listers.yml @@ -1,137 +1,141 @@ --- apiVersion: v1 kind: ConfigMap metadata: name: listers data: config.yml: | scheduler: cls: remote url: http://scheduler:5008/ celery: task_broker: amqp://guest:guest@amqp// task_queues: - swh.lister.bitbucket.tasks.FullBitBucketRelister - swh.lister.bitbucket.tasks.IncrementalBitBucketLister - swh.lister.bitbucket.tasks.RangeBitBucketLister - swh.lister.cgit.tasks.CGitListerTask - swh.lister.cran.tasks.CRANListerTask - swh.lister.debian.tasks.DebianListerTask - swh.lister.gitea.tasks.FullGiteaRelister - swh.lister.gitea.tasks.IncrementalGiteaLister - swh.lister.gitea.tasks.RangeGiteaLister - swh.lister.github.tasks.FullGitHubRelister - swh.lister.github.tasks.IncrementalGitHubLister - swh.lister.github.tasks.RangeGitHubLister - swh.lister.gitlab.tasks.FullGitLabRelister - 
swh.lister.gitlab.tasks.IncrementalGitLabLister - swh.lister.gitlab.tasks.RangeGitLabLister - swh.lister.gnu.tasks.GNUListerTask - swh.lister.npm.tasks.NpmIncrementalListerTask - swh.lister.npm.tasks.NpmListerTask - swh.lister.launchpad.tasks.IncrementalLaunchpadLister - swh.lister.launchpad.tasks.FullLaunchpadLister - swh.lister.packagist.tasks.PackagistListerTask - swh.lister.phabricator.tasks.FullPhabricatorLister - swh.lister.phabricator.tasks.IncrementalPhabricatorLister - swh.lister.pypi.tasks.PyPIListerTask entrypoint-init.sh: | #!/bin/bash set -e # echo Waiting for RabbitMQ to start wait-for-it amqp:5672 -s --timeout=0 # echo Register task types in scheduler database wait-for-it scheduler:5008 -s --timeout=0 swh scheduler --url http://scheduler:5008 task-type register entrypoint.sh: | #!/bin/bash set -e echo Starting the swh listers exec python -m celery \ --app=swh.scheduler.celery_backend.config.app \ worker \ --pool=prefork --events \ --concurrency=${CONCURRENCY} \ --max-tasks-per-child=${MAX_TASKS_PER_CHILD} \ -Ofair --loglevel=${LOGLEVEL} \ --hostname "${HOSTNAME}" --- apiVersion: apps/v1 kind: Deployment metadata: name: listers labels: app: listers spec: replicas: 1 selector: matchLabels: app: listers strategy: type: RollingUpdate rollingUpdate: maxSurge: 1 template: metadata: labels: app: listers spec: initContainers: - name: listers-init image: swh/listers:latest imagePullPolicy: Always command: - /entrypoint.sh volumeMounts: - name: config mountPath: /etc/softwareheritage/config.yml subPath: config.yml readOnly: true - name: config mountPath: /entrypoint.sh subPath: entrypoint-init.sh readOnly: true containers: - name: listers image: swh/listers:latest imagePullPolicy: Always command: - /entrypoint.sh resources: requests: memory: "100Mi" cpu: "20m" limits: memory: "150Mi" cpu: "100m" + lifecycle: + preStop: + exec: + command: ["kill", "1"] env: - name: CONCURRENCY value: "1" - name: MAX_TASKS_PER_CHILD value: "5" - name: LOGLEVEL value: "INFO" - 
name: SWH_CONFIG_FILENAME value: /etc/softwareheritage/config.yml volumeMounts: - name: config mountPath: /etc/softwareheritage/config.yml subPath: config.yml readOnly: true - name: config mountPath: /entrypoint.sh subPath: entrypoint.sh readOnly: true volumes: - name: config configMap: name: listers defaultMode: 0777