diff --git a/swh/templates/cookers/configmap.yaml b/swh/templates/cookers/configmap.yaml
new file mode 100644
index 0000000..2a342f3
--- /dev/null
+++ b/swh/templates/cookers/configmap.yaml
@@ -0,0 +1,69 @@
+{{ if .Values.cookers.enabled -}}
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: cooker-utils
+  namespace: {{ $.Values.namespace }}
+data:
+  pre-stop-idempotent.sh: |
+    #!/bin/bash
+
+    # pre-stop hook can be triggered multiple times but we want it to be applied only
+    # once so container can warm-shutdown properly.
+
+    # When celery receives multiple times the sigterm signal, this ends up doing an
+    # immediate shutdown which prevents long-standing tasks to finish properly.
+
+    set -ex
+
+    WITNESS_FILE=/tmp/already-stopped
+
+    # sub-second jitter, seeded per call, to spread near-immediate concurrent calls
+    sleep "$(awk -v seed="$RANDOM" 'BEGIN { srand(seed); printf "%.3f\n", rand() }')"
+
+    # noclobber: the redirection fails if the file exists, so check-and-create is atomic
+    if ( set -o noclobber; : > "$WITNESS_FILE" ) 2> /dev/null; then
+      kill 1
+    fi
+
+{{ range $cooker_type, $deployment_config := .Values.cookers.deployments }}
+{{ $cooker_name := ( print "cooker-" $cooker_type ) }}
+{{ $amqp := $.Values.cookers.amqp | default $.Values.loaders.amqp }}{{/* prefer cookers.amqp, fall back to loaders.amqp */}}
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ $cooker_name }}-template
+  namespace: {{ $.Values.namespace }}
+data:
+  config.yml.template: |
+    max_bundle_size: 1073741824
+    storage:
+      cls: retry
+      storage:
+        cls: remote
+        url: http://{{ $.Values.cookers.storage.host }}:{{ $.Values.cookers.storage.port }}/
+    vault:
+      cls: remote
+      url: http://{{ $.Values.cookers.vault.host }}:{{ $.Values.cookers.vault.port }}/
+    celery:
+      task_broker: amqp://${AMQP_USERNAME}:${AMQP_PASSWORD}@{{ $amqp.host }}:{{ $amqp.port | default 5672 }}/
+      task_queues:
+      {{- range $queue := get $deployment_config "queues" }}
+        - {{ $queue }}
+      {{- end }}
+  init-container-entrypoint.sh: |
+    #!/bin/bash
+
+    set -e
+
+    CONFIG_FILE=/etc/swh/config.yml
+
+    # substitute environment variables when creating the default config.yml
+    eval echo \""$(cat /etc/swh/configuration-template/config.yml.template)"\" \
+      > "$CONFIG_FILE"
+
+    exit 0
+{{ end }}
+{{- end -}}
diff --git a/swh/templates/cookers/deployment.yaml
b/swh/templates/cookers/deployment.yaml
new file mode 100644
index 0000000..cad239b
--- /dev/null
+++ b/swh/templates/cookers/deployment.yaml
@@ -0,0 +1,132 @@
+{{ if .Values.cookers.enabled -}}
+{{- $configurationChecksum := include (print $.Template.BasePath "/cookers/configmap.yaml") . -}}
+{{- range $cooker_type, $deployment_config := .Values.cookers.deployments -}}
+{{- $cooker_name := ( print "cooker-" $cooker_type ) -}}
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ $cooker_name }}
+  namespace: {{ $.Values.namespace }}
+  labels:
+    app: {{ $cooker_name }}
+spec:
+  revisionHistoryLimit: 2
+  selector:
+    matchLabels:
+      app: {{ $cooker_name }}
+  strategy:
+    type: RollingUpdate
+    rollingUpdate:
+      maxSurge: 1
+  template:
+    metadata:
+      labels:
+        app: {{ $cooker_name }}
+      annotations:
+        # Force a rollout upgrade if the configuration changes
+        checksum/config: {{ $configurationChecksum | sha256sum }}
+    spec:
+      {{- if $.Values.cookers.affinity }}
+      affinity:
+        {{- toYaml $.Values.cookers.affinity | nindent 8 }}
+      {{- end }}
+      initContainers:
+        - name: prepare-configuration
+          image: debian:bullseye
+          imagePullPolicy: Always
+          env:
+            - name: AMQP_USERNAME
+              valueFrom:
+                secretKeyRef:
+                  name: common-secrets
+                  key: rabbitmq-amqp-username
+                  # 'name' secret must exist & include that ^ key
+                  optional: false
+            - name: AMQP_PASSWORD
+              valueFrom:
+                secretKeyRef:
+                  name: common-secrets
+                  key: rabbitmq-amqp-password
+                  # 'name' secret must exist & include that ^ key
+                  optional: false
+          command:
+            - /entrypoint.sh
+          volumeMounts:
+            - name: configuration-template
+              mountPath: /entrypoint.sh
+              subPath: "init-container-entrypoint.sh"
+              readOnly: true
+            - name: configuration
+              mountPath: /etc/swh
+            - name: configuration-template
+              mountPath: /etc/swh/configuration-template
+      containers:
+        - name: cookers
+          resources:
+            requests:
+              memory: {{ get $deployment_config "requestedMemory" | default "512Mi" }}
+              cpu: {{ get $deployment_config "requestedCpu" | default "500m" }}
+            limits:
+              memory: "4000Mi"
+              cpu: "1200m"
+          image: {{ $.Values.swh_cookers_image }}:{{ $.Values.swh_cookers_image_version }}
+          imagePullPolicy: Always
+          command:
+            - /bin/bash
+          args:
+            - -c
+            - /opt/swh/entrypoint.sh
+          lifecycle:
+            preStop:
+              exec:
+                command: ["/pre-stop.sh"]
+          env:
+            - name: STATSD_HOST
+              value: {{ $.Values.statsdExternalHost | default "prometheus-statsd-exporter" | quote }}
+            - name: STATSD_PORT
+              value: {{ $.Values.statsdPort | default "9125" | quote }}
+            - name: MAX_TASKS_PER_CHILD
+              value: {{ get $deployment_config "maxTasksPerChild" | default 1 | quote }}
+            - name: LOGLEVEL
+              value: {{ get $deployment_config "logLevel" | default "INFO" | quote }}
+            - name: SWH_CONFIG_FILENAME
+              value: /etc/swh/config.yml
+            - name: SWH_SENTRY_ENVIRONMENT
+              value: {{ $.Values.sentry.environment | quote }}
+            - name: SWH_MAIN_PACKAGE
+              value: swh.vault
+            - name: SWH_SENTRY_DSN
+              valueFrom:
+                secretKeyRef:
+                  name: common-secrets
+                  key: vault-cookers-sentry-dsn
+                  # 'name' secret must exist & include that ^ key
+                  optional: false
+          volumeMounts:
+            - name: cooker-utils
+              mountPath: /pre-stop.sh
+              subPath: "pre-stop.sh"
+            - name: configuration
+              mountPath: /etc/swh
+      volumes:
+        - name: configuration
+          emptyDir: {}
+        - name: configuration-template
+          configMap:
+            name: {{ $cooker_name }}-template
+            defaultMode: 0555  # read + execute is enough for the mounted script
+            items:
+              - key: "config.yml.template"
+                path: "config.yml.template"
+              - key: "init-container-entrypoint.sh"
+                path: "init-container-entrypoint.sh"
+        - name: cooker-utils
+          configMap:
+            name: cooker-utils
+            defaultMode: 0555  # read + execute is enough for the mounted script
+            items:
+              - key: "pre-stop-idempotent.sh"
+                path: "pre-stop.sh"
+{{ end }}
+{{- end -}}
diff --git a/swh/templates/cookers/keda-autoscaling.yaml b/swh/templates/cookers/keda-autoscaling.yaml
new file mode 100644
index 0000000..776cefb
--- /dev/null
+++ b/swh/templates/cookers/keda-autoscaling.yaml
@@ -0,0 +1,61 @@
+{{ if .Values.cookers.enabled -}}
+{{- range $cooker_type, $deployment_config := .Values.cookers.deployments -}}
+{{ if get
$deployment_config "autoScaling" }}
+{{- $autoscalingConfig := get $deployment_config "autoScaling" -}}
+{{- $cooker_name := ( print "cooker-" $cooker_type ) -}}
+---
+apiVersion: keda.sh/v1alpha1
+kind: TriggerAuthentication
+metadata:
+  name: amqp-authentication-{{ $cooker_name }}
+  namespace: {{ $.Values.namespace }}
+spec:
+  secretTargetRef:
+    - parameter: host  # "host" is required by the ScaledObject trigger metadata
+      name: common-secrets
+      key: rabbitmq-http-host
+
+---
+apiVersion: keda.sh/v1alpha1
+kind: ScaledObject
+metadata:
+  name: {{ $cooker_name }}-operators
+  namespace: {{ $.Values.namespace }}
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1  # Optional. Default: apps/v1
+    kind: Deployment     # Optional. Default: Deployment
+    # Mandatory. Must be in same namespace as ScaledObject
+    name: {{ $cooker_name }}
+    # envSourceContainerName: {container-name}  # Optional. Default:
+    #                                           # .spec.template.spec.containers[0]
+  pollingInterval: 30    # Optional. Default: 30 seconds
+  cooldownPeriod: 3600   # Optional. Default: 300 seconds
+  idleReplicaCount: 0    # Optional. Must be less than
+                         # minReplicaCount
+  minReplicaCount: {{ get $autoscalingConfig "minReplicaCount" | default 0 }}
+  maxReplicaCount: {{ get $autoscalingConfig "maxReplicaCount" | default 5 }}
+  triggers:
+  {{- range $queue := get $deployment_config "queues" }}
+    - type: rabbitmq
+      authenticationRef:
+        name: amqp-authentication-{{ $cooker_name }}
+      metadata:
+        protocol: auto     # Optional. Specifies protocol to use,
+                           # either amqp or http, or auto to
+                           # autodetect based on the `host` value.
+                           # Default value is auto.
+        mode: QueueLength  # QueueLength to trigger on number of msgs in queue
+        excludeUnacknowledged: "false"  # QueueLength should include unacked messages
+                                        # Implies "http" protocol is used
+        value: {{ get $autoscalingConfig "queueThreshold" | default 1 | quote }}
+        queueName: {{ $queue | quote }}
+        vhostName: /  # Optional. If not specified, use the vhost in the
+                      # `host` connection string. Alternatively, you can
+                      # use existing environment variables to read
+                      # configuration from: See details in "Parameter
+                      # list" section (e.g. hostFromEnv: RABBITMQ_HOST)
+  {{- end }}
+{{ end }}
+{{ end }}
+{{- end -}}
diff --git a/swh/values/default.yaml b/swh/values/default.yaml
index dbeb773..3a9c446 100644
--- a/swh/values/default.yaml
+++ b/swh/values/default.yaml
@@ -1,56 +1,67 @@
 namespace: swh

 storage_replayer:
   storageClass: cassandra  # only cassandra is currently supported
   maxMessagesBytes: "524288000"
   journalBrokers:
     # The name of the secret containing the BROKER_USER_PASSWORD value
     secretName: storage-replayer-broker-secret
   cassandra:
     keySpace: swh
     consistencyLevel: LOCAL_QUORUM
   affinity:
     nodeAffinity:
       requiredDuringSchedulingIgnoredDuringExecution:
         nodeSelectorTerms:
           - matchExpressions:
               - key: node-role.kubernetes.io/etcd
                 operator: NotIn
                 values:
                   - "true"

+cookers:
+  affinity:
+    nodeAffinity:
+      requiredDuringSchedulingIgnoredDuringExecution:
+        nodeSelectorTerms:
+          - matchExpressions:
+              - key: "swh/cooker"
+                operator: In
+                values:
+                  - "true"
+
 loaders:
   affinity:
     nodeAffinity:
       requiredDuringSchedulingIgnoredDuringExecution:
         nodeSelectorTerms:
           - matchExpressions:
               - key: "swh/loader"
                 operator: In
                 values:
                   - "true"

 listers:
   affinity:
     nodeAffinity:
       requiredDuringSchedulingIgnoredDuringExecution:
         nodeSelectorTerms:
           - matchExpressions:
               - key: "swh/lister"
                 operator: In
                 values:
                   - "true"

 graphql:
   affinity:
     nodeAffinity:
       requiredDuringSchedulingIgnoredDuringExecution:
         nodeSelectorTerms:
           - matchExpressions:
               - key: swh/rpc
                 operator: In
                 values:
                   - "true"

 statsd_exporter:
   enabled: true
diff --git a/swh/values/staging.yaml b/swh/values/staging.yaml
index 2cfc3b7..b9a9c92 100644
--- a/swh/values/staging.yaml
+++ b/swh/values/staging.yaml
@@ -1,323 +1,345 @@
 sentry:
   environment: staging

 loaders:
   enabled: true
   storage:
     host: storage1.internal.staging.swh.network
     port: 5002
   amqp:
     host: scheduler0.internal.staging.swh.network
     port: 5672
   deployments:
     addforgenow:
       image:
swh_loader_git_image
       requestedMemory: 256Mi
       requestedCpu: 200m
       queues:
         - add_forge_now:swh.loader.git.tasks.UpdateGitRepository
       ackLate: true
       autoScaling:
         queueThreshold: 1
         maxReplicaCount: 2
       sentrySwhPackage: swh.loader.git
     archive:
       requestedMemory: 256Mi
       requestedCpu: 200m
       image: swh_loader_package_image
       queues:
         - swh.loader.package.archive.tasks.LoadArchive
       autoScaling:
         maxReplicaCount: 1
       sentrySwhPackage: swh.loader.core
     bzr:
       requestedMemory: 256Mi
       requestedCpu: 200m
       queues:
         - swh.loader.bzr.tasks.LoadBazaar
       autoScaling:
         maxReplicaCount: 1
       sentrySwhPackage: swh.loader.bzr
     cran:
       requestedMemory: 256Mi
       requestedCpu: 200m
       image: swh_loader_package_image
       queues:
         - swh.loader.package.cran.tasks.LoadCRAN
       autoScaling:
         maxReplicaCount: 1
       sentrySwhPackage: swh.loader.core
     cvs:
       requestedMemory: 256Mi
       requestedCpu: 200m
       queues:
         - swh.loader.cvs.tasks.LoadCvsRepository
       autoScaling:
         maxReplicaCount: 1
       sentrySwhPackage: swh.loader.cvs
     debian:
       requestedMemory: 256Mi
       requestedCpu: 200m
       image: swh_loader_package_image
       queues:
         - swh.loader.package.debian.tasks.LoadDebian
       autoScaling:
         maxReplicaCount: 1
       sentrySwhPackage: swh.loader.core
     deposit:
       requestedMemory: 256Mi
       requestedCpu: 200m
       image: swh_loader_package_image
       queues:
         - swh.loader.package.deposit.tasks.LoadDeposit
       autoScaling:
         queueThreshold: 1
         maxReplicaCount: 2
       sentrySwhPackage: swh.loader.core
       extraConfig:
         deposit:
           url: "https://deposit-rp.internal.staging.swh.network/1/private"
           auth:
             username: "${DEPOSIT_USERNAME}"
             password: "${DEPOSIT_PASSWORD}"
         default_filename: archive.tar
     git:
       requestedMemory: 256Mi
       requestedCpu: 200m
       queues:
         - swh.loader.git.tasks.UpdateGitRepository
         - swh.loader.git.tasks.LoadDiskGitRepository
         - swh.loader.git.tasks.UncompressAndLoadDiskGitRepository
       autoScaling:
         maxReplicaCount: 2
       sentrySwhPackage: swh.loader.git
     golang:
       image: swh_loader_package_image
       requestedMemory: 256Mi
       requestedCpu: 200m
       queues:
         - swh.loader.package.golang.tasks.LoadGolang
       autoScaling:
         maxReplicaCount: 1
       sentrySwhPackage: swh.loader.core
     highpriority:
       requestedMemory: 256Mi
       requestedCpu: 200m
       queues:
         - save_code_now:swh.loader.bzr.tasks.LoadBazaar
         - save_code_now:swh.loader.git.tasks.UpdateGitRepository
         - save_code_now:swh.loader.git.tasks.LoadDiskGitRepository
         - save_code_now:swh.loader.git.tasks.UncompressAndLoadDiskGitRepository
         - save_code_now:swh.loader.mercurial.tasks.LoadArchiveMercurial
         - save_code_now:swh.loader.mercurial.tasks.LoadMercurial
         - save_code_now:swh.loader.svn.tasks.LoadSvnRepository
         - save_code_now:swh.loader.svn.tasks.MountAndLoadSvnRepository
         - save_code_now:swh.loader.svn.tasks.DumpMountAndLoadSvnRepository
         - save_code_now:swh.loader.package.archive.tasks.LoadArchive
       ackLate: true
       autoScaling:
         queueThreshold: 1
         maxReplicaCount: 2
       sentrySwhPackage: swh.loader.highpriority
     maven:
       requestedMemory: 256Mi
       requestedCpu: 200m
       image: swh_loader_package_image
       queues:
         - swh.loader.package.maven.tasks.LoadMaven
       autoScaling:
         maxReplicaCount: 1
       sentrySwhPackage: swh.loader.core
     mercurial:
       requestedMemory: 256Mi
       requestedCpu: 200m
       queues:
         - swh.loader.mercurial.tasks.LoadArchiveMercurial
         - swh.loader.mercurial.tasks.LoadMercurial
       autoScaling:
         maxReplicaCount: 1
       sentrySwhPackage: swh.loader.mercurial
     nixguix:
       requestedMemory: 256Mi
       requestedCpu: 200m
       image: swh_loader_package_image
       queues:
         - swh.loader.package.nixguix.tasks.LoadNixguix
       autoScaling:
         queueThreshold: 1
         maxReplicaCount: 1
       sentrySwhPackage: swh.loader.core
       extraConfig:
         unsupported_file_extensions:
           - patch
           - iso
           - whl
           - gem
           - pom
           - msi
           - pod
           - png
           - rock
           - ttf
           - jar
           - c
           - el
           - rpm
           - diff
     npm:
       requestedMemory: 256Mi
       requestedCpu: 200m
       image: swh_loader_package_image
       queues:
         - swh.loader.package.npm.tasks.LoadNpm
       autoScaling:
         maxReplicaCount: 1
       sentrySwhPackage: swh.loader.core
     opam:
       requestedMemory: 256Mi
       requestedCpu: 200m
       image: swh_loader_package_image
       queues:
         - swh.loader.package.opam.tasks.LoadOpam
       autoScaling:
         maxReplicaCount: 1
       sentrySwhPackage: swh.loader.core
       extraConfig:
         # FIXME: Find a way to avoid this initialization step in loader task
         initialize_opam_root: true
     pypi:
       requestedMemory: 256Mi
       requestedCpu: 200m
       image: swh_loader_package_image
       queues:
         - swh.loader.package.pypi.tasks.LoadPyPI
       autoScaling:
         maxReplicaCount: 1
       sentrySwhPackage: swh.loader.core
     pubdev:
       requestedMemory: 256Mi
       requestedCpu: 200m
       image: swh_loader_package_image
       queues:
         - swh.loader.package.pubdev.tasks.LoadPubDev
       autoScaling:
         maxReplicaCount: 1
       sentrySwhPackage: swh.loader.core
     svn:
       requestedMemory: 256Mi
       requestedCpu: 200m
       queues:
         - swh.loader.svn.tasks.LoadSvnRepository
         - swh.loader.svn.tasks.MountAndLoadSvnRepository
         - swh.loader.svn.tasks.DumpMountAndLoadSvnRepository
       autoScaling:
         maxReplicaCount: 1
       sentrySwhPackage: swh.loader.svn

 listers:
   enabled: true
   sentrySwhPackage: swh.lister
   storage:
     host: storage1.internal.staging.swh.network
     port: 5002
   scheduler:
     host: scheduler0.internal.staging.swh.network
     port: 5008
   amqp:
     host: scheduler0.internal.staging.swh.network
   deployments:
     all:
       maxTasksperchild: 3
       queues:
         - swh.lister.cgit.tasks.CGitListerTask
         - swh.lister.cran.tasks.CRANListerTask
         - swh.lister.debian.tasks.DebianListerTask
         - swh.lister.gitea.tasks.IncrementalGiteaLister
         - swh.lister.gitea.tasks.RangeGiteaLister
         - swh.lister.gitea.tasks.FullGiteaRelister
         - swh.lister.gitlab.tasks.IncrementalGitLabLister
         - swh.lister.gitlab.tasks.RangeGitLabLister
         - swh.lister.gitlab.tasks.FullGitLabRelister
         - swh.lister.npm.tasks.NpmListerTask
         - swh.lister.phabricator.tasks.FullPhabricatorLister
         - swh.lister.pypi.tasks.PyPIListerTask
       autoScaling:
         maxReplicaCount: 2
     bower:
       queues:
         - swh.lister.bower.tasks.BowerListerTask
       autoScaling:
         maxReplicaCount: 1
     bitbucket:
       queues:
         - swh.lister.bitbucket.tasks.IncrementalBitBucketLister
         - swh.lister.bitbucket.tasks.FullBitBucketRelister
       autoScaling:
         maxReplicaCount: 1
     gnu-full:
       queues:
         - swh.lister.gnu.tasks.GNUListerTask
       autoScaling:
         maxReplicaCount: 1
     gogs-full:
       queues:
         - swh.lister.gogs.tasks.FullGogsRelister
       autoScaling:
         maxReplicaCount: 1
     golang:
       queues:
         - swh.lister.golang.tasks.FullGolangLister
         - swh.lister.golang.tasks.IncrementalGolangLister
       autoScaling:
         maxReplicaCount: 1
     launchpad:
       queues:
         - swh.lister.launchpad.tasks.FullLaunchpadLister
         - swh.lister.launchpad.tasks.IncrementalLaunchpadLister
       autoScaling:
         maxReplicaCount: 1
     maven:
       queues:
         - swh.lister.maven.tasks.FullMavenLister
         - swh.lister.maven.tasks.IncrementalMavenLister
       autoScaling:
         maxReplicaCount: 1
     opam:
       queues:
         - swh.lister.opam.tasks.OpamListerTask
       autoScaling:
         maxReplicaCount: 1
     pubdev:
       queues:
         - swh.lister.pubdev.tasks.PubDevListerTask
       autoScaling:
         maxReplicaCount: 1
     sourceforge:
       queues:
         - swh.lister.sourceforge.tasks.FullSourceForgeLister
         - swh.lister.sourceforge.tasks.IncrementalSourceForgeLister
       autoScaling:
         maxReplicaCount: 1

+cookers:
+  enabled: true
+  storage:
+    host: storage1.internal.staging.swh.network
+    port: 5002
+  vault:
+    host: vault.internal.staging.swh.network
+    port: 5005
+  amqp:
+    host: scheduler0.internal.staging.swh.network
+  deployments:
+    simple:
+      queues:
+        - swh.vault.cooking_tasks.SWHCookingTask
+      autoScaling:
+        maxReplicaCount: 2
+    batch:
+      queues:
+        - swh.vault.cooking_tasks.SWHBatchCookingTask
+      autoScaling:
+        maxReplicaCount: 2
+
 graphql:
   enabled: true
   sentry_enabled: true
   backends:
     storage:
       host: webapp.internal.staging.swh.network
       port: 5002
     search:
       host: search0.internal.staging.swh.network
       port: 5010
   replicas: 1
   gunicorn:
     threads: 4
     workers: 2
     timeout: 3600
   ingress:
     enabled: true
     httpPath: /
     host: graphql.staging.swh.network
   logLevel: DEBUG
diff --git a/values-swh-application-versions.yaml b/values-swh-application-versions.yaml
index 3e5c8b2..616ca3e 100644
--- a/values-swh-application-versions.yaml
+++ b/values-swh-application-versions.yaml
@@ -1,25 +1,27 @@
 # This file references the last version of all the softwareheritage images
 # It's used to manage the automatic update of the environments

+swh_cookers_image: softwareheritage/vault_cookers
+swh_cookers_image_version: "20220926.1"
 swh_storage_replayer_image: softwareheritage/storage-replayer
 swh_storage_replayer_image_version: "20220819.1"
 swh_graphql_image: softwareheritage/graphql
 swh_graphql_image_version: "20220913.2"
 swh_listers_image: softwareheritage/lister
 swh_listers_image_version: "20220921.1"
 swh_loaders_image: softwareheritage/loaders
 swh_loaders_image_version: "20220826.1"
 swh_loader_bzr_image: softwareheritage/loader_bzr
 swh_loader_bzr_image_version: "20220909.1"
 swh_loader_cvs_image: softwareheritage/loader_cvs
 swh_loader_cvs_image_version: "20220919.2"
 swh_loader_git_image: softwareheritage/loader_git
 swh_loader_git_image_version: "20220908.1"
 swh_loader_highpriority_image: softwareheritage/loader_highpriority
 swh_loader_highpriority_image_version: "20220908.1"
 swh_loader_mercurial_image: softwareheritage/loader_mercurial
 swh_loader_mercurial_image_version: "20220919.1"
 swh_loader_package_image: softwareheritage/loader_package
 swh_loader_package_image_version: "20220921.1"
 swh_loader_svn_image: softwareheritage/loader_svn
 swh_loader_svn_image_version: "20220919.1"