diff --git a/swh/templates/loaders/configmap.yaml b/swh/templates/loaders/configmap.yaml
new file mode 100644
--- /dev/null
+++ b/swh/templates/loaders/configmap.yaml
@@ -0,0 +1,65 @@
+{{- /* One ConfigMap per loader deployment: the config.yml skeleton plus the entrypoint.sh that completes it at start-up. */ -}}
+{{ if .Values.loaders.enabled -}}
+{{- range $loader_type, $deployment_config := .Values.loaders.deployments -}}
+{{- $loader_name := ( print "loader-" $loader_type ) -}}
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ $loader_name }}
+  namespace: {{ $.Values.namespace }}
+data:
+  config.yml: |
+    storage:
+      cls: pipeline
+      steps:
+      - cls: buffer
+        min_batch_size:
+          content: 1000
+          content_bytes: 52428800
+          directory: 1000
+          directory_entries: 12000
+          revision: 1000
+          revision_parents: 2000
+          revision_bytes: 52428800
+          release: 1000
+          release_bytes: 52428800
+          extid: 1000
+      - cls: filter
+      - cls: retry
+      - cls: remote
+        url: http://{{ $.Values.loaders.storage.host }}:{{ $.Values.loaders.storage.port }}/
+
+    celery:
+      task_broker: ##amqp_host##
+      task_queues:
+      {{- range $queue := get $deployment_config "task_queues" }}
+      - {{ $queue }}
+      {{- end }}
+  entrypoint.sh: |
+    #!/bin/bash
+
+    set -e
+
+    # Create the full config filename
+    cat /etc/softwareheritage/config.yml > "$SWH_CONFIG_FILENAME"
+    # contains required credentials for git loader (with metadata loader inside)
+    # ignored by the other loaders
+    cat /etc/credentials/metadata-fetcher/data >> "$SWH_CONFIG_FILENAME"
+
+    # Install the rabbitmq host information (expansions quoted: no word splitting/globbing)
+    sed -i 's,##amqp_host##,'"$RABBITMQ_HOST"',g' "$SWH_CONFIG_FILENAME"
+
+    echo Starting the swh Celery worker
+    exec python -m celery \
+      --app=swh.scheduler.celery_backend.config.app \
+      worker \
+      --pool=prefork \
+      --concurrency=${CONCURRENCY} \
+      --max-tasks-per-child=${MAX_TASKS_PER_CHILD} \
+      -Ofair --loglevel=${LOGLEVEL} \
+      --without-gossip \
+      --without-mingle \
+      --hostname "${HOSTNAME}"
+{{ end }}
+{{- end -}}
diff --git a/swh/templates/loaders/deployment.yaml b/swh/templates/loaders/deployment.yaml
new file mode 100644
--- /dev/null
+++ b/swh/templates/loaders/deployment.yaml
@@ -0,0 +1,101 @@
+{{- /* One Deployment per entry of .Values.loaders.deployments; the container runs the /entrypoint.sh mounted from the ConfigMap of the same name. */ -}}
+{{ if .Values.loaders.enabled -}}
+{{- range $loader_type, $deployment_config := .Values.loaders.deployments -}}
+{{- $loader_name := ( print "loader-" $loader_type ) -}}
+
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ $loader_name }}
+  namespace: {{ $.Values.namespace }}
+  labels:
+    app: {{ $loader_name }}
+spec:
+  selector:
+    matchLabels:
+      app: {{ $loader_name }}
+  strategy:
+    type: RollingUpdate
+    rollingUpdate:
+      maxSurge: 1
+  template:
+    metadata:
+      labels:
+        app: {{ $loader_name }}
+    spec:
+      containers:
+      - name: loaders
+        image: {{ $.Values.swh_loaders_image }}:{{ $.Values.swh_loaders_image_version }}
+        imagePullPolicy: Always
+        command:
+        - /entrypoint.sh
+        resources:
+          requests:
+            memory: {{ get $deployment_config "requestedMemory" | default "512Mi" }}
+            cpu: {{ get $deployment_config "requestedCpu" | default "500m" }}
+          limits:
+            memory: "4000Mi"
+            cpu: "1200m"
+        lifecycle:
+          preStop:
+            exec:
+              command: ["kill", "1"]
+        env:
+        - name: STATSD_HOST
+          value: {{ $.Values.statsdExternalHost | default "prometheus-statsd-exporter" | quote }}
+        - name: STATSD_PORT
+          value: {{ $.Values.statsdPort | default "9125" | quote }}
+        - name: CONCURRENCY
+          value: "1"
+        - name: MAX_TASKS_PER_CHILD
+          value: "5"
+        - name: LOGLEVEL
+          value: "INFO"
+        - name: SWH_CONFIG_FILENAME
+          # FIXME: built by entrypoint.sh, determine how to properly declare this
+          value: /tmp/config.yml
+        - name: SWH_SENTRY_ENVIRONMENT
+          value: {{ $.Values.sentry.environment | quote }}
+        - name: SWH_MAIN_PACKAGE
+          value: {{ get $deployment_config "swhpackage" | quote }}
+        - name: SWH_SENTRY_DSN
+          valueFrom:
+            secretKeyRef:
+              name: {{ $loader_name }}-sentry-secrets
+              key: sentry-dsn
+              # 'name' secret must exist & include key "sentry-dsn"
+              optional: false
+        - name: RABBITMQ_HOST
+          valueFrom:
+            secretKeyRef:
+              name: amqp-access-credentials
+              key: host
+              # 'name' secret must exist & include key "host"
+              optional: false
+        volumeMounts:
+          - name: config
+            mountPath: /etc/softwareheritage/config.yml
+            subPath: config.yml
+            readOnly: true
+          - name: config
+            mountPath: /entrypoint.sh
+            subPath: entrypoint.sh
+            readOnly: true
+          - name: metadata-fetcher-credentials
+            mountPath: /etc/credentials/metadata-fetcher
+            readOnly: true
+          - mountPath: /tmp
+            name: tmp-volume
+      volumes:
+      - name: config
+        configMap:
+          name: {{ $loader_name }}
+          defaultMode: 0777
+      - name: tmp-volume
+        emptyDir: {}
+      - name: metadata-fetcher-credentials
+        secret:
+          secretName: metadata-fetcher-credentials
+{{ end }}
+{{- end -}}
diff --git a/worker/templates/autoscale.yaml b/swh/templates/loaders/keda-autoscaling.yaml
copy from worker/templates/autoscale.yaml
copy to swh/templates/loaders/keda-autoscaling.yaml
--- a/worker/templates/autoscale.yaml
+++ b/swh/templates/loaders/keda-autoscaling.yaml
@@ -1,9 +1,15 @@
+{{- /* KEDA TriggerAuthentication and ScaledObject, rendered only for loader deployments that define "autoScaling". */ -}}
+{{ if .Values.loaders.enabled -}}
+{{- range $loader_type, $deployment_config := .Values.loaders.deployments -}}
+{{ if get $deployment_config "autoScaling" }}
+{{- $autoscalingConfig := get $deployment_config "autoScaling" -}}
+{{- $loader_name := ( print "loader-" $loader_type ) -}}
 ---
 apiVersion: keda.sh/v1alpha1
 kind: TriggerAuthentication
 metadata:
   name: amqp-authentication
-  namespace: ns-{{ .Values.loader.name }}-{{ .Values.loader.type }}
+  namespace: {{ $.Values.namespace }}
 spec:
   secretTargetRef:                  # Optional.
   - parameter: host
@@ -14,22 +20,22 @@
 apiVersion: keda.sh/v1alpha1
 kind: ScaledObject
 metadata:
-  name: loaders-{{ .Values.loader.name }}-{{ .Values.loader.type }}-operators
-  namespace: ns-{{ .Values.loader.name }}-{{ .Values.loader.type }}
+  name: {{ $loader_name }}-operators
+  namespace: {{ $.Values.namespace }}
 spec:
   scaleTargetRef:
     apiVersion: apps/v1             # Optional. Default: apps/v1
     kind: Deployment                # Optional. Default: Deployment
     # Mandatory. Must be in same namespace as ScaledObject
-    name: {{ .Values.loader.name }}-{{ .Values.loader.type }}
+    name: {{ $loader_name }}
     # envSourceContainerName: {container-name} # Optional. Default:
                                     # .spec.template.spec.containers[0]
   pollingInterval: 30               # Optional. Default: 30 seconds
   cooldownPeriod: 300               # Optional. Default: 300 seconds
   idleReplicaCount: 0               # Optional. Must be less than
                                     # minReplicaCount
-  minReplicaCount: {{ .Values.swh.loader.replicas.min }} # Optional. Default: 0
-  maxReplicaCount: {{ .Values.swh.loader.replicas.max }} # Optional. Default: 100
+  minReplicaCount: {{ get $autoscalingConfig "minReplicaCount" | default 0 }}
+  maxReplicaCount: {{ get $autoscalingConfig "maxReplicaCount" | default 5 }}
   fallback:                         # Optional. Section to specify fallback
                                     # options
     failureThreshold: 3             # Mandatory if fallback section is
@@ -50,7 +56,7 @@
         value: 2
         periodSeconds: 15
   triggers:
-  {{- range .Values.amqp.queues }}
+  {{- range $queue := get $deployment_config "task_queues" }}
   - type: rabbitmq
     authenticationRef:
       name: amqp-authentication
@@ -63,12 +69,16 @@
                                     # autodetect based on the `host` value.
                                     # Default value is auto.
       mode: QueueLength             # QueueLength or MessageRate
-      value: {{ $.Values.amqp.queue_threshold | quote }} # message backlog or publish/sec.
-                                    # target per instance
-      queueName: {{ . }}
+      # message backlog or publish/sec.
+      # target per instance
+      value: {{ get $autoscalingConfig "queue_threshold" | default 100 | quote }}
+      queueName: {{ $queue }}
       vhostName: /                  # Optional. If not specified, use the vhost in the
                                     # `host` connection string. Alternatively, you can
                                     # use existing environment variables to read
                                     # configuration from: See details in "Parameter
                                     # list" section
       hostFromEnv: RABBITMQ_HOST% {{- end }}
+{{ end }}
+{{ end }}
+{{- end -}}
diff --git a/swh/templates/loaders/services.yaml b/swh/templates/loaders/services.yaml
new file mode 100644
--- /dev/null
+++ b/swh/templates/loaders/services.yaml
@@ -0,0 +1,25 @@
+{{- /* Two ExternalName Services per loader deployment, pointing at the configured storage and amqp hosts. */ -}}
+{{ if .Values.loaders.enabled -}}
+{{- range $loader_type, $deployment_config := .Values.loaders.deployments -}}
+{{- $loader_name := ( print "loader-" $loader_type ) -}}
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: storage-{{ $loader_name }}
+  namespace: {{ $.Values.namespace }}
+spec:
+  type: ExternalName
+  externalName: {{ $.Values.loaders.storage.host }}
+
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: amqp-{{ $loader_name }}
+  namespace: {{ $.Values.namespace }}
+spec:
+  type: ExternalName
+  externalName: {{ $.Values.loaders.amqp.host }}
+{{ end }}
+{{- end -}}
diff --git a/swh/values.yaml b/swh/values.yaml
--- a/swh/values.yaml
+++ b/swh/values.yaml
@@ -45,6 +45,12 @@
 #       lagThreashold: 1000
 #       minReplicaCount: 1
 #       maxReplicaCount: 10
+loaders:
+  enabled: false
+  deployments: {}
+    # Example of deployments
+    # git:
+
 statsd_exporter:
   enabled: false
 
diff --git a/swh/values/default.yaml b/swh/values/default.yaml
--- a/swh/values/default.yaml
+++ b/swh/values/default.yaml
@@ -1,4 +1,6 @@
 namespace: swh
+sentry:
+  environment: staging
 storage_replayer:
   storageClass: cassandra
   # only cassandra is currently supported
diff --git a/swh/values/staging.yaml b/swh/values/staging.yaml
new file mode 100644
--- /dev/null
+++ b/swh/values/staging.yaml
@@ -0,0 +1,21 @@
+loaders:
+  enabled: true
+  storage:
+    host: storage.internal.staging.swh.network
+    port: 5002
+  amqp:
+    host: scheduler0.internal.staging.swh.network
+  deployments:
+    git:
+      requestedMemory: 256Mi
+      requestedCpu: 200m
+      task_queues:
+        - swh.loader.git.tasks.UpdateGitRepository
+        - swh.loader.git.tasks.LoadDiskGitRepository
+        - swh.loader.git.tasks.UncompressAndLoadDiskGitRepository
+      autoScaling:
+        queue_threshold: 5  # spawn worker per increment of `value` messages
+        minReplicaCount: 1  # key is case-sensitive: the template reads "minReplicaCount"
+        maxReplicaCount: 3
+      swhpackage: swh.loader.git
+
diff --git a/values-swh-application-versions.yaml b/values-swh-application-versions.yaml
--- a/values-swh-application-versions.yaml
+++ b/values-swh-application-versions.yaml
@@ -3,3 +3,5 @@
 
 swh_storage_replayer_image: softwareheritage/storage-replayer
 swh_storage_replayer_image_version: "20220819.1"
+swh_loaders_image: softwareheritage/loaders
+swh_loaders_image_version: "2022-05-17"  # quoted so the tag stays a string, never a date
diff --git a/worker/templates/autoscale.yaml b/worker/templates/autoscale.yaml
--- a/worker/templates/autoscale.yaml
+++ b/worker/templates/autoscale.yaml
@@ -28,8 +28,8 @@
   cooldownPeriod: 300               # Optional. Default: 300 seconds
   idleReplicaCount: 0               # Optional. Must be less than
                                     # minReplicaCount
-  minReplicaCount: {{ .Values.swh.loader.replicas.min }} # Optional. Default: 0
-  maxReplicaCount: {{ .Values.swh.loader.replicas.max }} # Optional. Default: 100
+  minReplicaCount: {{ .Values.swh.loader.replicas.min | default 0 }} # Optional. Default: 0
+  maxReplicaCount: {{ .Values.swh.loader.replicas.max | default 5 }} # Optional. Default: 100
   fallback:                         # Optional. Section to specify fallback
                                     # options
     failureThreshold: 3             # Mandatory if fallback section is