diff --git a/swh/Chart.yaml b/swh/Chart.yaml
new file mode 100644
--- /dev/null
+++ b/swh/Chart.yaml
@@ -0,0 +1,24 @@
+apiVersion: v2
+name: swh
+description: A Helm chart to deploy the softwareheritage stack
+
+# A chart can be either an 'application' or a 'library' chart.
+#
+# Application charts are a collection of templates that can be packaged into versioned archives
+# to be deployed.
+#
+# Library charts provide useful utilities or functions for the chart developer. They're included as
+# a dependency of application charts to inject those utilities and functions into the rendering
+# pipeline. Library charts do not define any templates and therefore cannot be deployed.
+type: application
+
+# This is the chart version. This version number should be incremented each time you make changes
+# to the chart and its templates, including the app version.
+# Versions are expected to follow Semantic Versioning (https://semver.org/)
+version: 0.1.0
+
+# This is the version number of the application being deployed. This version number should be
+# incremented each time you make changes to the application. Versions are not expected to
+# follow Semantic Versioning. They should reflect the version the application is using.
+# It is recommended to use it with quotes.
+appVersion: "0.1.0"
diff --git a/swh/Readme.md b/swh/Readme.md
new file mode 100644
diff --git a/swh/templates/statsd-exporter/configmap.yaml b/swh/templates/statsd-exporter/configmap.yaml
new file mode 100644
--- /dev/null
+++ b/swh/templates/statsd-exporter/configmap.yaml
@@ -0,0 +1,65 @@
+{{ if .Values.statsd_exporter.enabled -}}
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: prometheus-statsd-exporter
+  namespace: {{ .Values.namespace }}
+data:
+  config.yml: |
+    defaults:
+      timer_type: histogram
+      buckets:
+        - .005
+        - .01
+        - .025
+        - .05
+        - .1
+        - .25
+        - .5
+        - .75
+        - 1
+        - 2
+        - 5
+        - 10
+        - 15
+        - 30
+        - 45
+        - 60
+        - 120
+        - 300
+        - 600
+        - 900
+        - 1800
+        - 2700
+        - 3600
+        - 7200
+    mappings:
+      - match: "(.*_percent)"
+        name: "${1}"
+        match_type: regex
+        observer_type: histogram
+        histogram_options:
+          buckets:
+            - 0.0
+            - 0.05
+            - 0.1
+            - 0.15
+            - 0.2
+            - 0.25
+            - 0.3
+            - 0.35
+            - 0.4
+            - 0.45
+            - 0.5
+            - 0.55
+            - 0.6
+            - 0.65
+            - 0.7
+            - 0.75
+            - 0.8
+            - 0.85
+            - 0.9
+            - 0.95
+            - 1.
+{{- end -}}
diff --git a/swh/templates/statsd-exporter/deployment.yaml b/swh/templates/statsd-exporter/deployment.yaml
new file mode 100644
--- /dev/null
+++ b/swh/templates/statsd-exporter/deployment.yaml
@@ -0,0 +1,37 @@
+{{ if .Values.statsd_exporter.enabled -}}
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: prometheus-statsd-exporter
+  namespace: {{ .Values.namespace }}
+  labels:
+    app: prometheus-statsd-exporter
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: prometheus-statsd-exporter
+  template:
+    metadata:
+      labels:
+        app: prometheus-statsd-exporter
+    spec:
+      containers:
+        - name: prometheus-statsd-exporter
+          image: {{ .Values.statsd_exporter.image }}:{{ .Values.statsd_exporter.imageVersion }}
+          imagePullPolicy: Always
+          args:
+            - "--statsd.mapping-config=/etc/prometheus/statsd-mapping.yml"
+          ports:
+            - containerPort: 9125
+          volumeMounts:
+            - name: config
+              mountPath: /etc/prometheus/statsd-mapping.yml
+              subPath: config.yml
+              readOnly: true
+      volumes:
+        - name: config
+          configMap:
+            name: prometheus-statsd-exporter
+{{- end -}}
diff --git a/swh/templates/statsd-exporter/service.yaml b/swh/templates/statsd-exporter/service.yaml
new file mode 100644
--- /dev/null
+++ b/swh/templates/statsd-exporter/service.yaml
@@ -0,0 +1,22 @@
+{{ if .Values.statsd_exporter.enabled -}}
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: prometheus-statsd-exporter
+  namespace: {{ .Values.namespace }}
+  labels:
+    app: prometheus-statsd-exporter
+spec:
+  type: ClusterIP
+  selector:
+    app: prometheus-statsd-exporter
+  ports:
+    - name: statsd
+      port: 9125
+      targetPort: 9125
+      protocol: UDP
+    - name: http
+      port: 9102
+      targetPort: 9102
+{{- end -}}
diff --git a/swh/templates/statsd-exporter/servicemonitor.yaml b/swh/templates/statsd-exporter/servicemonitor.yaml
new file mode 100644
--- /dev/null
+++ b/swh/templates/statsd-exporter/servicemonitor.yaml
@@ -0,0 +1,18 @@
+{{ if .Values.statsd_exporter.enabled -}}
+---
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: swh-statsd-exporter
+  namespace: {{ .Values.namespace }}
+spec:
+  endpoints:
+    - path: /metrics
+      port: http
+      interval: 10s
+  selector:
+    matchLabels:
+      app: prometheus-statsd-exporter
+  namespaceSelector:
+    any: true
+{{- end -}}
diff --git a/swh/templates/storage-replayer/configmap.yaml b/swh/templates/storage-replayer/configmap.yaml
new file mode 100644
--- /dev/null
+++ b/swh/templates/storage-replayer/configmap.yaml
@@ -0,0 +1,42 @@
+{{ if .Values.storage_replayer.enabled -}}
+{{- range $deployment, $deployment_config := .Values.storage_replayer.deployments -}}
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  namespace: {{ $.Values.namespace }}
+  name: storage-replayer-configuration-{{ $deployment }}-template
+data:
+  config.yml.template: |
+    storage:
+      cls: {{ $.Values.storage_replayer.storageClass }}
+      hosts:
+        {{- range $seed := $.Values.storage_replayer.cassandra.seeds }}
+        - {{ $seed }}
+        {{- end }}
+      keyspace: {{ $.Values.storage_replayer.cassandra.keySpace }}
+      consistency_level: {{ $.Values.storage_replayer.cassandra.consistencyLevel }}
+      objstorage:
+        cls: noop
+
+    journal_client:
+      cls: kafka
+      brokers:
+        {{- range $broker := $.Values.storage_replayer.journalBrokers.hosts }}
+        - {{ $broker }}
+        {{- end }}
+      sasl.username: {{ $.Values.storage_replayer.journalBrokers.user }}
+      sasl.password: ${BROKER_USER_PASSWORD}
+      security.protocol: sasl_ssl
+      sasl.mechanism: SCRAM-SHA-512
+      # The prefix must match the username
+      group_id: {{ $.Values.storage_replayer.journalBrokers.user }}-cassandra-replayer-{{ $deployment }}
+      batch_size: {{ get $deployment_config "batchSize" | default "200" }}
+      message.max.bytes: {{ $.Values.storage_replayer.maxMessagesBytes }}
+      privileged: {{ get $deployment_config "privileged" | default "false" }}
+      object_types:
+        {{- range $object := get $deployment_config "objects" }}
+        - {{ $object }}
+        {{- end }}
+{{ end }}
+{{- end -}}
diff --git a/swh/templates/storage-replayer/deployment.yaml b/swh/templates/storage-replayer/deployment.yaml
new file mode 100644
--- /dev/null
+++ b/swh/templates/storage-replayer/deployment.yaml
@@ -0,0 +1,86 @@
+{{ if .Values.storage_replayer.enabled -}}
+{{- $configurationChecksum := include (print $.Template.BasePath "/storage-replayer/configmap.yaml") . -}}
+{{- range $deployment, $deployment_config := .Values.storage_replayer.deployments -}}
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  namespace: {{ $.Values.namespace }}
+  name: storage-replayer-{{ $deployment }}
+  labels:
+    app: storage-replayer-{{ $deployment }}
+spec:
+  revisionHistoryLimit: 3
+  selector:
+    matchLabels:
+      app: storage-replayer-{{ $deployment }}
+  template:
+    metadata:
+      labels:
+        app: storage-replayer-{{ $deployment }}
+      annotations:
+        checksum/config: {{ $configurationChecksum | sha256sum | quote }}  # on the pod template so that a configuration change rolls the pods
+    spec:
+      {{- if $.Values.storage_replayer.affinity }}
+      affinity:
+        {{- toYaml $.Values.storage_replayer.affinity | nindent 8 }}
+      {{- end }}
+      initContainers:
+        - name: prepare-configuration
+          image: debian:bullseye
+          imagePullPolicy: Always
+          envFrom:
+            - secretRef:
+                name: {{ $.Values.storage_replayer.journalBrokers.secretName }}
+          command:
+            - /bin/bash
+          args:
+            - -c
+            - eval echo "\"$(</etc/swh/configuration-template/config.yml.template)\"" > /etc/swh/config.yml
+          volumeMounts:
+            - name: configuration
+              mountPath: /etc/swh
+            - name: configuration-template
+              mountPath: /etc/swh/configuration-template
+        {{- if $.Values.storage_replayer.cassandra.initKeyspace }}
+        - name: init-database
+          image: {{ $.Values.swh_storage_replayer_image }}:{{ $.Values.swh_storage_replayer_image_version }}
+          imagePullPolicy: Always
+          command:
+            - /bin/bash
+          args:
+            - -c
+            - eval "echo \"from swh.storage.cassandra import create_keyspace; create_keyspace(['{{ first $.Values.storage_replayer.cassandra.seeds }}'], '{{ $.Values.storage_replayer.cassandra.keySpace }}')\" | python3"
+        {{- end }}
+      containers:
+        - name: storage-replayer
+          resources:
+            requests:
+              memory: {{ get $deployment_config "requestedMemory" | default "512Mi" }}
+              cpu: {{ get $deployment_config "requestedCpu" | default "500m" }}
+          image: {{ $.Values.swh_storage_replayer_image }}:{{ $.Values.swh_storage_replayer_image_version }}
+          command:
+            - /bin/bash
+          args:
+            - -c
+            - /opt/swh/entrypoint.sh
+          env:
+            - name: STATSD_HOST
+              value: {{ $.Values.statsdExternalHost | default "prometheus-statsd-exporter" | quote }}
+            - name: STATSD_PORT
+              value: {{ $.Values.statsdPort | default "9125" | quote }}
+          imagePullPolicy: Always
+          volumeMounts:
+            - name: configuration
+              mountPath: /etc/swh
+      volumes:
+        - name: configuration
+          emptyDir: {}
+        - name: configuration-template
+          configMap:
+            name: storage-replayer-configuration-{{ $deployment }}-template
+            items:
+              - key: "config.yml.template"
+                path: "config.yml.template"
+{{ end }}
+{{- end -}}
diff --git a/swh/templates/storage-replayer/keda-autoscaling.yaml b/swh/templates/storage-replayer/keda-autoscaling.yaml
new file mode 100644
--- /dev/null
+++ b/swh/templates/storage-replayer/keda-autoscaling.yaml
@@ -0,0 +1,28 @@
+{{ if .Values.storage_replayer.enabled -}}
+{{- range $deployment, $deployment_config := .Values.storage_replayer.deployments -}}
+{{ if get $deployment_config "autoScaling" }}
+{{- $autoscalingConfig := get $deployment_config "autoScaling" -}}
+---
+apiVersion: keda.sh/v1alpha1
+kind: ScaledObject
+metadata:
+  name: storage-replayer-{{ $deployment }}-scaledobject
+  namespace: {{ $.Values.namespace }}
+spec:
+  scaleTargetRef:
+    name: storage-replayer-{{ $deployment }}
+  pollingInterval: {{ get $autoscalingConfig "poolInterval" | default 120 }}
+  minReplicaCount: {{ get $autoscalingConfig "minReplicaCount" | default 1 }}
+  maxReplicaCount: {{ get $autoscalingConfig "maxReplicaCount" | default 5 }}
+  triggers:
+    - type: kafka
+      metadata:
+        bootstrapServers: {{ join "," $.Values.storage_replayer.journalBrokers.hosts | quote }}  # every broker, comma-separated
+        consumerGroup: {{ $.Values.storage_replayer.journalBrokers.user }}-cassandra-replayer-{{ $deployment }}
+        lagThreshold: {{ get $autoscalingConfig "lagThreshold" | default 1000 | quote }}
+        offsetResetPolicy: earliest
+      authenticationRef:
+        name: keda-storage-replayer-trigger-authentication
+{{ end }}
+{{ end }}
+{{- end -}}
diff --git a/swh/templates/storage-replayer/keda-secrets.yaml b/swh/templates/storage-replayer/keda-secrets.yaml
new file mode 100644
--- /dev/null
+++ b/swh/templates/storage-replayer/keda-secrets.yaml
@@ -0,0 +1,33 @@
+{{ if .Values.storage_replayer.enabled -}}
+---
+apiVersion: v1
+kind: Secret
+metadata:
+  name: keda-storage-replayer-kafka-secrets
+  namespace: {{ .Values.namespace }}
+type: Opaque
+stringData:
+  sasl: "scram_sha512"
+  username: {{ .Values.storage_replayer.journalBrokers.user | quote }}
+  tls: "enable"
+---
+apiVersion: keda.sh/v1alpha1
+kind: TriggerAuthentication
+metadata:
+  name: keda-storage-replayer-trigger-authentication
+  namespace: {{ .Values.namespace }}
+spec:
+  secretTargetRef:
+    - parameter: sasl
+      name: keda-storage-replayer-kafka-secrets
+      key: sasl
+    - parameter: username
+      name: keda-storage-replayer-kafka-secrets
+      key: username
+    - parameter: tls
+      name: keda-storage-replayer-kafka-secrets
+      key: tls
+    - parameter: password
+      name: {{ .Values.storage_replayer.journalBrokers.secretName }}
+      key: BROKER_USER_PASSWORD
+{{- end -}}
diff --git a/swh/values.yaml b/swh/values.yaml
new file mode 100644
--- /dev/null
+++ b/swh/values.yaml
@@ -0,0 +1,53 @@
+namespace: swh
+
+# Example of services activation
+# Only implemented for storage_replayer for now
+#storage:
+#  enabled: false
+#
+#graphql:
+#  enabled: false
+
+storage_replayer:
+  enabled: false
+  storageClass: cassandra  # only cassandra is currently supported
+  maxMessagesBytes: "524288000"
+  journalBrokers:
+    # The name of the secret containing the BROKER_USER_PASSWORD value
+    secretName: storage-replayer-broker-secret
+    hosts:
+      - broker1
+      - broker2
+    user: myuser
+  cassandra:
+    initKeyspace: false  # useful to ensure the cassandra database is bootstrapped
+    seeds:
+      - seed1
+      - seed2
+    keySpace: swh
+    consistencyLevel: LOCAL_QUORUM
+  deployments: {}
+    # Example of deployments
+    # origins:
+    #   privileged: false
+    #   objects:
+    #     - origin
+    #     - origin_visit
+    #     - origin_visit_status
+    #   batchSize: 250
+    # revisions:
+    #   privileged: false
+    #   objects:
+    #     - revision
+    #   batchSize: 1000
+    #   autoScaling:
+    #     poolInterval: 120
+    #     lagThreshold: 1000
+    #     minReplicaCount: 1
+    #     maxReplicaCount: 10
+
+statsd_exporter:
+  enabled: false
+  image: prom/statsd-exporter
+  imageVersion: "v0.22.7"
+
diff --git a/swh/values/default.yaml b/swh/values/default.yaml
new file mode 100644
--- /dev/null
+++ b/swh/values/default.yaml
@@ -0,0 +1,23 @@
+namespace: swh
+
+storage_replayer:
+  storageClass: cassandra  # only cassandra is currently supported
+  maxMessagesBytes: "524288000"
+  journalBrokers:
+    # The name of the secret containing the BROKER_USER_PASSWORD value
+    secretName: storage-replayer-broker-secret
+  cassandra:
+    keySpace: swh
+    consistencyLevel: LOCAL_QUORUM
+  affinity:
+    nodeAffinity:
+      requiredDuringSchedulingIgnoredDuringExecution:
+        nodeSelectorTerms:
+          - matchExpressions:
+              - key: node-role.kubernetes.io/etcd
+                operator: NotIn
+                values:
+                  - "true"
+
+statsd_exporter:
+  enabled: true
diff --git a/swh/values/production.yaml b/swh/values/production.yaml
new file mode 100644
--- /dev/null
+++ b/swh/values/production.yaml
@@ -0,0 +1,82 @@
+storage_replayer:
+  enabled: true
+  journalBrokers:
+    hosts:
+      - kafka1.internal.softwareheritage.org:9094
+      - kafka2.internal.softwareheritage.org:9094
+      - kafka3.internal.softwareheritage.org:9094
+      - kafka4.internal.softwareheritage.org:9094
+    user: swh-cassandra-replayer-prod
+  cassandra:
+    initKeyspace: true  # only to bootstrap a new cassandra database
+    seeds:
+      - cassandra01.internal.softwareheritage.org
+      - cassandra02.internal.softwareheritage.org
+      - cassandra03.internal.softwareheritage.org
+      - cassandra04.internal.softwareheritage.org
+      - cassandra05.internal.softwareheritage.org
+      - cassandra06.internal.softwareheritage.org
+  deployments:
+    directory:
+      objects:
+        - directory
+      batchSize: 250
+      requestedCpu: 500m
+      requestedMemory: 150Mi
+      autoScaling:
+        maxReplicaCount: 24
+    origin:
+      objects:
+        - origin
+      batchSize: 1000
+      autoScaling:
+        maxReplicaCount: 10
+    origin-visit:
+      objects:
+        - origin_visit
+      batchSize: 1000
+      requestedCpu: 400m
+      requestedMemory: 400Mi
+      autoScaling:
+        maxReplicaCount: 20
+    origin-visit-status:
+      objects:
+        - origin_visit_status
+      batchSize: 1000
+      requestedCpu: 500m
+      requestedMemory: 300Mi
+      autoScaling:
+        maxReplicaCount: 30
+    release:
+      objects:
+        - release
+      batchSize: 1000
+      privileged: true
+      requestedCpu: 600m
+      requestedMemory: 300Mi
+      autoScaling:
+        maxReplicaCount: 10
+    revision:
+      objects:
+        - revision
+      batchSize: 1000
+      privileged: true
+      requestedCpu: 1000m
+      requestedMemory: 300Mi
+      autoScaling:
+        maxReplicaCount: 10
+    skipped-content:
+      objects:
+        - skipped_content
+      batchSize: 100
+      autoScaling:
+        maxReplicaCount: 5
+    snapshot:
+      objects:
+        - snapshot
+      batchSize: 250
+      requestedCpu: 450m
+      requestedMemory: 250Mi
+      autoScaling:
+        maxReplicaCount: 24
+
diff --git a/values-swh-application-versions.yaml b/values-swh-application-versions.yaml
new file mode 100644
--- /dev/null
+++ b/values-swh-application-versions.yaml
@@ -0,0 +1,5 @@
+# This file references the last version of all the softwareheritage images
+# It's used to manage the automatic update of the environments
+
+swh_storage_replayer_image: softwareheritage/storage-replayer
+swh_storage_replayer_image_version: "20220819.1"