# Patch summary (storage-replayer Helm chart):
#   * templates/storage-replayer/configmap.yaml: right after `consistency_level`,
#     emit every key/value pair of a deployment's optional `specific_options`.
#   * templates/storage-replayer/deployment.yaml: left-trim the
#     `toYaml ... | nindent 8` action that renders the affinity block.
#   * values/cassandra-replay.yaml: new values file (namespace cassandra-replay).
#   * values/production.yaml: deleted.
#
# NOTE(review): this patch had lost its line breaks and indentation. Both were
# rebuilt from the hunk headers and conventional Helm/YAML layout; confirm the
# whitespace against the target files before applying. In particular:
#   - configmap.yaml: the hunk header requires 42 old / 47 new lines, so one
#     blank context line is assumed before `journal_client:` - TODO confirm.
#   - deployment.yaml: the prepare-configuration command was cut off after
#     `$(`; it is restored as a read of the mounted config.yml.template
#     redirected into /etc/swh/config.yml - TODO confirm.
# NOTE(review): init-database passes the literal keyspace 'swh' while the
# configmap renders cassandra.keySpace - verify the two are meant to agree.
# NOTE(review): the SWH_SENTRY_DSN comment mentions key "host" although the
# secretKeyRef key is sentry-dsn - presumably a stale comment; verify.
diff --git a/swh/templates/storage-replayer/configmap.yaml b/swh/templates/storage-replayer/configmap.yaml
index 0a72206..8cc2eb8 100644
--- a/swh/templates/storage-replayer/configmap.yaml
+++ b/swh/templates/storage-replayer/configmap.yaml
@@ -1,42 +1,47 @@
 {{ if .Values.storage_replayer.enabled -}}
 {{- range $deployment, $deployment_config := .Values.storage_replayer.deployments -}}
 ---
 apiVersion: v1
 kind: ConfigMap
 metadata:
   namespace: {{ $.Values.namespace }}
   name: storage-replayer-configuration-{{ $deployment }}-template
 data:
   config.yml.template: |
     storage:
       cls: {{ $.Values.storage_replayer.storageClass }}
       hosts:
       {{- range $seed := $.Values.storage_replayer.cassandra.seeds }}
         - {{ $seed }}
       {{- end }}
       keyspace: {{ $.Values.storage_replayer.cassandra.keySpace }}
       consistency_level: {{ $.Values.storage_replayer.cassandra.consistencyLevel }}
+      {{- if $deployment_config.specific_options -}}
+      {{- range $option, $value := $deployment_config.specific_options }}
+      {{ $option }}: {{ $value }}
+      {{- end }}
+      {{- end }}
       objstorage:
         cls: noop

     journal_client:
       cls: kafka
       brokers:
       {{- range $broker := $.Values.storage_replayer.journalBrokers.hosts }}
         - {{ $broker }}
       {{- end }}
       sasl.username: {{ $.Values.storage_replayer.journalBrokers.user }}
       sasl.password: ${BROKER_USER_PASSWORD}
       security.protocol: sasl_ssl
       sasl.mechanism: SCRAM-SHA-512
       # The prefix must match the username
       group_id: {{ $.Values.storage_replayer.journalBrokers.user }}-cassandra-replayer-{{ $deployment }}
       batch_size: {{ get $deployment_config "batchSize" | default "200" }}
       message.max.bytes: {{ $.Values.storage_replayer.maxMessagesBytes }}
       privileged: {{ get $deployment_config "privileged" | default "false" }}
       object_types:
       {{- range $object := get $deployment_config "objects" }}
         - {{ $object }}
       {{- end }}
 {{ end }}
 {{- end -}}
diff --git a/swh/templates/storage-replayer/deployment.yaml b/swh/templates/storage-replayer/deployment.yaml
index fa871b0..6927749 100644
--- a/swh/templates/storage-replayer/deployment.yaml
+++ b/swh/templates/storage-replayer/deployment.yaml
@@ -1,97 +1,97 @@
 {{ if .Values.storage_replayer.enabled -}}
 {{- $configurationChecksum := include (print $.Template.BasePath "/storage-replayer/configmap.yaml") . -}}
 {{- range $deployment, $deployment_config := .Values.storage_replayer.deployments -}}
 ---
 apiVersion: apps/v1
 kind: Deployment
 metadata:
   namespace: {{ $.Values.namespace }}
   name: storage-replayer-{{ $deployment }}
   labels:
     app: storage-replayer-{{ $deployment }}
 spec:
   revisionHistoryLimit: 2
   selector:
     matchLabels:
       app: storage-replayer-{{ $deployment }}
   template:
     metadata:
       labels:
         app: storage-replayer-{{ $deployment }}
       annotations:
         checksum/config: {{ $configurationChecksum | sha256sum }}
     spec:
       {{- if $.Values.storage_replayer.affinity }}
       affinity:
-        {{ toYaml $.Values.storage_replayer.affinity | nindent 8 }}
+        {{- toYaml $.Values.storage_replayer.affinity | nindent 8 }}
       {{- end }}
       initContainers:
         - name: prepare-configuration
           image: debian:bullseye
           imagePullPolicy: Always
           envFrom:
           - secretRef:
               name: {{ $.Values.storage_replayer.journalBrokers.secretName }}
           command:
             - /bin/bash
           args:
             - -c
             - eval echo "\"$(</etc/swh/configuration-template/config.yml.template)\"" > /etc/swh/config.yml
           volumeMounts:
           - name: configuration
             mountPath: /etc/swh
           - name: configuration-template
             mountPath: /etc/swh/configuration-template
         {{- if $.Values.storage_replayer.cassandra.initKeyspace }}
         - name: init-database
           image: {{ $.Values.swh_storage_replayer_image }}:{{ $.Values.swh_storage_replayer_image_version }}
           imagePullPolicy: Always
           command:
           - /bin/bash
           args:
           - -c
           - eval "echo \"from swh.storage.cassandra import create_keyspace; create_keyspace(['{{ first $.Values.storage_replayer.cassandra.seeds }}'], 'swh')\" | python3"
         {{- end }}
       containers:
       - name: storage-replayer
         resources:
           requests:
             memory: {{ get $deployment_config "requestedMemory" | default "512Mi" }}
             cpu: {{ get $deployment_config "requestedCpu" | default "500m" }}
         image: {{ $.Values.swh_storage_replayer_image }}:{{ $.Values.swh_storage_replayer_image_version }}
         command:
         - /bin/bash
         args:
         - -c
         - /opt/swh/entrypoint.sh
         env:
         - name: STATSD_HOST
           value: {{ $.Values.statsdExternalHost | default "prometheus-statsd-exporter" }}
         - name: STATSD_PORT
           value: {{ $.Values.statsdPort | default "9125" | quote }}
         - name: SWH_SENTRY_ENVIRONMENT
           value: {{ $.Values.sentry.environment }}
         - name: SWH_MAIN_PACKAGE
           value: swh.storage
         - name: SWH_SENTRY_DSN
           valueFrom:
             secretKeyRef:
               name: storage-replayer-sentry-secrets
               key: sentry-dsn
               # 'name' secret must exist & include key "host"
               optional: true
         imagePullPolicy: Always
         volumeMounts:
           - name: configuration
             mountPath: /etc/swh
       volumes:
       - name: configuration
         emptyDir: {}
       - name: configuration-template
         configMap:
           name: storage-replayer-configuration-{{ $deployment }}-template
           items:
           - key: "config.yml.template"
             path: "config.yml.template"
 {{ end }}
 {{- end -}}
diff --git a/swh/values/cassandra-replay.yaml b/swh/values/cassandra-replay.yaml
new file mode 100644
index 0000000..f8687eb
--- /dev/null
+++ b/swh/values/cassandra-replay.yaml
@@ -0,0 +1,153 @@
+namespace: cassandra-replay
+sentry:
+  environment: production
+
+storage_replayer:
+  enabled: true
+
+  affinity:
+    nodeAffinity:
+      requiredDuringSchedulingIgnoredDuringExecution:
+        nodeSelectorTerms:
+        - matchExpressions:
+          - key: "swh/replayer"
+            operator: In
+            values:
+            - "true"
+  journalBrokers:
+    hosts:
+    - kafka1.internal.softwareheritage.org:9094
+    - kafka2.internal.softwareheritage.org:9094
+    - kafka3.internal.softwareheritage.org:9094
+    - kafka4.internal.softwareheritage.org:9094
+    user: swh-cassandra-replayer-prod
+  cassandra:
+    initKeyspace: true # only to bootstrap a new cassandra database
+    seeds:
+    - cassandra01.internal.softwareheritage.org
+    - cassandra02.internal.softwareheritage.org
+    - cassandra03.internal.softwareheritage.org
+    - cassandra04.internal.softwareheritage.org
+    - cassandra05.internal.softwareheritage.org
+    - cassandra06.internal.softwareheritage.org
+  deployments:
+    # content:
+    #   objects:
+    #     - content
+    #   requestedCpu: 425m
+    #   requestedMemory: 200Mi
+    #   autoScaling:
+    #     maxReplicaCount: 0
+    #     minReplicaCount: 0
+    # directory:
+    #   objects:
+    #     - directory
+    #   batchSize: 250
+    #   requestedCpu: 350m
+    #   requestedMemory: 250Mi
+    #   autoScaling:
+    #     maxReplicaCount: 20
+    #   specific_options:
+    #     directory_entries_insert_algo: batch
+    extid:
+      objects:
+        - extid
+      batchSize: 1000
+      # Full replay
+      #requestedCpu: 400m
+      requestedMemory: 200Mi
+      #Follow up consumption
+      requestedCpu: 50m
+      autoScaling:
+        maxReplicaCount: 5
+    metadata:
+      objects:
+        - metadata_authority
+        - metadata_fetcher
+      # follow up consumption
+      requestedCpu: 50m
+      requestedMemory: 100Mi
+      autoScaling:
+        maxReplicaCount: 5
+    raw-extrinsic-metadata:
+      objects:
+        - raw_extrinsic_metadata
+      batchSize: 250
+      # Full replay
+      #requestedCpu: 400m
+      requestedMemory: 200Mi
+      # follow up consumption
+      requestedCpu: 50m
+      autoScaling:
+        maxReplicaCount: 5
+    origin:
+      objects:
+        - origin
+      batchSize: 1000
+      # Full replay
+      #requestedCpu: 400m
+      requestedMemory: 200Mi
+      #Follow up consumption
+      requestedCpu: 50m
+      autoScaling:
+        maxReplicaCount: 5
+    origin-visit:
+      objects:
+        - origin_visit
+      batchSize: 1000
+      # Full replay
+      # requestedCpu: 400m
+      # requestedMemory: 400Mi
+      #Follow up consumption
+      requestedCpu: 100m
+      requestedMemory: 100Mi
+      autoScaling:
+        maxReplicaCount: 5
+    origin-visit-status:
+      objects:
+        - origin_visit_status
+      batchSize: 1000
+      requestedCpu: 500m
+      requestedMemory: 300Mi
+      autoScaling:
+        maxReplicaCount: 64
+    release:
+      objects:
+        - release
+      batchSize: 1000
+      privileged: true
+      # Full replay
+      #requestedCpu: 600m
+      requestedMemory: 300Mi
+      # follow up consumption
+      requestedCpu: 50m
+      autoScaling:
+        maxReplicaCount: 5
+    revision:
+      objects:
+        - revision
+      batchSize: 1000
+      privileged: true
+      requestedCpu: 1000m
+      requestedMemory: 300Mi
+      autoScaling:
+        maxReplicaCount: 10
+    skipped-content:
+      objects:
+        - skipped_content
+      batchSize: 100
+      # Full replay
+      #requestedCpu: 300m
+      requestedMemory: 400Mi
+      # follow up consumption
+      requestedCpu: 50m
+      autoScaling:
+        maxReplicaCount: 5
+    snapshot:
+      objects:
+        - snapshot
+      batchSize: 250
+      requestedCpu: 400m
+      requestedMemory: 250Mi
+      autoScaling:
+        maxReplicaCount: 10
diff --git a/swh/values/production.yaml b/swh/values/production.yaml
deleted file mode 100644
index c07f79e..0000000
--- a/swh/values/production.yaml
+++ /dev/null
@@ -1,85 +0,0 @@
-sentry:
-  environment: production
-
-storage_replayer:
-  enabled: true
-  journalBrokers:
-    hosts:
-    - kafka1.internal.softwareheritage.org:9094
-    - kafka2.internal.softwareheritage.org:9094
-    - kafka3.internal.softwareheritage.org:9094
-    - kafka4.internal.softwareheritage.org:9094
-    user: swh-cassandra-replayer-prod
-  cassandra:
-    initKeyspace: true # only to bootstrap a new cassandra database
-    seeds:
-    - cassandra01.internal.softwareheritage.org
-    - cassandra02.internal.softwareheritage.org
-    - cassandra03.internal.softwareheritage.org
-    - cassandra04.internal.softwareheritage.org
-    - cassandra05.internal.softwareheritage.org
-    - cassandra06.internal.softwareheritage.org
-  deployments:
-    directory:
-      objects:
-        - directory
-      batchSize: 250
-      requestedCpu: 500m
-      requestedMemory: 150Mi
-      autoScaling:
-        maxReplicaCount: 24
-    origin:
-      objects:
-        - origin
-      batchSize: 1000
-      autoScaling:
-        maxReplicaCount: 10
-    origin-visit:
-      objects:
-        - origin_visit
-      batchSize: 1000
-      requestedCpu: 400m
-      requestedMemory: 400Mi
-      autoScaling:
-        maxReplicaCount: 20
-    origin-visit-status:
-      objects:
-        - origin_visit_status
-      batchSize: 1000
-      requestedCpu: 500m
-      requestedMemory: 300Mi
-      autoScaling:
-        maxReplicaCount: 30
-    release:
-      objects:
-        - release
-      batchSize: 1000
-      privileged: true
-      requestedCpu: 600m
-      requestedMemory: 300Mi
-      autoScaling:
-        maxReplicaCount: 10
-    revision:
-      objects:
-        - release
-      batchSize: 1000
-      privileged: true
-      requestedCpu: 1000m
-      requestedMemory: 300Mi
-      autoScaling:
-        maxReplicaCount: 10
-    skipped-content:
-      objects:
-        - skipped_content
-      batchSize: 100
-      autoScaling:
-        maxReplicaCount: 5
-    snapshot:
-      objects:
-        - snapshot
-      batchSize: 250
-      requestedCpu: 450m
-      requestedMemory: 250Mi
-      autoScaling:
-        maxReplicaCount: 24
-