diff --git a/worker/.gitignore b/worker/.gitignore
new file mode 100644
index 0000000..136a928
--- /dev/null
+++ b/worker/.gitignore
@@ -0,0 +1,2 @@
+*.secret.yaml
+
diff --git a/worker/.helmignore b/worker/.helmignore
new file mode 100644
index 0000000..0e8a0eb
--- /dev/null
+++ b/worker/.helmignore
@@ -0,0 +1,23 @@
+# Files and directories left out when the chart is packaged.
+# Patterns use shell globs and relative paths; prefix a pattern with ! to
+# negate it. Only one pattern per line.
+.DS_Store
+# Version-control directories and metadata
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Editor and tool backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# IDE and editor project files
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/worker/Chart.yaml b/worker/Chart.yaml
new file mode 100644
index 0000000..dc9e2eb
--- /dev/null
+++ b/worker/Chart.yaml
@@ -0,0 +1,24 @@
+apiVersion: v2
+name: worker
+description: A Helm chart to deploy dynamic workers on Kubernetes cluster
+
+# Chart type: either 'application' or 'library'.
+#
+# An application chart is a collection of templates packaged into a versioned
+# archive that can be deployed.
+#
+# A library chart provides utilities or functions for chart developers. It is
+# included as a dependency of application charts to inject those helpers into the
+# rendering pipeline; it defines no templates and therefore cannot be deployed.
+type: application
+
+# Chart version. Increment it each time the chart or its templates change,
+# including changes to the app version.
+# Versions are expected to follow Semantic Versioning (https://semver.org/)
+version: 0.1.0
+
+# Version of the application being deployed. Increment it each time the
+# application changes. It is not expected to follow Semantic Versioning; it should
+# reflect the version the application is using.
+# It is recommended to use it with quotes.
+appVersion: "1.16.0"
diff --git a/worker/README.md b/worker/README.md
new file mode 100644
index 0000000..d602318
--- /dev/null
+++ b/worker/README.md
@@ -0,0 +1,138 @@
+# Goal
+
+- Autoscale the workers depending on the repositories to load and the allocated resources.
+
+# keda
+
+This uses KEDA - K(ubernetes) E(vents)-D(riven) A(utoscaling):
+```
+$ helm repo add kedacore https://kedacore.github.io/charts
+$ helm repo update
+swhworker@poc-rancher:~$ kubectl create namespace keda
+namespace/keda created
+swhworker@poc-rancher:~$ helm install keda kedacore/keda --namespace keda
+NAME: keda
+LAST DEPLOYED: Fri Oct  8 09:48:40 2021
+NAMESPACE: keda
+STATUS: deployed
+REVISION: 1
+TEST SUITE: None
+```
+source: https://keda.sh/docs/2.4/deploy/
+
+# helm
+
+We use helm to ease the cluster application management.
+
+# Install
+
+Install the worker declaration from this directory in the cluster:
+```
+$ export KUBECONFIG=staging-workers.yaml
+$ TYPE=git; REL=workers-$TYPE; \
+  helm install -f ./instances/loaders-$TYPE.staging.values.yaml $REL ../worker
+$ TYPE=pypi; REL=workers-$TYPE; \
+  helm install -f ./instances/loaders-$TYPE.staging.values.yaml $REL ../worker
+```
+
+Where:
+```
+$ cat ./instances/loaders-git.staging.values.yaml
+# Default values for worker.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+amqp:
+  host: scheduler0.internal.staging.swh.network
+  queue_threshold: 10  # spawn worker per increment of `value` messages
+  queues:
+    - swh.loader.git.tasks.UpdateGitRepository
+    - swh.loader.git.tasks.LoadDiskGitRepository
+    - swh.loader.git.tasks.UncompressAndLoadDiskGitRepository
+
+storage:
+  host: storage1.internal.staging.swh.network
+
+loader:
+  name: loaders
+  type: git
+```
+
+# List
+
+List currently deployed applications:
+
+```
+$ helm list
+helm list
+NAME          NAMESPACE  REVISION  UPDATED                                   STATUS    CHART         APP VERSION
+workers-bzr   default    1         2022-04-29 12:59:32.111950055 +0200 CEST  deployed  worker-0.1.0  1.16.0
+workers-git   default    4         2022-04-29 12:50:12.322826487 +0200 CEST  deployed  worker-0.1.0  1.16.0
+workers-pypi  default    1         2022-04-29 12:51:22.506259018 +0200 CEST  deployed  worker-0.1.0  1.16.0
+```
+
+# Upgrade
+
+After adapting the worker definition, you can apply the changes by upgrading the
+deployed application:
+
+```
+$ TYPE=git; REL=workers-$TYPE; \
+  helm upgrade -f ./instances/loaders-$TYPE.staging.values.yaml $REL ../worker
+```
+
+# Secrets
+
+The workers require credentials (installed as secrets within the cluster):
+- metadata fetcher credentials `metadata-fetcher-credentials`
+- amqp credentials `amqp-access-credentials`
+
+More details describing the secrets:
+```
+$ kubectl describe secret metadata-fetcher-credentials
+```
+
+Installed through:
+
+```
+$ TYPE=git  # Replace mentions below in the yaml files
+$ kubectl apply -f $SECRET_FILE --namespace ns-loaders-$TYPE
+# for secret file in {
+#   instances/loaders-$TYPE-metadata-fetcher-credentials.secret.yaml
+#   ./loaders-$TYPE-sentry.secret.yaml
+#   ./amqp-access-credentials.secret.yaml
+#   ...
+# }
+$ cat instances/loaders-metadata-fetcher.secret.yaml
+apiVersion: v1
+kind: Secret
+metadata:
+  name: metadata-fetcher-credentials
+type: Opaque
+stringData:
+  data: |
+    metadata_fetcher_credentials:
+      github:
+        github:
+        - username: <username>
+          password: <password>
+        - ...
+$ cat ./amqp-access-credentials.secret.yaml
+apiVersion: v1
+kind: Secret
+metadata:
+  name: amqp-access-credentials
+  namespace: ns-loaders-$TYPE
+type: Opaque
+stringData:
+  host: amqp://<username>:<password>@scheduler0.internal.staging.swh.network:5672/%2f
+$ cat ./loaders-$TYPE-sentry.secret.yaml
+apiVersion: v1
+kind: Secret
+metadata:
+  name: loaders-$TYPE-sentry-secrets
+  namespace: ns-loaders-$TYPE
+type: Opaque
+stringData:
+  sentry-dsn: https://<sentry-key>@sentry.softwareheritage.org/8
+```
diff --git a/worker/instances/loaders-bzr.staging.values.yaml b/worker/instances/loaders-bzr.staging.values.yaml
new file mode 100644
index 0000000..896c426
--- /dev/null
+++ b/worker/instances/loaders-bzr.staging.values.yaml
@@ -0,0 +1,16 @@
+# Default values for worker.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+amqp:
+  host: scheduler0.internal.staging.swh.network
+  queue_threshold: 10  # spawn worker per increment of `value` messages
+  queues:
+    - swh.loader.bzr.tasks.LoadBazaar
+
+storage:
+  host: storage1.internal.staging.swh.network
+
+loader:
+  name: loaders
+  type: bzr
diff --git a/worker/instances/loaders-cvs.staging.values.yaml b/worker/instances/loaders-cvs.staging.values.yaml
new file mode 100644
index 0000000..fe3594b
--- /dev/null
+++ b/worker/instances/loaders-cvs.staging.values.yaml
@@ -0,0 +1,16 @@
+# Default values for worker.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+amqp:
+  host: scheduler0.internal.staging.swh.network
+  queue_threshold: 10  # one more worker per `queue_threshold` queued messages
+  queues:
+    - swh.loader.cvs.tasks.LoadCvsRepository
+
+storage:
+  host: storage1.internal.staging.swh.network
+
+loader:
+  name: loaders
+  type: cvs
diff --git a/worker/instances/loaders-git.staging.values.yaml b/worker/instances/loaders-git.staging.values.yaml
new file mode 100644
index 0000000..a0f8187
--- /dev/null
+++ b/worker/instances/loaders-git.staging.values.yaml
@@ -0,0 +1,27 @@
+# Staging values for the git loader workers.
+# This is a YAML-formatted file.
+# Pass it to helm with -f to override the chart's default values.
+
+amqp:
+  host: scheduler0.internal.staging.swh.network
+  queue_threshold: 5  # one more worker per `queue_threshold` queued messages
+  queues:
+    - swh.loader.git.tasks.UpdateGitRepository
+    - swh.loader.git.tasks.LoadDiskGitRepository
+    - swh.loader.git.tasks.UncompressAndLoadDiskGitRepository
+
+storage:
+  host: storage1.internal.staging.swh.network
+
+loader:
+  name: loaders
+  type: git
+
+swh:
+  loader:
+    replicas:
+      min: 1
+      max: 10
+
+sentry:
+  swhpackage: swh.loader.git
diff --git a/worker/instances/loaders-maven.staging.values.yaml b/worker/instances/loaders-maven.staging.values.yaml
new file mode 100644
index 0000000..c7a3c67
--- /dev/null
+++ b/worker/instances/loaders-maven.staging.values.yaml
@@ -0,0 +1,16 @@
+# Staging values for the maven loader workers.
+# This is a YAML-formatted file.
+# Pass it to helm with -f to override the chart's default values.
+
+amqp:
+  host: scheduler0.internal.staging.swh.network
+  queue_threshold: 10  # one more worker per `queue_threshold` queued messages
+  queues:
+    - swh.loader.package.maven.tasks.LoadMaven
+
+storage:
+  host: storage1.internal.staging.swh.network
+
+loader:
+  name: loaders
+  type: maven
diff --git a/worker/instances/loaders-npm.staging.values.yaml b/worker/instances/loaders-npm.staging.values.yaml
new file mode 100644
index 0000000..8143f51
--- /dev/null
+++ b/worker/instances/loaders-npm.staging.values.yaml
@@ -0,0 +1,16 @@
+# Staging values for the npm loader workers.
+# This is a YAML-formatted file.
+# Pass it to helm with -f to override the chart's default values.
+
+amqp:
+  host: scheduler0.internal.staging.swh.network
+  queue_threshold: 10  # one more worker per `queue_threshold` queued messages
+  queues:
+    - swh.loader.package.npm.tasks.LoadNpm
+
+storage:
+  host: storage1.internal.staging.swh.network
+
+loader:
+  name: loaders
+  type: npm
diff --git a/worker/instances/loaders-pypi.staging.values.yaml b/worker/instances/loaders-pypi.staging.values.yaml
new file mode 100644
index 0000000..9b040b3
--- /dev/null
+++ b/worker/instances/loaders-pypi.staging.values.yaml
@@ -0,0 +1,17 @@
+# Staging values for the pypi loader workers.
+# This is a YAML-formatted file.
+# Pass it to helm with -f to override the chart's default values.
+
+amqp:
+  host: scheduler0.internal.staging.swh.network
+  queue_threshold: 10  # one more worker per `queue_threshold` queued messages
+  queues:
+    - swh.loader.package.pypi.tasks.LoadPyPI
+
+storage:
+  host: storage1.internal.staging.swh.network
+
+loader:
+  name: loaders
+  type: pypi
+
diff --git a/worker/instances/loaders-svn.staging.values.yaml b/worker/instances/loaders-svn.staging.values.yaml
new file mode 100644
index 0000000..6bd88a4
--- /dev/null
+++ b/worker/instances/loaders-svn.staging.values.yaml
@@ -0,0 +1,18 @@
+# Staging values for the svn loader workers.
+# This is a YAML-formatted file.
+# Pass it to helm with -f to override the chart's default values.
+
+amqp:
+  host: scheduler0.internal.staging.swh.network
+  queue_threshold: 10  # spawn worker per increment of `value` messages
+  queues:
+    - swh.loader.svn.tasks.LoadSvnRepository
+    - swh.loader.svn.tasks.MountAndLoadSvnRepository
+    - swh.loader.svn.tasks.DumpMountAndLoadSvnRepository
+
+storage:
+  host: storage1.internal.staging.swh.network
+
+loader:
+  name: loaders
+  type: svn
diff --git a/worker/templates/autoscale.yaml b/worker/templates/autoscale.yaml
new file mode 100644
index 0000000..a2fe028
--- /dev/null
+++ b/worker/templates/autoscale.yaml
@@ -0,0 +1,77 @@
+---
+# Credentials for the rabbitmq triggers below: the `host` trigger parameter is
+# read from key `host` of the `amqp-access-credentials` secret.
+apiVersion: keda.sh/v1alpha1
+kind: TriggerAuthentication
+metadata:
+  name: amqp-authentication
+  namespace: ns-{{ .Values.loader.name }}-{{ .Values.loader.type }}
+spec:
+  secretTargetRef:  # Optional.
+    - parameter: host
+      name: amqp-access-credentials
+      key: host
+
+---
+# Scales the loader Deployment according to the backlog of each AMQP queue.
+apiVersion: keda.sh/v1alpha1
+kind: ScaledObject
+metadata:
+  name: loaders-{{ .Values.loader.name }}-{{ .Values.loader.type }}-operators
+  namespace: ns-{{ .Values.loader.name }}-{{ .Values.loader.type }}
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1  # Optional. Default: apps/v1
+    kind: Deployment  # Optional. Default: Deployment
+    # Mandatory. Must be in same namespace as ScaledObject
+    name: {{ .Values.loader.name }}-{{ .Values.loader.type }}
+    # envSourceContainerName: {container-name}  # Optional. Default:
+    # .spec.template.spec.containers[0]
+  pollingInterval: 30  # Optional. Default: 30 seconds
+  cooldownPeriod: 300  # Optional. Default: 300 seconds
+  # Optional. Must be less than minReplicaCount
+  idleReplicaCount: 0
+  minReplicaCount: {{ .Values.swh.loader.replicas.min }}  # Optional. Default: 0
+  maxReplicaCount: {{ .Values.swh.loader.replicas.max }}  # Optional. Default: 100
+  # Optional. Section to specify fallback options
+  fallback:
+    failureThreshold: 3  # Mandatory if fallback section is included
+    replicas: 6  # Mandatory if fallback section is included
+  # Optional. Section to specify advanced options
+  advanced:
+    restoreToOriginalReplicaCount: false  # Optional. Default: false
+    # Optional. Section to specify HPA related options
+    horizontalPodAutoscalerConfig:
+      # Optional. Use to modify HPA's scaling behavior
+      behavior:
+        scaleDown:
+          stabilizationWindowSeconds: 60  # default 300
+          policies:
+            - type: Percent
+              value: 2
+              periodSeconds: 15
+  # One rabbitmq trigger is rendered per queue listed in .Values.amqp.queues.
+  triggers:
+    {{- range .Values.amqp.queues }}
+    - type: rabbitmq
+      authenticationRef:
+        name: amqp-authentication
+      metadata:
+        # Optional. If not specified, it must be done by using
+        # TriggerAuthentication.
+        # NOTE(review): the connection string is supplied by `amqp-authentication`;
+        # this literal looks like a placeholder - confirm KEDA ignores it.
+        host: host
+        # Optional. Specifies protocol to use, either amqp or http, or auto to
+        # autodetect based on the `host` value. Default value is auto.
+        protocol: auto
+        mode: QueueLength  # QueueLength or MessageRate
+        # Message backlog or publish/sec. target per instance
+        value: {{ $.Values.amqp.queue_threshold | quote }}
+        queueName: {{ . | quote }}
+        # Optional. If not specified, use the vhost in the `host` connection
+        # string. Alternatively, you can use existing environment variables to
+        # read configuration from: see details in the "Parameter list" section
+        # (hostFromEnv: RABBITMQ_HOST).
+        vhostName: /
+    {{- end }}
diff --git a/worker/templates/config-map.yaml b/worker/templates/config-map.yaml
new file mode 100644
index 0000000..b0100f7
--- /dev/null
+++ b/worker/templates/config-map.yaml
@@ -0,0 +1,62 @@
+---
+# Loader configuration template and container entrypoint script.
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ .Values.loader.name }}-{{ .Values.loader.type }}
+  namespace: ns-{{ .Values.loader.name }}-{{ .Values.loader.type }}
+data:
+  # The `##amqp_host##` placeholder is substituted by entrypoint.sh at startup.
+  config.yml: |
+    storage:
+      cls: pipeline
+      steps:
+      - cls: buffer
+        min_batch_size:
+          content: 1000
+          content_bytes: 52428800
+          directory: 1000
+          directory_entries: 12000
+          revision: 1000
+          revision_parents: 2000
+          revision_bytes: 52428800
+          release: 1000
+          release_bytes: 52428800
+          extid: 1000
+      - cls: filter
+      - cls: retry
+      - cls: remote
+        url: http://{{ .Values.storage.host }}:5002/
+
+    celery:
+      task_broker: ##amqp_host##
+      task_queues:
+      {{- range .Values.amqp.queues }}
+      - {{ . }}
+      {{- end }}
+  entrypoint.sh: |
+    #!/bin/bash
+
+    set -e
+
+    # Create the full config file from the mounted template
+    cat /etc/softwareheritage/config.yml > "$SWH_CONFIG_FILENAME"
+    # contains required credentials for git loader (with metadata loader inside)
+    # ignored by the other loaders
+    cat /etc/credentials/metadata-fetcher/data >> "$SWH_CONFIG_FILENAME"
+
+    # Install the rabbitmq host information. Escape the characters that are
+    # special in a sed replacement (backslash, & and the | delimiter) so the
+    # connection string is inserted verbatim.
+    amqp_host=$(printf '%s' "$RABBITMQ_HOST" | sed -e 's/[\\&|]/\\&/g')
+    sed -i "s|##amqp_host##|${amqp_host}|g" "$SWH_CONFIG_FILENAME"
+
+    echo Starting the swh Celery worker
+    exec python -m celery \
+      --app=swh.scheduler.celery_backend.config.app \
+      worker \
+      --pool=prefork \
+      --concurrency="${CONCURRENCY}" \
+      --max-tasks-per-child="${MAX_TASKS_PER_CHILD}" \
+      -Ofair --loglevel="${LOGLEVEL}" \
+      --hostname "${HOSTNAME}"
diff --git a/worker/templates/deployment.yaml b/worker/templates/deployment.yaml
new file mode 100644
index 0000000..3bf31e3
--- /dev/null
+++ b/worker/templates/deployment.yaml
@@ -0,0 +1,95 @@
+---
+# Celery loader workers; this Deployment is the scale target of the ScaledObject.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ .Values.loader.name }}-{{ .Values.loader.type }}
+  namespace: ns-{{ .Values.loader.name }}-{{ .Values.loader.type }}
+  labels:
+    app: {{ .Values.loader.name }}-{{ .Values.loader.type }}
+spec:
+  # NOTE(review): a ScaledObject also manages this Deployment's replicas; confirm
+  # whether pinning the count here resets the autoscaled value on each upgrade.
+  replicas: {{ .Values.swh.loader.replicas.min }}
+  selector:
+    matchLabels:
+      app: {{ .Values.loader.name }}-{{ .Values.loader.type }}
+  strategy:
+    type: RollingUpdate
+    rollingUpdate:
+      maxSurge: 1
+  template:
+    metadata:
+      labels:
+        app: {{ .Values.loader.name }}-{{ .Values.loader.type }}
+    spec:
+      containers:
+        - name: loaders
+          image: "{{ .Values.swh.loader.image }}:{{ .Values.swh.loader.version }}"
+          imagePullPolicy: Always
+          command:
+            - /entrypoint.sh
+          resources:
+            requests:
+              memory: "256Mi"
+              cpu: "200m"
+            limits:
+              memory: "4000Mi"
+              cpu: "1200m"
+          lifecycle:
+            preStop:
+              exec:
+                command: ["kill", "1"]
+          env:
+            - name: CONCURRENCY
+              value: "1"
+            - name: MAX_TASKS_PER_CHILD
+              value: "5"
+            - name: LOGLEVEL
+              value: "INFO"
+            - name: SWH_CONFIG_FILENAME
+              # FIXME: built by entrypoint.sh, determine how to properly declare this
+              value: /tmp/config.yml
+            - name: SWH_SENTRY_ENVIRONMENT
+              value: {{ .Values.sentry.environment | quote }}
+            - name: SWH_MAIN_PACKAGE
+              value: {{ .Values.sentry.swhpackage | quote }}
+            - name: SWH_SENTRY_DSN
+              valueFrom:
+                secretKeyRef:
+                  name: {{ .Values.loader.name }}-{{ .Values.loader.type }}-sentry-secrets
+                  key: sentry-dsn
+                  # 'name' secret must exist & include key "sentry-dsn"
+                  optional: false
+            - name: RABBITMQ_HOST
+              valueFrom:
+                secretKeyRef:
+                  name: amqp-access-credentials
+                  key: host
+                  # 'name' secret must exist & include key "host"
+                  optional: false
+          volumeMounts:
+            - name: config
+              mountPath: /etc/softwareheritage/config.yml
+              subPath: config.yml
+              readOnly: true
+            - name: config
+              mountPath: /entrypoint.sh
+              subPath: entrypoint.sh
+              readOnly: true
+            - name: metadata-fetcher-credentials
+              mountPath: /etc/credentials/metadata-fetcher
+              readOnly: true
+            - mountPath: /tmp
+              name: tmp-volume
+      volumes:
+        - name: config
+          configMap:
+            name: {{ .Values.loader.name }}-{{ .Values.loader.type }}
+            # Read + execute only: entrypoint.sh is run as the container command
+            defaultMode: 0555
+        - name: tmp-volume
+          emptyDir: {}
+        - name: metadata-fetcher-credentials
+          secret:
+            secretName: metadata-fetcher-credentials
diff --git a/worker/templates/services.yaml b/worker/templates/services.yaml
new file mode 100644
index 0000000..ecf4d19
--- /dev/null
+++ b/worker/templates/services.yaml
@@ -0,0 +1,22 @@
+---
+# ExternalName alias pointing at the storage host.
+apiVersion: v1
+kind: Service
+metadata:
+  name: storage-{{ .Values.loader.name }}-{{ .Values.loader.type }}
+  namespace: ns-{{ .Values.loader.name }}-{{ .Values.loader.type }}
+spec:
+  type: ExternalName
+  externalName: {{ .Values.storage.host | quote }}
+
+---
+# ExternalName alias pointing at the AMQP host.
+apiVersion: v1
+kind: Service
+metadata:
+  name: amqp-{{ .Values.loader.name }}-{{ .Values.loader.type }}
+  namespace: ns-{{ .Values.loader.name }}-{{ .Values.loader.type }}
+spec:
+  type: ExternalName
+  externalName: {{ .Values.amqp.host | quote }}
+
diff --git a/worker/values.yaml b/worker/values.yaml
new file mode 100644
index 0000000..2888b9d
--- /dev/null
+++ b/worker/values.yaml
@@ -0,0 +1,32 @@
+# Default values for worker.
+# This is a YAML-formatted file.
+# Declare variables to be passed into your templates.
+
+amqp:
+  host: amqp
+  # Queue-length target per worker used by the autoscaling triggers
+  queue_threshold: 10
+  # Queues to consume; one autoscaling trigger is rendered per queue
+  queues: []
+
+storage:
+  host: swh-storage
+
+loader:
+  name: loaders
+  # Loader type (e.g. git, svn, pypi); expected to be set per instance
+  type: ""
+
+swh:
+  loader:
+    image: softwareheritage/loaders
+    # Quoted so the image tag is kept as a string instead of parsed as a date
+    version: "2022-05-11"
+    replicas:
+      min: 1
+      max: 5
+
+sentry:
+  environment: staging
+  # Exported to the workers as SWH_MAIN_PACKAGE (e.g. swh.loader.git)
+  swhpackage: ""