NOTE(review): line breaks and indentation below were reconstructed from a
whitespace-collapsed copy of this patch. Blank context lines (7 in the first
hunk, 3 in the second) were placed by inference so that the hunk line counts
hold; verify against the target files before applying.

diff --git a/kubernetes/02-monitoring.yml b/kubernetes/02-monitoring.yml
index e32a7b2..e0fe9a2 100644
--- a/kubernetes/02-monitoring.yml
+++ b/kubernetes/02-monitoring.yml
@@ -1,349 +1,351 @@
 # -------
 # Prometheus statsd exporter configuration
 # -------
 ---
 apiVersion: v1
 kind: ConfigMap
 metadata:
   name: prometheus-statsd-exporter
 data:
   config.yml: |
     defaults:
       timer_type: histogram
       buckets:
         - .005
         - .01
         - .025
         - .05
         - .1
         - .25
         - .5
         - .75
         - 1
         - 2
         - 5
         - 10
         - 15
         - 30
         - 45
         - 60
         - 120
         - 300
         - 600
         - 900
         - 1800
         - 2700
         - 3600
         - 7200
 
 ---
 apiVersion: apps/v1
 kind: Deployment
 metadata:
   name: prometheus-statsd-exporter
   labels:
     app: prometheus-statsd-exporter
 spec:
   replicas: 1
   selector:
     matchLabels:
       app: prometheus-statsd-exporter
   template:
     metadata:
       labels:
         app: prometheus-statsd-exporter
     spec:
       containers:
         - name: prometheus-statsd-exporter
           image: prom/statsd-exporter
           imagePullPolicy: Always
           args:
             - "--statsd.mapping-config=/etc/prometheus/statsd-mapping.yml"
           ports:
             - containerPort: 9125
           volumeMounts:
             - name: config
               mountPath: /etc/prometheus/statsd-mapping.yml
               subPath: config.yml
               readOnly: true
       volumes:
         - name: config
           configMap:
             name: prometheus-statsd-exporter
 ---
 apiVersion: v1
 kind: Service
 metadata:
   name: prometheus-statsd-exporter
 spec:
   type: ClusterIP
   selector:
     app: prometheus-statsd-exporter
   ports:
     - name: statsd
       port: 9125
       targetPort: 9125
     - name: http
       port: 9102
       targetPort: 9102
 ---
 # -------
 # Prometheus configuration
 # -------
 apiVersion: v1
 kind: ConfigMap
 metadata:
   name: prometheus
 data:
   config.yml: |
     # my global config
     global:
       scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
       evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
       # scrape_timeout is set to the global default (10s).
 
     scrape_configs:
       - job_name: prometheus
         static_configs:
           - targets:
               - prometheus:9090
         metrics_path: /prometheus/metrics
 
       - job_name: statsd-exporter
         static_configs:
           - targets:
               - prometheus-statsd-exporter:9102
 
       - job_name: jmx-exporter-cassandra
         static_configs:
           - targets:
               - prometheus-jmx-exporter-cassandra:5556
 
       - job_name: swh-counters-exporter
         static_configs:
-          - targets:
+          - labels:
+              environment: dev
+            targets:
               - counters:5011
         metrics_path: /metrics
 
       - job_name: swh-web-exporter
         static_configs:
           - targets:
               - webapp:5004
         metrics_path: /metrics/prometheus/
 
 ---
 apiVersion: v1
 kind: PersistentVolume
 metadata:
   name: prometheus-pv
 spec:
   capacity:
     storage: 10Gi
   volumeMode: Filesystem
   accessModes:
     - ReadWriteOnce
   persistentVolumeReclaimPolicy: Delete
   storageClassName: prometheus-pv
   local:
     path: /srv/softwareheritage-kube/dev/prometheus
   nodeAffinity:
     required:
       nodeSelectorTerms:
         - matchExpressions:
             # TODO adapt for your needs
             - key: kubernetes.io/os
               operator: In
               values:
                 - linux
 ---
 apiVersion: v1
 kind: Service
 metadata:
   name: prometheus
 spec:
   type: ClusterIP
   selector:
     app: prometheus
   ports:
     - port: 9090
       targetPort: 9090
 ---
 apiVersion: networking.k8s.io/v1
 kind: Ingress
 metadata:
   name: prometheus
 spec:
   rules:
     - host: prometheus.default
       http:
         paths:
           - path: /
             pathType: Prefix
             backend:
               service:
                 name: prometheus
                 port:
                   number: 9090
 ---
 kind: PersistentVolumeClaim
 apiVersion: v1
 metadata:
   name: prometheus-pvc
 spec:
   accessModes:
     - ReadWriteOnce
   storageClassName: prometheus-pv
   resources:
     requests:
       storage: 10Gi
 ---
 apiVersion: apps/v1
 kind: Deployment
 metadata:
   name: prometheus
   labels:
     app: prometheus
 spec:
   replicas: 1
   selector:
     matchLabels:
       app: prometheus
   template:
     metadata:
       labels:
         app: prometheus
     spec:
       containers:
         - name: prometheus
           image: prom/prometheus
           imagePullPolicy: Always
           args:
             - "--config.file=/etc/prometheus/prometheus.yml"
           ports:
             - containerPort: 9090
           volumeMounts:
             - name: config
               mountPath: /etc/prometheus/prometheus.yml
               subPath: config.yml
               readOnly: true
             - mountPath: "/prometheus/data"
               name: prometheus-pvc
           resources:
             requests:
               cpu: "20m"
               memory: "100Mi"
             limits:
               cpu: "100m"
               memory: "256Mi"
       volumes:
         - name: config
           configMap:
             name: prometheus
         - name: prometheus-pvc
           persistentVolumeClaim:
             claimName: prometheus-pvc
 ---
 apiVersion: v1
 kind: PersistentVolume
 metadata:
   name: grafana-pv
 spec:
   capacity:
     storage: 1Gi
   volumeMode: Filesystem
   accessModes:
     - ReadWriteOnce
   persistentVolumeReclaimPolicy: Delete
   storageClassName: grafana-pv
   local:
     path: /srv/softwareheritage-kube/dev/grafana
   nodeAffinity:
     required:
       nodeSelectorTerms:
         - matchExpressions:
             # TODO adapt for your needs
             - key: kubernetes.io/os
               operator: In
               values:
                 - linux
 ---
 kind: PersistentVolumeClaim
 apiVersion: v1
 metadata:
   name: grafana-pvc
 spec:
   accessModes:
     - ReadWriteOnce
   storageClassName: grafana-pv
   resources:
     requests:
       storage: 1Gi
 ---
 apiVersion: v1
 kind: Service
 metadata:
   name: grafana
 spec:
   type: ClusterIP
   selector:
     app: grafana
   ports:
     - port: 3000
       targetPort: 3000
 ---
 apiVersion: networking.k8s.io/v1
 kind: Ingress
 metadata:
   name: grafana
 spec:
   rules:
     - host: grafana.default
       http:
         paths:
           - path: /
             pathType: Prefix
             backend:
               service:
                 name: grafana
                 port:
                   number: 3000
 ---
 apiVersion: apps/v1
 kind: Deployment
 metadata:
   name: grafana
   labels:
     app: grafana
 spec:
   replicas: 1
   selector:
     matchLabels:
       app: grafana
   template:
     metadata:
       labels:
         app: grafana
     spec:
       containers:
         - name: grafana
           image: swh/grafana
           imagePullPolicy: Always
           env:
             - name: GF_SERVER_ROOT_URL
               value: http://grafana.default
           ports:
             - containerPort: 3000
           resources:
             requests:
               cpu: "50m"
               memory: "128Mi"
             limits:
               cpu: "100m"
               memory: "256Mi"
           volumeMounts:
             - mountPath: "/var/lib/grafana"
               name: grafana-pvc
       volumes:
         - name: grafana-pvc
           persistentVolumeClaim:
             claimName: grafana-pvc
diff --git a/kubernetes/61-counters.yml b/kubernetes/61-counters.yml
index f9e7281..dad1809 100644
--- a/kubernetes/61-counters.yml
+++ b/kubernetes/61-counters.yml
@@ -1,169 +1,171 @@
 ---
 apiVersion: v1
 kind: ConfigMap
 metadata:
   name: counters
 data:
   config.yml: |
     counters:
       cls: redis
       host: redis:6379
     history:
       cls: prometheus
       prometheus_host: prometheus
       prometheus_port: 9090
       live_data_start: 1609462861 # 2021-01-01
       cache_base_directory: /tmp
       interval: 1h
+      labels:
+        environment: dev
   entrypoint.sh: |
     #!/bin/bash
 
     set -e
 
     wait-for-it redis:6379 -s --timeout=0
 
     exec gunicorn --bind 0.0.0.0:5011 \
          --reload \
          --threads 4 \
          --workers 2 \
          --log-level ${LOGLEVEL} \
          --timeout 3600 \
          --config 'python:swh.core.api.gunicorn_config' \
          'swh.counters.api.server:make_app_from_configfile()'
 ---
 apiVersion: apps/v1
 kind: Deployment
 metadata:
   name: counters
   labels:
     app: counters
 spec:
   replicas: 1
   selector:
     matchLabels:
       app: counters
   strategy:
     type: RollingUpdate
     rollingUpdate:
       maxSurge: 1
   template:
     metadata:
       labels:
         app: counters
     spec:
       containers:
         - name: counters
           image: swh/counters:latest
           imagePullPolicy: Always
           command:
             - /entrypoint.sh
           ports:
             - containerPort: 5011
           readinessProbe:
             httpGet:
               path: /
               port: 5011
               scheme: "HTTP"
             initialDelaySeconds: 0
             failureThreshold: 1
             periodSeconds: 10
           startupProbe:
             httpGet:
               path: /
               port: 5011
               scheme: "HTTP"
             initialDelaySeconds: 5
             failureThreshold: 30
             periodSeconds: 1
           env:
             - name: PORT
               value: "5010"
             - name: STATSD_HOST
               value: "prometheus-statsd-exporter"
             - name: STATSD_PORT
               value: "9125"
             - name: SWH_CONFIG_FILENAME
               value: /etc/softwareheritage/config.yml
             - name: LOGLEVEL
               value: INFO
           volumeMounts:
             - name: config
               mountPath: /etc/softwareheritage/config.yml
               subPath: config.yml
               readOnly: true
             - name: config
               mountPath: /entrypoint.sh
               subPath: entrypoint.sh
               readOnly: true
           resources:
             requests:
               memory: "75Mi"
               cpu: "20m"
             limits:
               memory: "150Mi"
               cpu: "200m"
       volumes:
         - name: config
           configMap:
             name: counters
             defaultMode: 0777
 ---
 apiVersion: v1
 kind: Service
 metadata:
   name: counters
 spec:
   type: ClusterIP
   selector:
     app: counters
   ports:
     - port: 5011
       targetPort: 5011
 ---
 apiVersion: networking.k8s.io/v1
 kind: Ingress
 metadata:
   name: counters
 spec:
   rules:
     - host: counters.default
       http:
         paths:
           - path: /
             pathType: Prefix
             backend:
               service:
                 name: counters
                 port:
                   number: 5011
 ---
 apiVersion: batch/v1beta1
 kind: CronJob
 metadata:
   name: counters-history
   labels:
     app: counters-history
 spec:
   schedule: "*/1 * * * *"
   successfulJobsHistoryLimit: 1
   failedJobsHistoryLimit: 1
   concurrencyPolicy: Forbid
   jobTemplate:
     spec:
       template:
         spec:
           containers:
             - name: counters-history
               image: curlimages/curl
               imagePullPolicy: Always
               args:
                 - -v
                 - -f
                 - -X
                 - POST
                 - -H
                 - "Content-Type: application/json"
                 - "http://counters:5011/refresh_history"
                 - "-d"
                 - '{"cache_file": "history.json", "objects": ["content", "origin", "revision"]}'
           restartPolicy: Never