diff --git a/Dockerfile.grafana b/Dockerfile.grafana
new file mode 100644
index 0000000..76bb196
--- /dev/null
+++ b/Dockerfile.grafana
@@ -0,0 +1,8 @@
+FROM grafana/grafana
+
+COPY --chown=grafana:root docker/conf/grafana/provisioning /etc/grafana/provisioning
+COPY --chown=grafana:root docker/conf/grafana/dashboards /etc/grafana/dashboards
+
+RUN sed -i 's|localhost:5080/prometheus|prometheus:9090|g' /etc/grafana/provisioning/datasources/prometheus.yaml && \
+    sed -i 's|access: direct|access: proxy|g' /etc/grafana/provisioning/datasources/prometheus.yaml && \
+    sed -i 's|/var/lib/grafana/dashboards|/etc/grafana/dashboards|g' /etc/grafana/provisioning/dashboards/all.yaml
diff --git a/kubernetes/02-monitoring.yml b/kubernetes/02-monitoring.yml
new file mode 100644
index 0000000..042f835
--- /dev/null
+++ b/kubernetes/02-monitoring.yml
@@ -0,0 +1,334 @@
+# -------
+# Prometheus statsd exporter configuration
+# -------
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: prometheus-statsd-exporter
+data:
+  config.yml: |
+    defaults:
+      timer_type: histogram
+      buckets:
+        - .005
+        - .01
+        - .025
+        - .05
+        - .1
+        - .25
+        - .5
+        - .75
+        - 1
+        - 2
+        - 5
+        - 10
+        - 15
+        - 30
+        - 45
+        - 60
+        - 120
+        - 300
+        - 600
+        - 900
+        - 1800
+        - 2700
+        - 3600
+        - 7200
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: prometheus-statsd-exporter
+  labels:
+    app: prometheus-statsd-exporter
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: prometheus-statsd-exporter
+  template:
+    metadata:
+      labels:
+        app: prometheus-statsd-exporter
+    spec:
+      containers:
+        - name: prometheus-statsd-exporter
+          image: prom/statsd-exporter
+          imagePullPolicy: Always
+          args:
+            - "--statsd.mapping-config=/etc/prometheus/statsd-mapping.yml"
+          ports:
+            - containerPort: 9125
+          volumeMounts:
+            - name: config
+              mountPath: /etc/prometheus/statsd-mapping.yml
+              subPath: config.yml
+              readOnly: true
+      volumes:
+        - name: config
+          configMap:
+            name: prometheus-statsd-exporter
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: prometheus-statsd-exporter
+spec:
+  type: ClusterIP
+  selector:
+    app: prometheus-statsd-exporter
+  ports:
+    - name: statsd
+      port: 9125
+      targetPort: 9125
+    - name: http
+      port: 9102
+      targetPort: 9102
+---
+# -------
+# Prometheus configuration
+# -------
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: prometheus
+data:
+  config.yml: |
+    # my global config
+    global:
+      scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
+      evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
+      # scrape_timeout is set to the global default (10s).
+
+    scrape_configs:
+      - job_name: prometheus
+        static_configs:
+          - targets:
+              - prometheus:9090
+        # NOTE(review): prometheus serves at "/" here (no --web.route-prefix is
+        # set below and the Ingress exposes it at "/"), so the self-scrape must
+        # use the default /metrics path, not the /prometheus/... prefix used in
+        # the docker-compose setup.
+        metrics_path: /metrics
+
+      - job_name: statsd-exporter
+        static_configs:
+          - targets:
+              - prometheus-statsd-exporter:9102
+
+      - job_name: jmx-exporter-cassandra
+        static_configs:
+          - targets:
+              - prometheus-jmx-exporter-cassandra:5556
+---
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+  name: prometheus-pv
+spec:
+  capacity:
+    storage: 10Gi
+  volumeMode: Filesystem
+  accessModes:
+    - ReadWriteOnce
+  persistentVolumeReclaimPolicy: Delete
+  storageClassName: prometheus-pv
+  local:
+    path: /srv/softwareheritage-kube/dev/prometheus
+  nodeAffinity:
+    required:
+      nodeSelectorTerms:
+        - matchExpressions:
+            # TODO adapt for your needs
+            - key: kubernetes.io/os
+              operator: In
+              values:
+                - linux
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: prometheus
+spec:
+  type: ClusterIP
+  selector:
+    app: prometheus
+  ports:
+    - port: 9090
+      targetPort: 9090
+---
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: prometheus
+spec:
+  rules:
+    - host: prometheus.default
+      http:
+        paths:
+          - path: /
+            pathType: Prefix
+            backend:
+              service:
+                name: prometheus
+                port:
+                  number: 9090
+---
+kind: PersistentVolumeClaim
+apiVersion: v1
+metadata:
+  name: prometheus-pvc
+spec:
+  accessModes:
+    - ReadWriteOnce
+  storageClassName: prometheus-pv
+  resources:
+    requests:
+      storage: 10Gi
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: prometheus
+  labels:
+    app: prometheus
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: prometheus
+  template:
+    metadata:
+      labels:
+        app: prometheus
+    spec:
+      containers:
+        - name: prometheus
+          image: prom/prometheus
+          imagePullPolicy: Always
+          args:
+            - "--config.file=/etc/prometheus/prometheus.yml"
+          ports:
+            - containerPort: 9090
+          volumeMounts:
+            - name: config
+              mountPath: /etc/prometheus/prometheus.yml
+              subPath: config.yml
+              readOnly: true
+            - mountPath: "/prometheus/data"
+              name: prometheus-pvc
+          resources:
+            limits:
+              cpu: "100m"
+              memory: "256Mi"
+      volumes:
+        - name: config
+          configMap:
+            name: prometheus
+        - name: prometheus-pvc
+          persistentVolumeClaim:
+            claimName: prometheus-pvc
+
+---
+apiVersion: v1
+kind: PersistentVolume
+metadata:
+  name: grafana-pv
+spec:
+  capacity:
+    storage: 1Gi
+  volumeMode: Filesystem
+  accessModes:
+    - ReadWriteOnce
+  persistentVolumeReclaimPolicy: Delete
+  storageClassName: grafana-pv
+  local:
+    path: /srv/softwareheritage-kube/dev/grafana
+  nodeAffinity:
+    required:
+      nodeSelectorTerms:
+        - matchExpressions:
+            # TODO adapt for your needs
+            - key: kubernetes.io/os
+              operator: In
+              values:
+                - linux
+---
+kind: PersistentVolumeClaim
+apiVersion: v1
+metadata:
+  name: grafana-pvc
+spec:
+  accessModes:
+    - ReadWriteOnce
+  storageClassName: grafana-pv
+  resources:
+    requests:
+      storage: 1Gi
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: grafana
+spec:
+  type: ClusterIP
+  selector:
+    app: grafana
+  ports:
+    - port: 3000
+      targetPort: 3000
+---
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: grafana
+spec:
+  rules:
+    - host: grafana.default
+      http:
+        paths:
+          - path: /
+            pathType: Prefix
+            backend:
+              service:
+                name: grafana
+                port:
+                  number: 3000
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: grafana
+  labels:
+    app: grafana
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: grafana
+  template:
+    metadata:
+      labels:
+        app: grafana
+    spec:
+      containers:
+        - name: grafana
+          image: swh/grafana
+          imagePullPolicy: Always
+          env:
+            - name: GF_SERVER_ROOT_URL
+              value: http://grafana.default
+          ports:
+            - containerPort: 3000
+          resources:
+            requests:
+              cpu: "50m"
+              memory: "128Mi"
+            limits:
+              cpu: "100m"
+              memory: "256Mi"
+          volumeMounts:
+            - mountPath: "/var/lib/grafana"
+              name: grafana-pvc
+      volumes:
+        - name: grafana-pvc
+          persistentVolumeClaim:
+            claimName: grafana-pvc
diff --git a/kubernetes/Readme.md b/kubernetes/Readme.md
index 69084e3..0d90b72 100644
--- a/kubernetes/Readme.md
+++ b/kubernetes/Readme.md
@@ -1,203 +1,204 @@
 ## Prerequisite
 
 ### Directories
 
 ```
-# sudo mkdir -p /srv/softwareheritage-kube/dev/{objects,storage-db,scheduler-db,kafka,web-db,prometheus,zookeeper/data,zookeeper/datalog}
+# sudo mkdir -p /srv/softwareheritage-kube/dev/{objects,storage-db,scheduler-db,kafka,web-db,prometheus,zookeeper/data,zookeeper/datalog,grafana}
 # sudo chown 1000:1000 /srv/softwareheritage-kube/dev/objects
+# sudo chown 472:0 /srv/softwareheritage-kube/dev/grafana
 # sudo chown nobody:nogroup /srv/softwareheritage-kube/dev/prometheus
 ```
 
 Must match the content of `05-storage-db.yaml`
 
 ### Registry
 
 - Add the following line on your `/etc/hosts` file. It's needed to be able to push the image to it from docker
 ```
 127.0.0.1 registry.default
 ```
 
 - Start the registry in kubernetes
 ```
 # cd kubernetes
 # kubectl apply -f registry/00-registry.yml
 ```
 
 ## Build the base image
 
 ```
 # cd docker
 # docker build --no-cache -t swh/stack .
 # docker tag swh/stack:latest registry.default/swh/stack:latest
 # docker push registry.default/swh/stack:latest
 ```
 
 ## start the objstorage
 
 - build image
 
 ```
 # docker build -f Dockerfile.objstorage -t swh/objstorage --build-arg BASE=swh/stack .
# docker tag swh/objstorage:latest registry.default/swh/objstorage:latest # docker push registry.default/swh/objstorage:latest ``` - start the service ``` # cd kubernetes # kubectl apply -f 10-objstorage.yml configmap/objstorage created persistentvolume/objstorage-pv created persistentvolumeclaim/objstorage-pvc created deployment.apps/objstorage created service/objstorage created ``` - test it ``` # kubectl get pods NAME READY STATUS RESTARTS AGE registry-deployment-7595868dc8-657ps 1/1 Running 0 46m objstorage-8587d58b68-76jbn 1/1 Running 0 12m # kubectl get services objstorage NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE objstorage ClusterIP 10.43.185.191 5003/TCP 17m # curl http://$(kubectl get services objstorage -o jsonpath='{.spec.clusterIP}'):5003 SWH Objstorage API server% ``` ## Start the storage - Start the db ``` # cd kubernetes # kubectl apply -f 05-storage-db.yml persistentvolume/storage-db-pv created persistentvolumeclaim/storage-db-pvc created secret/storage-db created configmap/storage-db created deployment.apps/storage-db created service/storage-db created # kubectl get pods NAME READY STATUS RESTARTS AGE registry-deployment-7595868dc8-657ps 1/1 Running 0 46m objstorage-8587d58b68-76jbn 1/1 Running 0 15m storage-db-64b7f8b684-48n7w 1/1 Running 0 4m52s # kubectl get services storage-db NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE storage-db ClusterIP 10.43.213.178 5432/TCP 8m19s ``` - Start the storage ``` # cd kubernetes # kubectl apply -f 11-storage.yml configmap/storage created deployment.apps/storage created service/storage created ``` - Test the service ``` # kubectl get pods NAME READY STATUS RESTARTS AGE registry-deployment-7595868dc8-657ps 1/1 Running 0 49m storage-db-64b7f8b684-48n7w 1/1 Running 0 7m40s storage-6b759fb974-w9rzj 1/1 Running 0 66s # kubectl get services storage NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE storage ClusterIP 10.43.212.116 5002/TCP 2m24s # curl http://$(kubectl get services storage -o 
jsonpath='{.spec.clusterIP}'):5002 Software Heritage storage server

You have reached the Software Heritage storage server.
See its documentation and API for more information

``` ## Start the scheduler - Start the db ``` # cd kubernetes # kubectl apply -f 15-scheduler-db.yml persistentvolume/scheduler-db-pv unchanged persistentvolumeclaim/scheduler-db-pvc created secret/scheduler-db configured configmap/scheduler-db unchanged deployment.apps/scheduler-db unchanged service/scheduler-db unchanged # kubectl get services scheduler-db NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE scheduler-db ClusterIP 10.43.115.249 5433/TCP 110s ``` - Test the service ``` # kubectl apply -f 20-scheduler.yml configmap/scheduler created deployment.apps/scheduler created service/scheduler created ingress.networking.k8s.io/scheduler created # kubectl get services scheduler NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE scheduler ClusterIP 10.43.218.74 5008/TCP 23s # kubectl get pods NAME READY STATUS RESTARTS AGE registry-deployment-5f6894c5b-9wkmr 1/1 Running 0 28m objstorage-5b87c549b6-f6jvc 1/1 Running 0 12m storage-db-79bfbff68-mg7fr 1/1 Running 0 107s storage-6bfcb87b6-7s7t8 1/1 Running 0 87s scheduler-db-666c8dc8b4-qxm9d 1/1 Running 0 73s scheduler-595f944854-hbsj4 1/1 Running 0 62s # curl http://$(kubectl get services scheduler -o jsonpath='{.spec.clusterIP}'):5008 Software Heritage scheduler RPC server

You have reached the Software Heritage scheduler RPC server.
See its documentation and API for more information

% ``` ## Development To access the services, they must be declared on the `/etc/hosts` file: ``` 127.0.0.1 objstorage.default storage.default webapp.default scheduler.default ``` ### Skaffold To start the development environment using skaffold, use the following command: ``` skaffold --default-repo registry.default dev ``` It will build the images, deploy them on the local registry and start the services. It will monitor the projects to detect the changes and restart the containers when needed diff --git a/skaffold.yaml b/skaffold.yaml index 4443801..eec31b6 100644 --- a/skaffold.yaml +++ b/skaffold.yaml @@ -1,52 +1,56 @@ apiVersion: skaffold/v2beta13 kind: Config metadata: name: swh-environment build: artifacts: - image: swh/stack-base docker: dockerfile: Dockerfile - image: swh/objstorage docker: dockerfile: Dockerfile.objstorage requires: - image: swh/stack-base alias: BASE - image: swh/storage docker: dockerfile: Dockerfile.storage requires: - image: swh/stack-base alias: BASE - image: swh/scheduler docker: dockerfile: Dockerfile.scheduler requires: - image: swh/stack-base alias: BASE - image: swh/webapp docker: dockerfile: Dockerfile.webapp requires: - image: swh/stack-base alias: BASE - image: swh/loaders docker: dockerfile: Dockerfile.loaders requires: - image: swh/stack-base alias: BASE + - image: swh/grafana + docker: + dockerfile: Dockerfile.grafana deploy: kubectl: manifests: - kubernetes/01-journal.yml + - kubernetes/02-monitoring.yml - kubernetes/05-storage-db.yml - kubernetes/10-objstorage.yml - kubernetes/11-storage.yml - kubernetes/15-scheduler-db.yml - kubernetes/16-rabbitmq.yml - kubernetes/20-scheduler.yml - kubernetes/29-web-db.yml - kubernetes/30-webapp.yml - kubernetes/40-loaders.yml