diff --git a/sysadmin/grid5000/cassandra/ansible/monitoring.yml b/sysadmin/grid5000/cassandra/ansible/monitoring.yml index 9f36214..ab4139d 100644 --- a/sysadmin/grid5000/cassandra/ansible/monitoring.yml +++ b/sysadmin/grid5000/cassandra/ansible/monitoring.yml @@ -1,57 +1,71 @@ --- +- name: Add SWH repository + apt_repository: + repo: deb [trusted=yes] https://debian.softwareheritage.org/ buster-swh main + state: present + filename: swh.sources + +- name: Install packages + apt: + name: + - dstat + - facter + - prometheus-statsd-exporter + - tcpdump + - name: Create grid5000 tools directories file: state: directory path: "{{ item }}" owner: root group: root mode: "0755" with_items: - /grid5000 - /grid5000/code - /grid5000/code/bin when: install_docker_install_script - name: Install docker installation script copy: src: "files/g5k-setup-docker" dest: "/grid5000/code/bin/g5k-setup-docker" owner: root group: root mode: "0755" when: install_docker_install_script - name: Install docker command: cmd: "/grid5000/code/bin/g5k-setup-docker" - name: Create prometheus data directory file: state: directory path: /tmp/prometheus owner: nobody group: nogroup - name: install prometheus node exporter include: _install_prometheus_exporter.yml - name: Create prometheus configuration template: src: "templates/prometheus/prometheus.yml" dest: "/etc/prometheus.yml" register: prometheus_configuration - name: test if prometheus container exists command: docker ps -a --format='{{ '{{' }}.Names{{ '}}' }}' register: containers - name: Start prometheus command: cmd: "docker run -d -p 9090:9090 -v /etc/prometheus.yml:/etc/prometheus/prometheus.yml -v /tmp/prometheus:/prometheus --name prometheus prom/prometheus" when: containers.stdout.find('prometheus') == -1 - name: Restart prometheus command: cmd: "docker restart prometheus" when: containers.stdout.find('prometheus') != -1 diff --git a/sysadmin/grid5000/cassandra/ansible/playbook.yml b/sysadmin/grid5000/cassandra/ansible/playbook.yml index 7d09ac8..916bc80 100644 --- a/sysadmin/grid5000/cassandra/ansible/playbook.yml +++ b/sysadmin/grid5000/cassandra/ansible/playbook.yml @@ -1,21 +1,26 @@ --- - name: Common hosts: all tasks: - include: common.yml +- name: Install ZFS + hosts: zfs + tasks: + - include: zfs.yml + - name: Install cassandra - hosts: cassandra:zfs + hosts: cassandra tasks: - include: zfs.yml - include: cassandra.yml - name: Install SWH Storage hosts: swh-storage tasks: - include: swh-storage.yml - name: Install Monitoring hosts: monitoring tasks: - include: monitoring.yml diff --git a/sysadmin/grid5000/cassandra/ansible/swh-storage.yml b/sysadmin/grid5000/cassandra/ansible/swh-storage.yml index 1a6ef4c..5087efb 100644 --- a/sysadmin/grid5000/cassandra/ansible/swh-storage.yml +++ b/sysadmin/grid5000/cassandra/ansible/swh-storage.yml @@ -1,129 +1,129 @@ --- - name: Add Backports repository apt_repository: repo: deb http://deb.debian.org/debian/ buster-backports main contrib non-free filename: backports.sources - name: swhstorage group group: name: swhstorage - name: swhstorage user user: name: swhstorage group: swhstorage home: /var/lib/swhstorage # *big images mount homes via nfs so the user creation failed - name: Add SWH repository apt_repository: repo: deb [trusted=yes] https://debian.softwareheritage.org/ buster-swh main state: present - filename: cassandra.sources + filename: swh.sources - name: Install packages apt: name: - daemonize - dstat - facter - prometheus-statsd-exporter - python3 - python3-gunicorn - tcpdump - name: Install packages from backports apt: name: - python3-typing-extensions - gunicorn3 default_release: buster-backports - name: Install swh storage packages apt: name: - python3-swh.storage - python3-swh.journal - name: install prometheus node exporter include: _install_prometheus_exporter.yml - name: Create directories file: state: directory path: "{{ item }}" owner: root group: root mode: "0755" with_items: - /etc/gunicorn - /etc/gunicorn/instances - /run/gunicorn - /run/gunicorn/swh-storage - /etc/softwareheritage - /etc/softwareheritage/storage - /etc/softwareheritage/replayer - name: Create swh-storage directories file: state: directory path: "{{ item }}" owner: swhstorage group: swhstorage mode: "0755" with_items: - /run/gunicorn/swh-storage/ - /run/replayer - name: Configure gunicorn - default service template: src: "templates/gunicorn/gunicorn.service" dest: "/etc/systemd/system/gunicorn.service" - name: Configure gunicorn - log configuration template: src: "templates/gunicorn/logconfig.ini" dest: "/etc/gunicorn/logconfig.ini" - name: swh-storage gunicorn instance configuration template: src: "templates/gunicorn/gunicorn-instance.cfg" dest: "/etc/gunicorn/instances/swh-storage.cfg" - name: swh-storage configuration directories template: src: "templates/swhstorage/storage.yml" dest: "/etc/softwareheritage/storage/storage.yml" - name: swh-storage service configuration template: src: "templates/gunicorn/gunicorn-instance-service.cfg" dest: "/etc/systemd/system/gunicorn-swh-storage.service" # TODO variabilize - name: swh-storage service service: name: gunicorn-swh-storage enabled: true state: started - name: swh-storage init cassandra script template: src: templates/swhstorage/init-cassandra-keyspace.sh dest: /usr/local/bin/swh-storage-init-cassandra.sh mode: 0755 - name: Configure replayer services include: _configure_replayer_services.yml obj={{ item }} loop: - content - skipped_content - directory - extid - origin - origin_visit - origin_visit_status - release - revision - snapshot - name: reload systemd command: systemctl daemon-reload diff --git a/sysadmin/grid5000/cassandra/kubernetes/00-registry.yml b/sysadmin/grid5000/cassandra/kubernetes/00-registry.yml new file mode 100644 index 0000000..6bd16f7 --- /dev/null +++ b/sysadmin/grid5000/cassandra/kubernetes/00-registry.yml @@ -0,0 +1,183 @@ +--- +apiVersion: v1 +kind: PersistentVolume +metadata: + name: registry-pv +spec: + capacity: + storage: 20Gi + volumeMode: Filesystem + accessModes: + - ReadWriteOnce + persistentVolumeReclaimPolicy: Delete + storageClassName: registry-pv + local: + path: /srv/data/softwareheritage-kube/registry + nodeAffinity: + required: + nodeSelectorTerms: + - matchExpressions: + # TODO adapt for your needs + - key: kubernetes.io/hostname + operator: In + values: + - parasilo-19.rennes.grid5000.fr +--- +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: registry-pvc +spec: + accessModes: + - ReadWriteOnce + storageClassName: registry-pv + resources: + requests: + storage: 20Gi +--- +apiVersion: v1 +kind: Service +metadata: + name: registry +spec: + selector: + app: registry + ports: + - protocol: TCP + port: 80 + targetPort: 5000 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: registry +spec: + rules: + - host: registry.default + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: registry + port: + number: 80 +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: registry-deployment + labels: + app: registry +spec: + replicas: 1 + selector: + matchLabels: + app: registry + template: + metadata: + labels: + app: registry + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: + - parasilo-19.rennes.grid5000.fr + containers: + - name: registry + image: registry:latest + resources: + {} + env: + - name: REGISTRY_STORAGE_DELETE_ENABLED + value: "true" + ports: + - containerPort: 5000 + volumeMounts: + - mountPath: "/var/lib/registry" + name: registry-pvc + volumes: + - name: registry-pvc + persistentVolumeClaim: + claimName: registry-pvc +--- +apiVersion: v1 +kind: Service +metadata: + name: registry-ui +spec: + type: ClusterIP + selector: + app: registry-ui + ports: + - port: 80 + targetPort: 80 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: registry-ui +spec: + rules: + - host: registry-ui.default + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: registry-ui + port: + number: 80 +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: registry-ui + labels: + app: registry-ui +spec: + replicas: 1 + selector: + matchLabels: + app: registry-ui + template: + metadata: + labels: + app: registry-ui + spec: + containers: + - name: registry-ui + image: "docker.io/joxit/docker-registry-ui:2" + imagePullPolicy: Always + env: + - name: REGISTRY_TITLE + value: "Docker registry UI" + - name: DELETE_IMAGES + value: "true" + # - name: REGISTRY_URL + # value: "http://registry.default" + - name: PULL_URL + value: "registry.default" + - name: NGINX_PROXY_PASS_URL + value: http://registry.default + ports: + - name: http + containerPort: 80 + protocol: TCP + livenessProbe: + httpGet: + path: / + port: http + readinessProbe: + httpGet: + path: / + port: http + resources: + {} diff --git a/sysadmin/grid5000/cassandra/kubernetes/02-monitoring.yaml b/sysadmin/grid5000/cassandra/kubernetes/02-monitoring.yaml new file mode 100644 index 0000000..378c55a --- /dev/null +++ b/sysadmin/grid5000/cassandra/kubernetes/02-monitoring.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: Service +metadata: + name: prometheus-statsd-exporter +spec: + type: ExternalName + externalName: parasilo-19.rennes.grid5000.fr + + + diff --git a/sysadmin/grid5000/cassandra/kubernetes/10-objstorage.yml b/sysadmin/grid5000/cassandra/kubernetes/10-objstorage.yml new file mode 100644 index 0000000..0037406 --- /dev/null +++ b/sysadmin/grid5000/cassandra/kubernetes/10-objstorage.yml @@ -0,0 +1,180 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: objstorage +data: + config.yml: | + objstorage: + cls: pathslicing + args: + root: "/srv/softwareheritage/objects" + slicing: 0:2/2:4/4:6 + client_max_size: 1073741824 + entrypoint.sh: | + #!/bin/bash + + set -e + + # source /srv/softwareheritage/utils/pyutils.sh + # setup_pip + + # echo Installed Python packages: + # pip list + + echo Starting the swh-objstorage API server + exec gunicorn --bind 0.0.0.0:5003 \ + --worker-class aiohttp.worker.GunicornWebWorker \ + --log-level DEBUG \ + --workers 300 \ + --reload \ + --timeout 3600 \ + --config 'python:swh.core.api.gunicorn_config' \ + 'swh.objstorage.api.server:make_app_from_configfile()' +--- +apiVersion: v1 +kind: PersistentVolume +metadata: + name: objstorage-pv +spec: + capacity: + storage: 10Gi + volumeMode: Filesystem + accessModes: + - ReadWriteOnce + persistentVolumeReclaimPolicy: Delete + storageClassName: objstorage-pv + local: + path: /srv/data/softwareheritage-kube/objects + nodeAffinity: + required: + nodeSelectorTerms: + - matchExpressions: + # TODO adapt for your needs + - key: kubernetes.io/hostname + operator: In + values: + - parasilo-19.rennes.grid5000.fr +--- +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: objstorage-pvc +spec: + accessModes: + - ReadWriteOnce + storageClassName: objstorage-pv + resources: + requests: + storage: 10Gi +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: objstorage + labels: + app: objstorage +spec: + replicas: 1 + selector: + matchLabels: + app: objstorage + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + template: + metadata: + labels: + app: objstorage + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: + - parasilo-19.rennes.grid5000.fr + containers: + - name: objstorage + image: vsellier/swh-base + imagePullPolicy: Always + command: + - /entrypoint.sh + ports: + - containerPort: 5003 + readinessProbe: + httpGet: + path: / + port: 5003 + scheme: "HTTP" + initialDelaySeconds: 0 + failureThreshold: 2 + periodSeconds: 10 + startupProbe: + httpGet: + path: / + port: 5003 + scheme: "HTTP" + initialDelaySeconds: 5 + failureThreshold: 30 + periodSeconds: 1 + env: + - name: PORT + value: "5003" + - name: STATSD_HOST + value: "prometheus-statsd-exporter" + - name: STATSD_PORT + value: "9125" + - name: SWH_CONFIG_FILENAME + value: /etc/softwareheritage/config.yml + volumeMounts: + - mountPath: "/srv/softwareheritage/objects" + name: objstorage-pvc + - name: config + mountPath: /etc/softwareheritage/config.yml + subPath: config.yml + readOnly: true + - name: config + mountPath: /entrypoint.sh + subPath: entrypoint.sh + readOnly: true + volumes: + - name: config + configMap: + name: objstorage + defaultMode: 0777 + - name: objstorage-pvc + persistentVolumeClaim: + claimName: objstorage-pvc +--- +apiVersion: v1 +kind: Service +metadata: + name: objstorage +spec: + type: ClusterIP + selector: + app: objstorage + ports: + - port: 5003 + targetPort: 5003 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: objstorage +spec: + rules: + - host: objstorage.default + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: objstorage + port: + number: 5003 diff --git a/sysadmin/grid5000/cassandra/kubernetes/11-storage.yml b/sysadmin/grid5000/cassandra/kubernetes/11-storage.yml new file mode 100644 index 0000000..630796d --- /dev/null +++ b/sysadmin/grid5000/cassandra/kubernetes/11-storage.yml @@ -0,0 +1,146 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: storage +data: + config.yml: | + storage: + cls: cassandra + hosts: + - parasilo-2.rennes.grid5000.fr + keyspace: swh + consistency_level: LOCAL_QUORUM + objstorage: + cls: remote + url: http://objstorage:5003 + + entrypoint.sh: | + #!/bin/bash + + set -e + + echo Starting the swh-storage API server + exec gunicorn --bind 0.0.0.0:5002 \ + --reload \ + --workers 128 \ + --log-level DEBUG \ + --timeout 3600 \ + --config 'python:swh.core.api.gunicorn_config' \ + 'swh.storage.api.server:make_app_from_configfile()' +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: storage + labels: + app: storage +spec: + replicas: 1 + selector: + matchLabels: + app: storage + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + template: + metadata: + labels: + app: storage + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: type + operator: In + values: + - loader + containers: + - name: storage + image: vsellier/swh-base + imagePullPolicy: Always + command: + - /entrypoint.sh + ports: + - containerPort: 5002 + readinessProbe: + httpGet: + path: / + port: 5002 + scheme: "HTTP" + initialDelaySeconds: 0 + failureThreshold: 2 + periodSeconds: 10 + startupProbe: + httpGet: + path: / + port: 5002 + scheme: "HTTP" + initialDelaySeconds: 5 + failureThreshold: 30 + periodSeconds: 1 + env: + - name: PORT + value: "5002" + - name: STATSD_HOST + value: "prometheus-statsd-exporter" + - name: STATSD_PORT + value: "9125" + - name: SWH_CONFIG_FILENAME + value: /etc/softwareheritage/config.yml + volumeMounts: + - name: config + mountPath: /etc/softwareheritage/config.yml + subPath: config.yml + readOnly: true + - name: config + mountPath: /entrypoint.sh + subPath: entrypoint.sh + readOnly: true + resources: + requests: + memory: "10000Mi" + cpu: "5000m" + limits: + memory: "15000Mi" + cpu: "10000m" + volumes: + - name: config + configMap: + name: storage + defaultMode: 0777 + - name: db-password + secret: + secretName: storage-db +--- +apiVersion: v1 +kind: Service +metadata: + name: storage +spec: + type: ClusterIP + selector: + app: storage + ports: + - port: 5002 + targetPort: 5002 +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: storage +spec: + rules: + - host: storage.default + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: storage + port: + number: 5002 diff --git a/sysadmin/grid5000/cassandra/kubernetes/15-scheduler-db.yml b/sysadmin/grid5000/cassandra/kubernetes/15-scheduler-db.yml new file mode 100644 index 0000000..5c4f570 --- /dev/null +++ b/sysadmin/grid5000/cassandra/kubernetes/15-scheduler-db.yml @@ -0,0 +1,138 @@ +--- +apiVersion: v1 +kind: PersistentVolume +metadata: + name: scheduler-db-pv +spec: + capacity: + storage: 10Gi + volumeMode: Filesystem + accessModes: + - ReadWriteOnce + persistentVolumeReclaimPolicy: Delete + storageClassName: scheduler-db + local: + path: /srv/data/softwareheritage-kube/scheduler-db + nodeAffinity: + required: + nodeSelectorTerms: + - matchExpressions: + # TODO adapt for your needs + - key: kubernetes.io/hostname + operator: In + values: + - parasilo-19.rennes.grid5000.fr +--- +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: scheduler-db-pvc +spec: + accessModes: + - ReadWriteOnce + storageClassName: scheduler-db + resources: + requests: + storage: 10Gi + +--- +## TODO Change this to your real postgresql password +apiVersion: v1 +kind: Secret +metadata: + name: scheduler-db +type: Opaque +stringData: + POSTGRES_PASSWORD: swh +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: scheduler-db +data: + # property-like keys; each key maps to a simple value + POSTGRES_USER: swh + POSTGRES_DB: swh-scheduler +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: scheduler-db + labels: + app: scheduler-db +spec: + replicas: 1 + selector: + matchLabels: + app: scheduler-db + template: + metadata: + labels: + app: scheduler-db + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: + - parasilo-19.rennes.grid5000.fr + containers: + - name: scheduler-db + image: postgres:13.0 + imagePullPolicy: Always + ports: + - containerPort: 5432 + args: + - "-c" + - "shared_buffers=5GB" + - "-c" + - "effective_cache_size=512MB" + - "-c" + - "random_page_cost=1.5" + - "-c" + - "max_wal_size=1GB" + env: + - name: POSTGRES_USER + valueFrom: + configMapKeyRef: + name: scheduler-db + key: POSTGRES_USER + - name: POSTGRES_PASSWORD + valueFrom: + secretKeyRef: + name: scheduler-db + key: POSTGRES_PASSWORD + - name: POSTGRES_DB + valueFrom: + configMapKeyRef: + name: scheduler-db + key: POSTGRES_DB + volumeMounts: + - mountPath: "/var/lib/postgresql/data" + name: scheduler-db-pvc + resources: + requests: + memory: "5Gi" + cpu: "2000m" + limits: + memory: "10Gi" + cpu: "5000m" + volumes: + - name: scheduler-db-pvc + persistentVolumeClaim: + claimName: scheduler-db-pvc +--- +apiVersion: v1 +kind: Service +metadata: + name: scheduler-db +spec: + type: ClusterIP + selector: + app: scheduler-db + ports: + - port: 5432 + targetPort: 5432 diff --git a/sysadmin/grid5000/cassandra/kubernetes/16-rabbitmq.yml b/sysadmin/grid5000/cassandra/kubernetes/16-rabbitmq.yml new file mode 100644 index 0000000..83f7f3b --- /dev/null +++ b/sysadmin/grid5000/cassandra/kubernetes/16-rabbitmq.yml @@ -0,0 +1,65 @@ +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: amqp + labels: + app: amqp +spec: + replicas: 1 + selector: + matchLabels: + app: amqp + template: + metadata: + labels: + app: amqp + spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: In + values: + - parasilo-19.rennes.grid5000.fr + containers: + - name: amqp + image: rabbitmq:3.6-management + imagePullPolicy: Always + ports: + - containerPort: 5672 +--- +apiVersion: v1 +kind: Service +metadata: + name: amqp +spec: + type: ClusterIP + selector: + app: amqp + ports: + - port: 5672 + targetPort: 5672 + name: rabbitmq + - port: 15672 + targetPort: 15672 + name: rabbitmq-admin +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: amqp +spec: + rules: + - host: rabbitmq.default + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: amqp + port: + number: 15672 diff --git a/sysadmin/grid5000/cassandra/kubernetes/20-scheduler.yml b/sysadmin/grid5000/cassandra/kubernetes/20-scheduler.yml new file mode 100644 index 0000000..82f4159 --- /dev/null +++ b/sysadmin/grid5000/cassandra/kubernetes/20-scheduler.yml @@ -0,0 +1,212 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: scheduler +data: + config.yml: | + scheduler: + cls: local + db: postgresql:///?service=swh-scheduler + + celery: + task_broker: amqp://guest:guest@amqp/%2f + broker_transport_options: + max_retries: 1 + entrypoint-init-db.sh: | + #!/bin/bash + + set -e + + echo -n "waiting for database availability" + set +e + ko=1 + while [[ $ko -gt 0 ]]; do PGCONNECT_TIMEOUT=2 psql --list &>/dev/null;ko=$?; echo -n .; sleep 0.5; done + echo + set -e + + echo Init swh-scheduler database + + echo Creating extensions... + swh db init-admin --db-name ${PGDATABASE} scheduler + + echo Initializing the database... + swh db init --db-name ${PGDATABASE} scheduler + entrypoint.sh: | + #!/bin/bash + + set -e + + echo "${PGHOST}:5432:${PGDATABASE}:${PGUSER}:${PGPASSWORD}" >> ~/.pgpass + cat >> ~/.pg_service.conf <