# Patch: add "best effort" nodes to the Grid'5000 Cassandra test cluster.
#
# What the patch contains (as visible in the hunks below):
#   01-run.sh                      start-up cleanup now also removes besteffort_nodes.installed,
#                                  nodes.lst and besteffort_nodes.lst (rm -vf).
#   02-add_best_effort_node.sh     new; submits a 1-node, walltime=12 OAR job (-t deploy -t besteffort)
#                                  on ${BEST_EFFORT_CLUSTER} that runs 03-deploy_besteffort_nodes.sh.
#   03-deploy_besteffort_nodes.sh  new; runs _install_os.sh once per OAR job (guarded by a
#                                  ${OAR_JOB_ID}.os.stamp file), records the node in besteffort_nodes.lst,
#                                  writes cassandra_seeds.lst, rsyncs the install tree and runs
#                                  _provision_node.sh on the node, then does the same on the monitoring
#                                  host and restarts prometheus; finally sleeps so the OAR job stays alive.
#   _provision_node.sh             NODES is now built from nodes.lst plus besteffort_nodes.lst.
#   ansible/hosts.yml              monitoring group moved to the top and pointed at paranoia-3;
#                                  paravance-[1:78] moved into swh-storage.
#   ansible/monitoring.yml         lists existing containers; starts prometheus when absent,
#                                  restarts it otherwise.
#   environment.cfg                adds BEST_EFFORT_CLUSTER, changes the monitoring host and durations.
#
# In the two new scripts SCRIPT_DIR is resolved from "$0" (cd + pwd), and every path in
# 03-deploy_besteffort_nodes.sh (list files, rsync source) is anchored to SCRIPT_DIR, so they no
# longer depend on being launched from the script directory.
#
# NOTE(review): 01-run.sh still carries SCRIPT_DIR="$(pwd $(dirname @0))" as hunk context; "@0" looks
#   like a typo for "$0" and the expression appears to resolve to the current directory. Fix it in a
#   follow-up so all three scripts agree.
# NOTE(review): the line structure of this diff is collapsed; blank context lines and YAML indentation
#   cannot be verified from here. The "index" blob ids of the two new scripts were not recomputed.
# NOTE(review): environment.cfg sets MONITORING_HOSTS="paravance-2" while the hosts.yml monitoring
#   group lists paranoia-3 and paravance-[1:78] now sits under swh-storage; confirm the monitoring
#   play still targets the intended host.
# NOTE(review): 03-deploy uses ${MONITORING_HOSTS} as a single ssh/rsync target; presumably it holds
#   exactly one host; verify before adding more.
# NOTE(review): monitoring.yml registers prometheus_configuration but nothing in the visible hunk reads
#   it; containers.stdout.find('prometheus') is a substring match on the container list; and
#   03-deploy issues its own "docker restart prometheus" after provisioning, so the container is
#   presumably restarted twice.
# NOTE(review): the vagrant "nodes" list gains a trailing ",test" entry; confirm it is not a leftover.
diff --git a/sysadmin/grid5000/cassandra/01-run.sh b/sysadmin/grid5000/cassandra/01-run.sh index 8667da6..fc1708b 100755 --- a/sysadmin/grid5000/cassandra/01-run.sh +++ b/sysadmin/grid5000/cassandra/01-run.sh @@ -1,28 +1,28 @@ #!/usr/bin/env bash set -eu SCRIPT_DIR="$(pwd $(dirname @0))" source "${SCRIPT_DIR}/environment.cfg" -rm -f nodes.installed +rm -vf nodes.installed besteffort_nodes.installed nodes.lst besteffort_nodes.lst ${SCRIPT_DIR}/02-reserve_nodes.sh echo "########### Waiting for node installations" while [ ! -e ${SCRIPT_DIR}/nodes.installed ]; do sleep 2 done echo "########### Node installations done" echo "########### Initialize cassandra" FIRST_STORAGE_HOST="$(echo ${STORAGE_HOSTS} | cut -f1 -d' ')" STORAGE_NODE="${FIRST_STORAGE_HOST}.${G5K_SITE}" ssh "${SSH_USER}@${STORAGE_NODE}" /usr/local/bin/swh-storage-init-cassandra.sh echo "####### FINISHED" echo "####### Sleeping" sleep infinity diff --git a/sysadmin/grid5000/cassandra/02-add_best_effort_node.sh b/sysadmin/grid5000/cassandra/02-add_best_effort_node.sh new file mode 100755 index 0000000..df5976d --- /dev/null +++ b/sysadmin/grid5000/cassandra/02-add_best_effort_node.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +set -eu + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" + +source "${SCRIPT_DIR}/environment.cfg" + +oarsub -l "{cluster='${BEST_EFFORT_CLUSTER}'}/nodes=1,walltime=12" -t deploy -t besteffort "${SCRIPT_DIR}/03-deploy_besteffort_nodes.sh" diff --git a/sysadmin/grid5000/cassandra/03-deploy_besteffort_nodes.sh b/sysadmin/grid5000/cassandra/03-deploy_besteffort_nodes.sh new file mode 100755 index 0000000..c4339d5 --- /dev/null +++ b/sysadmin/grid5000/cassandra/03-deploy_besteffort_nodes.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +# set -eux + +SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" + +source "${SCRIPT_DIR}/environment.cfg" + +echo "########### Nodes:" +uniq "${OAR_FILE_NODES}" +echo "########### Installing os on nodes" + +INSTALLED_OS_STAMP="${OAR_JOB_ID}.os.stamp" + +if [ ! 
-e "${SCRIPT_DIR}/${INSTALLED_OS_STAMP}" ]; then + ${SCRIPT_DIR}/_install_os.sh + touch "${SCRIPT_DIR}/${INSTALLED_OS_STAMP}" +fi + +NODE=$(uniq "${OAR_NODE_FILE}") +echo "${NODE}" >> "${SCRIPT_DIR}/besteffort_nodes.lst" +sort -u "${SCRIPT_DIR}/besteffort_nodes.lst" > "${SCRIPT_DIR}/besteffort_nodes.lst.tmp" +mv "${SCRIPT_DIR}/besteffort_nodes.lst.tmp" "${SCRIPT_DIR}/besteffort_nodes.lst" + +echo "${CASSANDRA_HOSTS}" | sed 's/ /,/g' > "${SCRIPT_DIR}/cassandra_seeds.lst" + +time rsync -avP "${SCRIPT_DIR}/" "${SSH_USER}"@${NODE}:install +time ssh ${SSH_OPTIONS} "${SSH_USER}"@${NODE} install/_provision_node.sh + + +# Refresh the monitoring configuration +time rsync -avP "${SCRIPT_DIR}/" "${SSH_USER}"@${MONITORING_HOSTS}:install +time ssh ${SSH_OPTIONS} "${SSH_USER}"@${MONITORING_HOSTS} install/_provision_node.sh +ssh ${SSH_OPTIONS} "${SSH_USER}"@${MONITORING_HOSTS} docker restart prometheus + +# The script must not exit to avoid the oar job to be killed +echo "########### Sleeping" +sleep infinity diff --git a/sysadmin/grid5000/cassandra/_provision_node.sh b/sysadmin/grid5000/cassandra/_provision_node.sh index 0770351..809b1dd 100755 --- a/sysadmin/grid5000/cassandra/_provision_node.sh +++ b/sysadmin/grid5000/cassandra/_provision_node.sh @@ -1,12 +1,12 @@ #!/usr/bin/env bash set -eux apt update apt install -y ansible cd /root/install/ansible CASSANDRA_SEEDS="$(cat ../cassandra_seeds.lst)" -NODES="$(cat ../nodes.lst | tr '\n' ',')" +NODES="$(cat ../nodes.lst ../besteffort_nodes.lst | tr '\n' ',')" ansible-playbook -i hosts.yml -e @/root/install/.credentials --vault-password-file=/root/install/.vault_password -l "$(hostname)" playbook.yml --extra-vars "cassandra_seed_ips=${CASSANDRA_SEEDS}" --extra-vars "nodes=${NODES}" diff --git a/sysadmin/grid5000/cassandra/ansible/hosts.yml b/sysadmin/grid5000/cassandra/ansible/hosts.yml index 068eddb..41132f1 100644 --- a/sysadmin/grid5000/cassandra/ansible/hosts.yml +++ b/sysadmin/grid5000/cassandra/ansible/hosts.yml @@ -1,152 +1,150 @@ # Global configuration +monitoring: + hosts: + monitoring1: + 
paranoia-3.rennes.grid5000.fr: + vars: + ansible_connection: local + install_docker_install_script: false + swh-storage: hosts: parasilo-[20:28].rennes.grid5000.fr: - paranoia-[1:8].rennes.grid5000.fr: - # paravance-[].rennes.grid5000.fr: + paravance-[1:78].rennes.grid5000.fr: # local vagrant hosts swh-storage1: vars: ansible_connection: local journal: brokers: - broker1.journal.softwareheritage.org:9093 - broker2.journal.softwareheritage.org:9093 - broker3.journal.softwareheritage.org:9093 - broker4.journal.softwareheritage.org:9093 consumer: user: swh-vse group: swh-vse-grid5000-1 replayer_count: content: 0 skipped_content: 0 directory: 50 origin: 0 origin_visit: 0 origin_visit_status: 0 release: 0 revision: 0 snapshot: 0 - cassandra: hosts: dahu-[1:32].grenoble.grid5000.fr: parasilo-[1:19].rennes.grid5000.fr: # local vagrant hosts cassandra[1:9]: vars: ansible_connection: local cassandra_config_dir: /etc/cassandra cassandra_data_dir_base: /srv/cassandra cassandra_data_dir_system: "{{cassandra_data_dir_base}}/system" cassandra_data_dir: "{{ cassandra_data_dir_base }}/data" cassandra_commitlogs_dir: "{{ cassandra_data_dir_base }}/commitlogs" # Per cluster specificities dahu_cluster_hosts: hosts: dahu[1:32].grenoble.grid5000.fr vars: cassandra_listen_interface: enp24s0f0 parasilo_cluster_hosts: hosts: parasilo-[1:28].rennes.grid5000.fr: vars: cassandra_listen_interface: eno1 zfs_pools: commitlogs: disks: - sdf datasets: commitlogs: /srv/cassandra/commitlogs data: disks: - sdb - sdc - sdd - sde datasets: data: /srv/cassandra/data paravance_cluster_hosts: hosts: paravance-[1:78].rennes.grid5000.fr: vars: cassandra_listen_interface: eno1 zfs_pools: - data: disks: - sdb datasets: data: /srv/cassandra -monitoring: - hosts: - monitoring1: - paravance-[1:78].rennes.grid5000.fr: - vars: - ansible_connection: local - install_docker_install_script: false - # Vagrant configuration vagrant_nodes: hosts: cassandra1: ansible_host: 10.168.180.11 ansible_user: vagrant 
ansible_ssh_private_key_file: .vagrant/machines/cassandra1/libvirt/private_key cassandra2: ansible_host: 10.168.180.12 ansible_user: vagrant ansible_ssh_private_key_file: .vagrant/machines/cassandra2/libvirt/private_key cassandra3: ansible_host: 10.168.180.13 ansible_user: vagrant ansible_ssh_private_key_file: .vagrant/machines/cassandra3/libvirt/private_key swh-storage1: ansible_host: 10.168.180.14 ansible_user: vagrant ansible_ssh_private_key_file: .vagrant/machines/swh-storage/libvirt/private_key monitoring1: ansible_host: 10.168.180.15 ansible_user: vagrant ansible_ssh_private_key_file: .vagrant/machines/monitoring/libvirt/private_key vars: ansible_connection: ssh install_docker_install_script: true journal: brokers: # staging - broker0.journal.staging.swh.network:9093 consumer: user: swh-vse group: swh-vse-grid5000-1 replayer_count: content: 0 skipped_content: 0 directory: 5 origin: 0 origin_visit: 0 origin_visit_status: 0 release: 0 revision: 0 snapshot: 0 cassandra_listen_interface: eth1 # passed through --extra-vars on grid5000 cassandra_seed_ips: 10.168.180.11,10.168.180.12,10.168.180.13 - nodes: 10.168.180.11,10.168.180.12,10.168.180.13,10.168.180.14,10.168.180.15 + nodes: 10.168.180.11,10.168.180.12,10.168.180.13,10.168.180.14,10.168.180.15,test zfs_pools: commitlogs: disks: - vdb datasets: commitlogs: /srv/cassandra/commitlogs data: disks: - vdc - vdd datasets: data: /srv/cassandra/data diff --git a/sysadmin/grid5000/cassandra/ansible/monitoring.yml b/sysadmin/grid5000/cassandra/ansible/monitoring.yml index 2539b95..9f36214 100644 --- a/sysadmin/grid5000/cassandra/ansible/monitoring.yml +++ b/sysadmin/grid5000/cassandra/ansible/monitoring.yml @@ -1,45 +1,57 @@ --- - name: Create grid5000 tools directories file: state: directory path: "{{ item }}" owner: root group: root mode: "0755" with_items: - /grid5000 - /grid5000/code - /grid5000/code/bin when: install_docker_install_script - name: Install docker installation script copy: src: 
"files/g5k-setup-docker" dest: "/grid5000/code/bin/g5k-setup-docker" owner: root group: root mode: "0755" when: install_docker_install_script - name: Install docker command: cmd: "/grid5000/code/bin/g5k-setup-docker" - name: Create prometheus data directory file: state: directory path: /tmp/prometheus owner: nobody group: nogroup - name: install prometheus node exporter include: _install_prometheus_exporter.yml - name: Create prometheus configuration template: src: "templates/prometheus/prometheus.yml" dest: "/etc/prometheus.yml" + register: prometheus_configuration + +- name: test if prometheus container exists + command: docker ps -a --format='{{ '{{' }}.Names{{ '}}' }}' + register: containers + - name: Start prometheus command: cmd: "docker run -d -p 9090:9090 -v /etc/prometheus.yml:/etc/prometheus/prometheus.yml -v /tmp/prometheus:/prometheus --name prometheus prom/prometheus" + when: containers.stdout.find('prometheus') == -1 + +- name: Restart prometheus + command: + cmd: "docker restart prometheus" + when: containers.stdout.find('prometheus') != -1 diff --git a/sysadmin/grid5000/cassandra/environment.cfg b/sysadmin/grid5000/cassandra/environment.cfg index 60087e7..f8054b0 100644 --- a/sysadmin/grid5000/cassandra/environment.cfg +++ b/sysadmin/grid5000/cassandra/environment.cfg @@ -1,16 +1,17 @@ export G5K_SITE='rennes.grid5000.fr' +export BEST_EFFORT_CLUSTER='paravance' + export CASSANDRA_HOSTS="parasilo-2 parasilo-3 parasilo-4 parasilo-5" export CASSANDRA_DISKS_COUNT=5 export STORAGE_HOSTS="paravance-1" export JOURNAL_CLIENT_HOSTS="" -export MONITORING_HOSTS="paravance-51" +export MONITORING_HOSTS="paravance-2" # export CASSANDRA_NB_DISK_PER_NODE=5 -export DISK_RESERVATION_DURATION=$((1 * 24)) # 1day during tests -export NODE_RESERVATION_DURATION=00:50 # in hours - +export DISK_RESERVATION_DURATION=$((7 * 24)) # 7 days, in hours +export NODE_RESERVATION_DURATION=12 # in hours export SSH_USER=root # export SSH_OPTIONS="-o UserKnownHostsFile=/dev/null -o 
StrictHostKeyChecking=no" export SSH_OPTIONS=""