Split cassandra initialization and replayer start/stop into dedicated scripts,
make the keyspace replication factor configurable, and install the object
count exporter on the cassandra nodes.

Note: blank context lines were restored from the hunk line counts, and the
"index" blob ids were not regenerated after editing the added lines.

diff --git a/sysadmin/grid5000/cassandra/01-run.sh b/sysadmin/grid5000/cassandra/01-run.sh
index fc1708b..9943375 100755
--- a/sysadmin/grid5000/cassandra/01-run.sh
+++ b/sysadmin/grid5000/cassandra/01-run.sh
@@ -1,28 +1,28 @@
 #!/usr/bin/env bash
 
 set -eu
 
 SCRIPT_DIR="$(pwd $(dirname @0))"
 
 source "${SCRIPT_DIR}/environment.cfg"
 
 rm -vf nodes.installed besteffort_nodes.installed nodes.lst besteffort_nodes.lst
 
 ${SCRIPT_DIR}/02-reserve_nodes.sh
 
 echo "########### Waiting for node installations"
 while [ ! -e ${SCRIPT_DIR}/nodes.installed ]; do
     sleep 2
 done
 
 echo "########### Node installations done"
 
-echo "########### Initialize cassandra"
-FIRST_STORAGE_HOST="$(echo ${STORAGE_HOSTS} | cut -f1 -d' ')"
-STORAGE_NODE="${FIRST_STORAGE_HOST}.${G5K_SITE}"
+"${SCRIPT_DIR}/05-initialize_cassandra.sh"
+
+"${SCRIPT_DIR}/10-start_replayers.sh"
+
 
-ssh "${SSH_USER}@${STORAGE_NODE}" /usr/local/bin/swh-storage-init-cassandra.sh
 
 echo "####### FINISHED"
 echo "####### Sleeping"
 sleep infinity
diff --git a/sysadmin/grid5000/cassandra/05-initialize_cassandra.sh b/sysadmin/grid5000/cassandra/05-initialize_cassandra.sh
new file mode 100755
index 0000000..b768c23
--- /dev/null
+++ b/sysadmin/grid5000/cassandra/05-initialize_cassandra.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+#
+# Initialize the cassandra keyspace from the first storage host, then set the
+# keyspace replication factor from the first cassandra host.
+#
+# STORAGE_HOSTS, CASSANDRA_HOSTS, G5K_SITE, SSH_USER and
+# CASSANDRA_REPLICATION_FACTOR must be defined once environment.cfg is sourced.
+
+set -eu
+
+# Locate environment.cfg next to this script, whatever the working directory.
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+
+source "${SCRIPT_DIR}/environment.cfg"
+
+FIRST_STORAGE_HOST="$(echo ${STORAGE_HOSTS} | cut -f1 -d' ')"
+STORAGE_NODE="${FIRST_STORAGE_HOST}.${G5K_SITE}"
+
+FIRST_CASSANDRA_HOST="$(echo ${CASSANDRA_HOSTS} | cut -f1 -d' ')"
+CASSANDRA_NODE="${FIRST_CASSANDRA_HOST}.${G5K_SITE}"
+
+echo "########### Initialize cassandra keyspace..."
+ssh "${SSH_USER}@${STORAGE_NODE}" /usr/local/bin/swh-storage-init-cassandra.sh
+
+echo "########### Change the replication factor..."
+ssh "${SSH_USER}@${CASSANDRA_NODE}" /usr/local/bin/change-cassandra-replication.sh "${CASSANDRA_REPLICATION_FACTOR}"
+
+echo "####### $0 FINISHED"
diff --git a/sysadmin/grid5000/cassandra/10-start_replayers.sh b/sysadmin/grid5000/cassandra/10-start_replayers.sh
new file mode 100755
index 0000000..2be0b3b
--- /dev/null
+++ b/sysadmin/grid5000/cassandra/10-start_replayers.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+#
+# Restart every replayer-*.target systemd unit on the given nodes.
+# Usage: 10-start_replayers.sh [node...]
+# Without arguments, the nodes listed in STORAGE_HOSTS are used.
+
+set -eu
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+
+# Load the environment before reading STORAGE_HOSTS: "set -u" aborts on an
+# unset variable.
+source "${SCRIPT_DIR}/environment.cfg"
+
+NODES=$*
+
+if [ -z "${NODES}" ]; then
+  NODES=${STORAGE_HOSTS}
+fi
+
+for NODE in $NODES; do
+  echo "########### Starting replayers on $NODE..."
+  ssh "${SSH_USER}@${NODE}" 'cd /etc/systemd/system; ls replayer-*.target' | xargs -r ssh "${SSH_USER}@${NODE}" systemctl restart
+done
+
+echo "####### $0 FINISHED"
diff --git a/sysadmin/grid5000/cassandra/10-stop_replayers.sh b/sysadmin/grid5000/cassandra/10-stop_replayers.sh
new file mode 100755
index 0000000..e269ac7
--- /dev/null
+++ b/sysadmin/grid5000/cassandra/10-stop_replayers.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+#
+# Stop every replayer-*.target systemd unit on the given nodes.
+# Usage: 10-stop_replayers.sh [node...]
+# Without arguments, the nodes listed in STORAGE_HOSTS are used.
+
+set -eu
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+
+# Load the environment before reading STORAGE_HOSTS: "set -u" aborts on an
+# unset variable.
+source "${SCRIPT_DIR}/environment.cfg"
+
+NODES=$*
+
+if [ -z "${NODES}" ]; then
+  NODES=${STORAGE_HOSTS}
+fi
+
+for NODE in $NODES; do
+  echo "########### Stopping replayers on $NODE..."
+  ssh "${SSH_USER}@${NODE}" 'cd /etc/systemd/system; ls replayer-*.target' | xargs -r ssh "${SSH_USER}@${NODE}" systemctl stop
+done
+
+echo "####### $0 FINISHED"
diff --git a/sysadmin/grid5000/cassandra/ansible/_configure_replayer_services.yml b/sysadmin/grid5000/cassandra/ansible/_configure_replayer_services.yml
index 11a5dee..e5ad17d 100644
--- a/sysadmin/grid5000/cassandra/ansible/_configure_replayer_services.yml
+++ b/sysadmin/grid5000/cassandra/ansible/_configure_replayer_services.yml
@@ -1,23 +1,23 @@
 ---
 - name: swh replayer {{obj}} configuration files
   template:
     src: templates/swhstorage/replayer.yml
     dest: /etc/softwareheritage/replayer/replayer-{{ obj }}.yml
 
 - name: swh replayer replayer-{{obj}}@
   template:
     src: templates/swhstorage/replayer@.service
     dest: /etc/systemd/system/replayer-{{obj}}@.service
 
 - name: swh replayer {{obj}} target
   template:
     src: templates/swhstorage/replayer.target
     dest: /etc/systemd/system/replayer-{{obj}}.target
   vars:
     process_count: "{{ hostvars[inventory_hostname]['journal']['replayer_count'][obj] }}"
 
 - name: start {{obj}} replayer target
   service:
     name: replayer-{{obj}}.target
     enabled: true
-    state: started
+    state: stopped
diff --git a/sysadmin/grid5000/cassandra/ansible/cassandra.yml b/sysadmin/grid5000/cassandra/ansible/cassandra.yml
index 9e5da53..4abdf37 100644
--- a/sysadmin/grid5000/cassandra/ansible/cassandra.yml
+++ b/sysadmin/grid5000/cassandra/ansible/cassandra.yml
@@ -1,71 +1,92 @@
 ---
 # - name: "Get public ipv4 address"
 #   set_fact:
 #     cassandra_seed_ips: "{{ansible_facts[item]['ipv4']['address']}}"
 #   with_items:
 #     - "{{cassandra_listen_interface }}"
 
 - name: Install cassandra signing key
   apt_key:
     url: https://downloads.apache.org/cassandra/KEYS
     state: present
 
 - name: Install cassandra apt repository
   apt_repository:
     repo: deb http://downloads.apache.org/cassandra/debian 40x main
     state: present
     filename: cassandra.sources
 
 - name: Install cassandra packages
   apt:
     update_cache: true # force an apt update before
     name:
       ## TODO: check other version than jdk11
       - cassandra
       - dstat
       - facter
       - openjdk-11-jdk
       - smartmontools
       - tcpdump
 
 - name: install prometheus node exporter
   include: _install_prometheus_exporter.yml
 
 - name: Create datadirs
   file:
     state: directory
     path: "{{ item }}"
     owner: "cassandra"
     group: "cassandra"
     mode: "0755"
     recurse: true
   with_items:
     - "{{ cassandra_data_dir_base }}"
     - "{{ cassandra_data_dir_system }}"
     - "{{ cassandra_data_dir }}"
     - "{{ cassandra_commitlogs_dir }}"
 
 - name: Download prometheus jmx exporter
   get_url:
     url: https://repo1.maven.org/maven2/io/prometheus/jmx/jmx_prometheus_javaagent/0.15.0/jmx_prometheus_javaagent-0.15.0.jar
     dest: /opt/jmx_prometheus_javaagent-0.15.0.jar
 
 - name: Download prometheus exporter configuration
   get_url:
     url: https://raw.githubusercontent.com/prometheus/jmx_exporter/master/example_configs/cassandra.yml
     dest: /opt/jmx_exporter.yml
 
 - name: Configure cassandra
   template:
     src: "templates/{{item}}"
     dest: "{{cassandra_config_dir}}/{{item}}"
   with_items: [cassandra.yaml, jvm11-server.options]
   register: cassandra_configuration_files
 
 - name: Restart cassandra service
   service:
     name: cassandra
     state: restarted
   when: cassandra_configuration_files.changed
 
+- name: cassandra replication configuration script
+  template:
+    src: templates/swhstorage/change-cassandra-replication.sh
+    dest: /usr/local/bin/change-cassandra-replication.sh
+    mode: "0755"
+
 # TODO test different read ahead
+
+- name: Install object count exporter
+  copy:
+    src: "files/cassandra/prometheus-object-count.sh"
+    dest: "/usr/local/bin/prometheus-object-count.sh"
+    owner: root
+    group: root
+    mode: "0755"
+
+- name: Send object count to prometheus
+  cron:
+    name: "object count"
+    minute: "*"
+    hour: "*"
+    job: "/usr/local/bin/prometheus-object-count.sh > /dev/null"
diff --git a/sysadmin/grid5000/cassandra/ansible/swh-storage.yml b/sysadmin/grid5000/cassandra/ansible/swh-storage.yml
index dd84201..bcb8e0f 100644
--- a/sysadmin/grid5000/cassandra/ansible/swh-storage.yml
+++ b/sysadmin/grid5000/cassandra/ansible/swh-storage.yml
@@ -1,125 +1,129 @@
 ---
 - name: Add Backports repository
   apt_repository:
     repo: deb http://deb.debian.org/debian/ buster-backports main contrib non-free
     filename: backports.sources
 
 - name: swhstorage group
   group:
     name: swhstorage
 
 - name: swhstorage user
   user:
     name: swhstorage
     group: swhstorage
     home: /var/lib/swhstorage # *big images mount homes via nfs so the user creation failed
 
 - name: Add SWH repository
   apt_repository:
     repo: deb [trusted=yes] https://debian.softwareheritage.org/ buster-swh main
     state: present
     filename: cassandra.sources
 
 - name: Install packages
   apt:
     name:
       - daemonize
       - dstat
       - facter
       - prometheus-statsd-exporter
       - python3
       - python3-gunicorn
       - tcpdump
 
 - name: Install packages from backports
   apt:
     name:
       - python3-typing-extensions
       - gunicorn3
     default_release: buster-backports
 
 - name: Install swh storage packages
   apt:
     name:
       - python3-swh.storage
       - python3-swh.journal
 
 - name: install prometheus node exporter
   include: _install_prometheus_exporter.yml
 
 - name: Create directories
   file:
     state: directory
     path: "{{ item }}"
     owner: root
     group: root
     mode: "0755"
   with_items:
     - /etc/gunicorn
     - /etc/gunicorn/instances
     - /run/gunicorn
     - /run/gunicorn/swh-storage
     - /etc/softwareheritage
     - /etc/softwareheritage/storage
     - /etc/softwareheritage/replayer
 
 - name: Create swh-storage directories
   file:
     state: directory
     path: "{{ item }}"
     owner: swhstorage
     group: swhstorage
     mode: "0755"
   with_items:
     - /run/gunicorn/swh-storage/
     - /run/replayer
 
 - name: Configure gunicorn - default service
   template:
     src: "templates/gunicorn/gunicorn.service"
     dest: "/etc/systemd/system/gunicorn.service"
 
 - name: Configure gunicorn - log configuration
   template:
     src: "templates/gunicorn/logconfig.ini"
     dest: "/etc/gunicorn/logconfig.ini"
 
 - name: swh-storage gunicorn instance configuration
   template:
     src: "templates/gunicorn/gunicorn-instance.cfg"
     dest: "/etc/gunicorn/instances/swh-storage.cfg"
 
 - name: swh-storage configuration directories
   template:
     src: "templates/swhstorage/storage.yml"
     dest: "/etc/softwareheritage/storage/storage.yml"
 
 - name: swh-storage service configuration
   template:
     src: "templates/gunicorn/gunicorn-instance-service.cfg"
     dest: "/etc/systemd/system/gunicorn-swh-storage.service" # TODO variabilize
 
 - name: swh-storage service
   service:
     name: gunicorn-swh-storage
     enabled: true
     state: started
 
 - name: swh-storage init cassandra script
   template:
-    src: templates/swhstorage/init-cassandra.sh
+    src: templates/swhstorage/init-cassandra-keyspace.sh
     dest: /usr/local/bin/swh-storage-init-cassandra.sh
     mode: 0755
 
 - name: Configure replayer services
   include: _configure_replayer_services.yml obj={{ item }}
   loop:
     - content
     - skipped_content
     - directory
     - origin
     - origin_visit
     - origin_visit_status
     - release
     - revision
     - snapshot
+
+- name: reload systemd
+  systemd:
+    daemon_reload: true
diff --git a/sysadmin/grid5000/cassandra/ansible/templates/swhstorage/change-cassandra-replication.sh b/sysadmin/grid5000/cassandra/ansible/templates/swhstorage/change-cassandra-replication.sh
new file mode 100644
index 0000000..6f1e431
--- /dev/null
+++ b/sysadmin/grid5000/cassandra/ansible/templates/swhstorage/change-cassandra-replication.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+#
+# Set the replication factor of the swh keyspace, then launch a full repair.
+# Usage: change-cassandra-replication.sh <replication factor>
+
+REPLICATION_FACTOR=${1:-}
+
+if [ -z "${REPLICATION_FACTOR}" ]; then
+  echo "usage: $0 <replication factor>" >&2
+  exit 1
+fi
+
+echo "Changing replication factor"
+echo "ALTER KEYSPACE swh
+WITH
+replication =
+{
+'class': 'SimpleStrategy',
+'replication_factor': '${REPLICATION_FACTOR}'
+};
+" | cqlsh "$(facter networking.ip)"
+
+echo "Launching repair..."
+nodetool repair --full
diff --git a/sysadmin/grid5000/cassandra/ansible/templates/swhstorage/init-cassandra.sh b/sysadmin/grid5000/cassandra/ansible/templates/swhstorage/init-cassandra-keyspace.sh
similarity index 100%
rename from sysadmin/grid5000/cassandra/ansible/templates/swhstorage/init-cassandra.sh
rename to sysadmin/grid5000/cassandra/ansible/templates/swhstorage/init-cassandra-keyspace.sh
diff --git a/sysadmin/grid5000/cassandra/ansible/templates/swhstorage/replayer@.service b/sysadmin/grid5000/cassandra/ansible/templates/swhstorage/replayer@.service
index dddb30a..fe9acd0 100644
--- a/sysadmin/grid5000/cassandra/ansible/templates/swhstorage/replayer@.service
+++ b/sysadmin/grid5000/cassandra/ansible/templates/swhstorage/replayer@.service
@@ -1,20 +1,20 @@
 [Unit]
 Description=swh storage {{ item }} replayer
 ConditionPathExists=/etc/softwareheritage/replayer/replayer-{{ item }}.yml
 PartOf=replayer-{{item}}.target
 
 [Service]
 Type=simple
 User=swhstorage
 Group=swhstorage
 Environment=SWH_CONFIG_FILENAME=/etc/softwareheritage/replayer/replayer-{{ item }}.yml
 #Environment=SWH_LOG_TARGET=journal
 Environment=STATSD_PORT=9125
 ExecStart=/usr/bin/swh storage replay
 # ExecStop=/bin/kill -TERM $MAINPID
 # ExecReload=/bin/kill -HUP $MAINPID
-# Restart=on-failure
-# RestartSec=10
+Restart=on-failure
+RestartSec=10
 
 [Install]
 WantedBy=multi-user.target