diff --git a/data/common/cassandra.yaml b/data/common/cassandra.yaml index 1a5e5e26..508a2fcc 100644 --- a/data/common/cassandra.yaml +++ b/data/common/cassandra.yaml @@ -1,1182 +1,78 @@ -cassandra::release: 311x -cassandra::cluster: azure +cassandra::base_data_directory: /srv/cassandra +cassandra::base_config_directory: /etc/cassandra +cassandra::base_log_directory: /var/log/cassandra -cassandra::exporter::version: "0.9.10" -cassandra::exporter::listen_network: "%{lookup('internal_network')}" -cassandra::exporter::listen_port: 9500 +cassandra::version: 4.0.5 cassandra::listen_network: "%{lookup('internal_network')}" -cassandra::baseline_settings: - # NOTE: - # See http://wiki.apache.org/cassandra/StorageConfiguration for - # full explanations of configuration directives - # /NOTE - - # This defines the number of tokens randomly assigned to this node on the ring - # The more tokens, relative to other nodes, the larger the proportion of data - # that this node will store. You probably want all nodes to have the same number - # of tokens assuming they have equal hardware capability. - # - # If you leave this unspecified, Cassandra will use the default of 1 token for legacy compatibility, - # and will use the initial_token as described below. - # - # Specifying initial_token will override this setting on the node's initial start, - # on subsequent starts, this setting will apply even if initial token is set. - # - # If you already have a cluster with 1 token per node, and wish to migrate to - # multiple tokens per node, see http://wiki.apache.org/cassandra/Operations - num_tokens: 256 - - # Triggers automatic allocation of num_tokens tokens for this node. The allocation - # algorithm attempts to choose tokens in a way that optimizes replicated load over - # the nodes in the datacenter for the replication strategy used by the specified - # keyspace. - # - # The load assigned to each node will be close to proportional to its number of - # vnodes. - # - # Only supported with the Murmur3Partitioner. - # allocate_tokens_for_keyspace: KEYSPACE - - # initial_token allows you to specify tokens manually. While you can use it with - # vnodes (num_tokens > 1, above) -- in which case you should provide a - # comma-separated list -- it's primarily used when adding nodes to legacy clusters - # that do not have vnodes enabled. - # initial_token: - - # See http://wiki.apache.org/cassandra/HintedHandoff - # May either be "true" or "false" to enable globally - hinted_handoff_enabled: true - - # When hinted_handoff_enabled is true, a black list of data centers that will not - # perform hinted handoff - # hinted_handoff_disabled_datacenters: - # - DC1 - # - DC2 - - # this defines the maximum amount of time a dead host will have hints - # generated. After it has been dead this long, new hints for it will not be - # created until it has been seen alive and gone down again. - max_hint_window_in_ms: 10800000 # 3 hours - - # Maximum throttle in KBs per second, per delivery thread. This will be - # reduced proportionally to the number of nodes in the cluster. (If there - # are two nodes in the cluster, each delivery thread will use the maximum - # rate; if there are three, each will throttle to half of the maximum, - # since we expect two nodes to be delivering hints simultaneously.) 
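(A worked instance of that rule, for orientation only — not part of the patch: the cluster defined further down has six nodes, so with the 1024 KB/s throttle kept, each delivery thread would settle at roughly 1024 / 5 ≈ 205 KB/s, five peers being expected to deliver hints to a recovering node simultaneously.)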
- hinted_handoff_throttle_in_kb: 1024 - - # Number of threads with which to deliver hints; - # Consider increasing this number when you have multi-dc deployments, since - # cross-dc handoff tends to be slower - max_hints_delivery_threads: 2 - - # How often hints should be flushed from the internal buffers to disk. - # Will *not* trigger fsync. - hints_flush_period_in_ms: 10000 - - # Maximum size for a single hints file, in megabytes. - max_hints_file_size_in_mb: 128 - - # Compression to apply to the hint files. If omitted, hints files - # will be written uncompressed. LZ4, Snappy, and Deflate compressors - # are supported. - #hints_compression: - # - class_name: LZ4Compressor - # parameters: - # - - - # Maximum throttle in KBs per second, total. This will be - # reduced proportionally to the number of nodes in the cluster. - batchlog_replay_throttle_in_kb: 1024 - - # Authentication backend, implementing IAuthenticator; used to identify users - # Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthenticator, - # PasswordAuthenticator}. - # - # - AllowAllAuthenticator performs no checks - set it to disable authentication. - # - PasswordAuthenticator relies on username/password pairs to authenticate - # users. It keeps usernames and hashed passwords in system_auth.roles table. - # Please increase system_auth keyspace replication factor if you use this authenticator. - # If using PasswordAuthenticator, CassandraRoleManager must also be used (see below) - authenticator: AllowAllAuthenticator - - # Authorization backend, implementing IAuthorizer; used to limit access/provide permissions - # Out of the box, Cassandra provides org.apache.cassandra.auth.{AllowAllAuthorizer, - # CassandraAuthorizer}. - # - # - AllowAllAuthorizer allows any action to any user - set it to disable authorization. - # - CassandraAuthorizer stores permissions in system_auth.role_permissions table. Please - # increase system_auth keyspace replication factor if you use this authorizer. - authorizer: AllowAllAuthorizer - - # Part of the Authentication & Authorization backend, implementing IRoleManager; used - # to maintain grants and memberships between roles. - # Out of the box, Cassandra provides org.apache.cassandra.auth.CassandraRoleManager, - # which stores role information in the system_auth keyspace. Most functions of the - # IRoleManager require an authenticated login, so unless the configured IAuthenticator - # actually implements authentication, most of this functionality will be unavailable. - # - # - CassandraRoleManager stores role data in the system_auth keyspace. Please - # increase system_auth keyspace replication factor if you use this role manager. - role_manager: CassandraRoleManager - - # Validity period for roles cache (fetching granted roles can be an expensive - # operation depending on the role manager, CassandraRoleManager is one example) - # Granted roles are cached for authenticated sessions in AuthenticatedUser and - # after the period specified here, become eligible for (async) reload. - # Defaults to 2000, set to 0 to disable caching entirely. - # Will be disabled automatically for AllowAllAuthenticator. - roles_validity_in_ms: 2000 - - # Refresh interval for roles cache (if enabled). - # After this interval, cache entries become eligible for refresh. Upon next - # access, an async reload is scheduled and the old value returned until it - # completes. If roles_validity_in_ms is non-zero, then this must be - # also. - # Defaults to the same value as roles_validity_in_ms. 
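Should this cluster ever move off the AllowAll* backends, the stock comments above already name the required combination; a minimal sketch, not part of this patch (and note the advice above to raise the system_auth keyspace replication factor when doing so):

    authenticator: PasswordAuthenticator
    authorizer: CassandraAuthorizer
    role_manager: CassandraRoleManager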
- # roles_update_interval_in_ms: 2000 - - # Validity period for permissions cache (fetching permissions can be an - # expensive operation depending on the authorizer, CassandraAuthorizer is - # one example). Defaults to 2000, set to 0 to disable. - # Will be disabled automatically for AllowAllAuthorizer. - permissions_validity_in_ms: 2000 - - # Refresh interval for permissions cache (if enabled). - # After this interval, cache entries become eligible for refresh. Upon next - # access, an async reload is scheduled and the old value returned until it - # completes. If permissions_validity_in_ms is non-zero, then this must be - # also. - # Defaults to the same value as permissions_validity_in_ms. - # permissions_update_interval_in_ms: 2000 - - # Validity period for credentials cache. This cache is tightly coupled to - # the provided PasswordAuthenticator implementation of IAuthenticator. If - # another IAuthenticator implementation is configured, this cache will not - # be automatically used and so the following settings will have no effect. - # Please note, credentials are cached in their encrypted form, so while - # activating this cache may reduce the number of queries made to the - # underlying table, it may not bring a significant reduction in the - # latency of individual authentication attempts. - # Defaults to 2000, set to 0 to disable credentials caching. - credentials_validity_in_ms: 2000 - - # Refresh interval for credentials cache (if enabled). - # After this interval, cache entries become eligible for refresh. Upon next - # access, an async reload is scheduled and the old value returned until it - # completes. If credentials_validity_in_ms is non-zero, then this must be - # also. - # Defaults to the same value as credentials_validity_in_ms. - # credentials_update_interval_in_ms: 2000 - - # The partitioner is responsible for distributing groups of rows (by - # partition key) across nodes in the cluster. You should leave this - # alone for new clusters. The partitioner can NOT be changed without - # reloading all data, so when upgrading you should set this to the - # same partitioner you were already using. - # - # Besides Murmur3Partitioner, partitioners included for backwards - # compatibility include RandomPartitioner, ByteOrderedPartitioner, and - # OrderPreservingPartitioner. - # - partitioner: org.apache.cassandra.dht.Murmur3Partitioner - - # Enable / disable CDC functionality on a per-node basis. This modifies the logic used - # for write path allocation rejection (standard: never reject. cdc: reject Mutation - # containing a CDC-enabled table if at space limit in cdc_raw_directory). - cdc_enabled: false - - # Policy for data disk failures: - # - # die - # shut down gossip and client transports and kill the JVM for any fs errors or - # single-sstable errors, so the node can be replaced. - # - # stop_paranoid - # shut down gossip and client transports even for single-sstable errors, - # kill the JVM for errors during startup. - # - # stop - # shut down gossip and client transports, leaving the node effectively dead, but - # can still be inspected via JMX, kill the JVM for errors during startup. - # - # best_effort - # stop using the failed disk and respond to requests based on - # remaining available sstables. This means you WILL see obsolete - # data at CL.ONE! 
- # - # ignore - # ignore fatal errors and let requests fail, as in pre-1.2 Cassandra - disk_failure_policy: stop - - # Policy for commit disk failures: - # - # die - # shut down gossip and Thrift and kill the JVM, so the node can be replaced. - # - # stop - # shut down gossip and Thrift, leaving the node effectively dead, but - # can still be inspected via JMX. - # - # stop_commit - # shutdown the commit log, letting writes collect but - # continuing to service reads, as in pre-2.0.5 Cassandra - # - # ignore - # ignore fatal errors and let the batches fail - commit_failure_policy: stop - - # Maximum size of the native protocol prepared statement cache - # - # Valid values are either "auto" (omitting the value) or a value greater 0. - # - # Note that specifying a too large value will result in long running GCs and possbily - # out-of-memory errors. Keep the value at a small fraction of the heap. - # - # If you constantly see "prepared statements discarded in the last minute because - # cache limit reached" messages, the first step is to investigate the root cause - # of these messages and check whether prepared statements are used correctly - - # i.e. use bind markers for variable parts. - # - # Do only change the default value, if you really have more prepared statements than - # fit in the cache. In most cases it is not neccessary to change this value. - # Constantly re-preparing statements is a performance penalty. - # - # Default value ("auto") is 1/256th of the heap or 10MB, whichever is greater - #prepared_statements_cache_size_mb: - - # Maximum size of the Thrift prepared statement cache - # - # If you do not use Thrift at all, it is safe to leave this value at "auto". - # - # See description of 'prepared_statements_cache_size_mb' above for more information. - # - # Default value ("auto") is 1/256th of the heap or 10MB, whichever is greater - #thrift_prepared_statements_cache_size_mb: - - # Maximum size of the key cache in memory. - # - # Each key cache hit saves 1 seek and each row cache hit saves 2 seeks at the - # minimum, sometimes more. The key cache is fairly tiny for the amount of - # time it saves, so it's worthwhile to use it at large numbers. - # The row cache saves even more time, but must contain the entire row, - # so it is extremely space-intensive. It's best to only use the - # row cache if you have hot rows or static rows. - # - # NOTE: if you reduce the size, you may not get you hottest keys loaded on startup. - # - # Default value is empty to make it "auto" (min(5% of Heap (in MB), 100MB)). Set to 0 to disable key cache. - #key_cache_size_in_mb: - - # Duration in seconds after which Cassandra should - # save the key cache. Caches are saved to saved_caches_directory as - # specified in this configuration file. - # - # Saved caches greatly improve cold-start speeds, and is relatively cheap in - # terms of I/O for the key cache. Row cache saving is much more expensive and - # has limited use. - # - # Default is 14400 or 4 hours. - key_cache_save_period: 14400 - - # Number of keys from the key cache to save - # Disabled by default, meaning all keys are going to be saved - # key_cache_keys_to_save: 100 - - # Row cache implementation class name. Available implementations: - # - # org.apache.cassandra.cache.OHCProvider - # Fully off-heap row cache implementation (default). - # - # org.apache.cassandra.cache.SerializingCacheProvider - # This is the row cache implementation availabile - # in previous releases of Cassandra. 
- # row_cache_class_name: org.apache.cassandra.cache.OHCProvider - - # Maximum size of the row cache in memory. - # Please note that OHC cache implementation requires some additional off-heap memory to manage - # the map structures and some in-flight memory during operations before/after cache entries can be - # accounted against the cache capacity. This overhead is usually small compared to the whole capacity. - # Do not specify more memory that the system can afford in the worst usual situation and leave some - # headroom for OS block level cache. Do never allow your system to swap. - # - # Default value is 0, to disable row caching. - row_cache_size_in_mb: 0 - - # Duration in seconds after which Cassandra should save the row cache. - # Caches are saved to saved_caches_directory as specified in this configuration file. - # - # Saved caches greatly improve cold-start speeds, and is relatively cheap in - # terms of I/O for the key cache. Row cache saving is much more expensive and - # has limited use. - # - # Default is 0 to disable saving the row cache. - row_cache_save_period: 0 - - # Number of keys from the row cache to save. - # Specify 0 (which is the default), meaning all keys are going to be saved - # row_cache_keys_to_save: 100 - - # Maximum size of the counter cache in memory. - # - # Counter cache helps to reduce counter locks' contention for hot counter cells. - # In case of RF = 1 a counter cache hit will cause Cassandra to skip the read before - # write entirely. With RF > 1 a counter cache hit will still help to reduce the duration - # of the lock hold, helping with hot counter cell updates, but will not allow skipping - # the read entirely. Only the local (clock, count) tuple of a counter cell is kept - # in memory, not the whole counter, so it's relatively cheap. - # - # NOTE: if you reduce the size, you may not get you hottest keys loaded on startup. - # - # Default value is empty to make it "auto" (min(2.5% of Heap (in MB), 50MB)). Set to 0 to disable counter cache. - # NOTE: if you perform counter deletes and rely on low gcgs, you should disable the counter cache. - #counter_cache_size_in_mb: - - # Duration in seconds after which Cassandra should - # save the counter cache (keys only). Caches are saved to saved_caches_directory as - # specified in this configuration file. - # - # Default is 7200 or 2 hours. - counter_cache_save_period: 7200 - - # Number of keys from the counter cache to save - # Disabled by default, meaning all keys are going to be saved - # counter_cache_keys_to_save: 100 - - # commitlog_sync may be either "periodic" or "batch." - # - # When in batch mode, Cassandra won't ack writes until the commit log - # has been fsynced to disk. It will wait - # commitlog_sync_batch_window_in_ms milliseconds between fsyncs. - # This window should be kept short because the writer threads will - # be unable to do extra work while waiting. (You may need to increase - # concurrent_writes for the same reason.) - # - # commitlog_sync: batch - # commitlog_sync_batch_window_in_ms: 2 - # - # the other option is "periodic" where writes may be acked immediately - # and the CommitLog is simply synced every commitlog_sync_period_in_ms - # milliseconds. - commitlog_sync: periodic - commitlog_sync_period_in_ms: 10000 - - # The size of the individual commitlog file segments. A commitlog - # segment may be archived, deleted, or recycled once all the data - # in it (potentially from each columnfamily in the system) has been - # flushed to sstables. 
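(One implication of the periodic mode described above, which the new base_instance_configuration below keeps along with the 10000 ms period: up to ten seconds of acknowledged writes on a single node may be unsynced when that node fails abruptly, a window the cluster relies on replication to cover.)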
- # - # The default size is 32, which is almost always fine, but if you are - # archiving commitlog segments (see commitlog_archiving.properties), - # then you probably want a finer granularity of archiving; 8 or 16 MB - # is reasonable. - # Max mutation size is also configurable via max_mutation_size_in_kb setting in - # cassandra.yaml. The default is half the size commitlog_segment_size_in_mb * 1024. - # This should be positive and less than 2048. - # - # NOTE: If max_mutation_size_in_kb is set explicitly then commitlog_segment_size_in_mb must - # be set to at least twice the size of max_mutation_size_in_kb / 1024 - # - commitlog_segment_size_in_mb: 32 - - # Compression to apply to the commit log. If omitted, the commit log - # will be written uncompressed. LZ4, Snappy, and Deflate compressors - # are supported. - # commitlog_compression: - # - class_name: LZ4Compressor - # parameters: - # - - - # For workloads with more data than can fit in memory, Cassandra's - # bottleneck will be reads that need to fetch data from - # disk. "concurrent_reads" should be set to (16 * number_of_drives) in - # order to allow the operations to enqueue low enough in the stack - # that the OS and drives can reorder them. Same applies to - # "concurrent_counter_writes", since counter writes read the current - # values before incrementing and writing them back. - # - # On the other hand, since writes are almost never IO bound, the ideal - # number of "concurrent_writes" is dependent on the number of cores in - # your system; (8 * number_of_cores) is a good rule of thumb. - concurrent_reads: 64 - concurrent_writes: 96 - concurrent_counter_writes: 64 - - # For materialized view writes, as there is a read involved, so this should - # be limited by the less of concurrent reads or concurrent writes. - concurrent_materialized_view_writes: 32 - - # Maximum memory to use for sstable chunk cache and buffer pooling. - # 32MB of this are reserved for pooling buffers, the rest is used as an - # cache that holds uncompressed sstable chunks. - # Defaults to the smaller of 1/4 of heap or 512MB. This pool is allocated off-heap, - # so is in addition to the memory allocated for heap. The cache also has on-heap - # overhead which is roughly 128 bytes per chunk (i.e. 0.2% of the reserved size - # if the default 64k chunk size is used). - # Memory is only allocated when needed. - # file_cache_size_in_mb: 512 - - # Flag indicating whether to allocate on or off heap when the sstable buffer - # pool is exhausted, that is when it has exceeded the maximum memory - # file_cache_size_in_mb, beyond which it will not cache buffers but allocate on request. - - # buffer_pool_use_heap_if_exhausted: true - - # The strategy for optimizing disk read - # Possible values are: - # ssd (for solid state disks, the default) - # spinning (for spinning disks) - # disk_optimization_strategy: ssd - - # Total permitted memory to use for memtables. Cassandra will stop - # accepting writes when the limit is exceeded until a flush completes, - # and will trigger a flush based on memtable_cleanup_threshold - # If omitted, Cassandra will set both to 1/4 the size of the heap. - # memtable_heap_space_in_mb: 2048 - # memtable_offheap_space_in_mb: 2048 - - # memtable_cleanup_threshold is deprecated. The default calculation - # is the only reasonable choice. See the comments on memtable_flush_writers - # for more information. - # - # Ratio of occupied non-flushing memtable size to total permitted size - # that will trigger a flush of the largest memtable. 
Larger mct will - # mean larger flushes and hence less compaction, but also less concurrent - # flush activity which can make it difficult to keep your disks fed - # under heavy write load. - # - # memtable_cleanup_threshold defaults to 1 / (memtable_flush_writers + 1) - # memtable_cleanup_threshold: 0.11 - - # Specify the way Cassandra allocates and manages memtable memory. - # Options are: - # - # heap_buffers - # on heap nio buffers - # - # offheap_buffers - # off heap (direct) nio buffers - # - # offheap_objects - # off heap objects - memtable_allocation_type: heap_buffers - - # Total space to use for commit logs on disk. - # - # If space gets above this value, Cassandra will flush every dirty CF - # in the oldest segment and remove it. So a small total commitlog space - # will tend to cause more flush activity on less-active columnfamilies. - # - # The default value is the smaller of 8192, and 1/4 of the total space - # of the commitlog volume. - # - # commitlog_total_space_in_mb: 8192 - - # This sets the number of memtable flush writer threads per disk - # as well as the total number of memtables that can be flushed concurrently. - # These are generally a combination of compute and IO bound. - # - # Memtable flushing is more CPU efficient than memtable ingest and a single thread - # can keep up with the ingest rate of a whole server on a single fast disk - # until it temporarily becomes IO bound under contention typically with compaction. - # At that point you need multiple flush threads. At some point in the future - # it may become CPU bound all the time. - # - # You can tell if flushing is falling behind using the MemtablePool.BlockedOnAllocation - # metric which should be 0, but will be non-zero if threads are blocked waiting on flushing - # to free memory. - # - # memtable_flush_writers defaults to two for a single data directory. - # This means that two memtables can be flushed concurrently to the single data directory. - # If you have multiple data directories the default is one memtable flushing at a time - # but the flush will use a thread per data directory so you will get two or more writers. - # - # Two is generally enough to flush on a fast disk [array] mounted as a single data directory. - # Adding more flush writers will result in smaller more frequent flushes that introduce more - # compaction overhead. - # - # There is a direct tradeoff between number of memtables that can be flushed concurrently - # and flush size and frequency. More is not better you just need enough flush writers - # to never stall waiting for flushing to free memory. - # - #memtable_flush_writers: 2 - - # Total space to use for change-data-capture logs on disk. - # - # If space gets above this value, Cassandra will throw WriteTimeoutException - # on Mutations including tables with CDC enabled. A CDCCompactor is responsible - # for parsing the raw CDC logs and deleting them when parsing is completed. - # - # The default value is the min of 4096 mb and 1/8th of the total space - # of the drive where cdc_raw_directory resides. - # cdc_total_space_in_mb: 4096 - - # When we hit our cdc_raw limit and the CDCCompactor is either running behind - # or experiencing backpressure, we check at the following interval to see if any - # new space for cdc-tracked tables has been made available. Default to 250ms - # cdc_free_space_check_interval_ms: 250 - - # A fixed memory pool size in MB for for SSTable index summaries. If left - # empty, this will default to 5% of the heap size. 
If the memory usage of - # all index summaries exceeds this limit, SSTables with low read rates will - # shrink their index summaries in order to meet this limit. However, this - # is a best-effort process. In extreme conditions Cassandra may need to use - # more than this amount of memory. - #index_summary_capacity_in_mb: - - # How frequently index summaries should be resampled. This is done - # periodically to redistribute memory from the fixed-size pool to sstables - # proportional their recent read rates. Setting to -1 will disable this - # process, leaving existing index summaries at their current sampling level. - index_summary_resize_interval_in_minutes: 60 - - # Whether to, when doing sequential writing, fsync() at intervals in - # order to force the operating system to flush the dirty - # buffers. Enable this to avoid sudden dirty buffer flushing from - # impacting read latencies. Almost always a good idea on SSDs; not - # necessarily on platters. - trickle_fsync: true - trickle_fsync_interval_in_kb: 10240 - - # TCP port, for commands and data - # For security reasons, you should not expose this port to the internet. Firewall it if needed. - storage_port: 7000 - - # SSL port, for encrypted communication. Unused unless enabled in - # encryption_options - # For security reasons, you should not expose this port to the internet. Firewall it if needed. - ssl_storage_port: 7001 - - # Set listen_address OR listen_interface, not both. Interfaces must correspond - # to a single address, IP aliasing is not supported. - # listen_interface: eth0 - - # If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address - # you can specify which should be chosen using listen_interface_prefer_ipv6. If false the first ipv4 - # address will be used. If true the first ipv6 address will be used. Defaults to false preferring - # ipv4. If there is only one address it will be selected regardless of ipv4/ipv6. - # listen_interface_prefer_ipv6: false - - # Address to broadcast to other Cassandra nodes - # Leaving this blank will set it to the same value as listen_address - # broadcast_address: 1.2.3.4 - - # When using multiple physical network interfaces, set this - # to true to listen on broadcast_address in addition to - # the listen_address, allowing nodes to communicate in both - # interfaces. - # Ignore this property if the network configuration automatically - # routes between the public and private networks such as EC2. - # listen_on_broadcast_address: false - - # Internode authentication backend, implementing IInternodeAuthenticator; - # used to allow/disallow connections from peer nodes. - # internode_authenticator: org.apache.cassandra.auth.AllowAllInternodeAuthenticator - - # Whether to start the native transport server. - # Please note that the address on which the native transport is bound is the - # same as the rpc_address. The port however is different and specified below. - start_native_transport: true - # port for the CQL native transport to listen for clients on - # For security reasons, you should not expose this port to the internet. Firewall it if needed. 
+cassandra::nodes: + cassandra01.internal.softwareheritage.org: + instances: + instance1: + cluster_name: archive_production + cassandra02.internal.softwareheritage.org: + instances: + instance1: + cluster_name: archive_production + cassandra03.internal.softwareheritage.org: + instances: + instance1: + cluster_name: archive_production + cassandra04.internal.softwareheritage.org: + instances: + instance1: + cluster_name: archive_production + cassandra05.internal.softwareheritage.org: + instances: + instance1: + cluster_name: archive_production + cassandra06.internal.softwareheritage.org: + instances: + instance1: + cluster_name: archive_production + +cassandra::clusters: + archive_production: + seed_provider: + - class_name: org.apache.cassandra.locator.SimpleSeedProvider + parameters: + - seeds: "cassandra01.internal.softwareheritage.org:7000" + +cassandra::default_instance_configuration: + cluster_name: "%{lookup('cassandra::default_cluster_name')}" + datacenter: "%{::subnet}" + # rack: "%{::hostname}" + rack: rack1 native_transport_port: 9042 - # Enabling native transport encryption in client_encryption_options allows you to either use - # encryption for the standard port or to use a dedicated, additional port along with the unencrypted - # standard native_transport_port. - # Enabling client encryption and keeping native_transport_port_ssl disabled will use encryption - # for native_transport_port. Setting native_transport_port_ssl to a different value - # from native_transport_port will use encryption for native_transport_port_ssl while - # keeping native_transport_port unencrypted. - # native_transport_port_ssl: 9142 - # The maximum threads for handling requests when the native transport is used. - # This is similar to rpc_max_threads though the default differs slightly (and - # there is no native_transport_min_threads, idle threads will always be stopped - # after 30 seconds). - # native_transport_max_threads: 128 - # - # The maximum size of allowed frame. Frame (requests) larger than this will - # be rejected as invalid. The default is 256MB. If you're changing this parameter, - # you may want to adjust max_value_size_in_mb accordingly. This should be positive and less than 2048. - # native_transport_max_frame_size_in_mb: 256 - - # The maximum number of concurrent client connections. - # The default is -1, which means unlimited. - # native_transport_max_concurrent_connections: -1 - - # The maximum number of concurrent client connections per source ip. - # The default is -1, which means unlimited. - # native_transport_max_concurrent_connections_per_ip: -1 - - # Whether to start the thrift rpc server. - start_rpc: false - - # Set rpc_address OR rpc_interface, not both. Interfaces must correspond - # to a single address, IP aliasing is not supported. - # rpc_interface: eth1 - - # If you choose to specify the interface by name and the interface has an ipv4 and an ipv6 address - # you can specify which should be chosen using rpc_interface_prefer_ipv6. If false the first ipv4 - # address will be used. If true the first ipv6 address will be used. Defaults to false preferring - # ipv4. If there is only one address it will be selected regardless of ipv4/ipv6. - # rpc_interface_prefer_ipv6: false - - # port for Thrift to listen for clients on - rpc_port: 9160 - - # RPC address to broadcast to drivers and other Cassandra nodes. This cannot - # be set to 0.0.0.0. If left blank, this will be set to the value of - # rpc_address. 
If rpc_address is set to 0.0.0.0, broadcast_rpc_address must - # be set. - # broadcast_rpc_address: 1.2.3.4 - - # enable or disable keepalive on rpc/native connections - rpc_keepalive: true - - # Cassandra provides two out-of-the-box options for the RPC Server: - # - # sync - # One thread per thrift connection. For a very large number of clients, memory - # will be your limiting factor. On a 64 bit JVM, 180KB is the minimum stack size - # per thread, and that will correspond to your use of virtual memory (but physical memory - # may be limited depending on use of stack space). - # - # hsha - # Stands for "half synchronous, half asynchronous." All thrift clients are handled - # asynchronously using a small number of threads that does not vary with the amount - # of thrift clients (and thus scales well to many clients). The rpc requests are still - # synchronous (one thread per active request). If hsha is selected then it is essential - # that rpc_max_threads is changed from the default value of unlimited. - # - # The default is sync because on Windows hsha is about 30% slower. On Linux, - # sync/hsha performance is about the same, with hsha of course using less memory. - # - # Alternatively, can provide your own RPC server by providing the fully-qualified class name - # of an o.a.c.t.TServerFactory that can create an instance of it. - rpc_server_type: sync - - # Uncomment rpc_min|max_thread to set request pool size limits. - # - # Regardless of your choice of RPC server (see above), the number of maximum requests in the - # RPC thread pool dictates how many concurrent requests are possible (but if you are using the sync - # RPC server, it also dictates the number of clients that can be connected at all). - # - # The default is unlimited and thus provides no protection against clients overwhelming the server. You are - # encouraged to set a maximum that makes sense for you in production, but do keep in mind that - # rpc_max_threads represents the maximum number of client requests this server may execute concurrently. - # - # rpc_min_threads: 16 - # rpc_max_threads: 2048 - - # uncomment to set socket buffer sizes on rpc connections - # rpc_send_buff_size_in_bytes: - # rpc_recv_buff_size_in_bytes: - - # Uncomment to set socket buffer size for internode communication - # Note that when setting this, the buffer size is limited by net.core.wmem_max - # and when not setting it it is defined by net.ipv4.tcp_wmem - # See also: - # /proc/sys/net/core/wmem_max - # /proc/sys/net/core/rmem_max - # /proc/sys/net/ipv4/tcp_wmem - # /proc/sys/net/ipv4/tcp_wmem - # and 'man tcp' - # internode_send_buff_size_in_bytes: - - # Uncomment to set socket buffer size for internode communication - # Note that when setting this, the buffer size is limited by net.core.wmem_max - # and when not setting it it is defined by net.ipv4.tcp_wmem - # internode_recv_buff_size_in_bytes: - - # Frame size for thrift (maximum message length). - thrift_framed_transport_size_in_mb: 15 - - # Set to true to have Cassandra create a hard link to each sstable - # flushed or streamed locally in a backups/ subdirectory of the - # keyspace data. Removing these links is the operator's - # responsibility. - incremental_backups: false - - # Whether or not to take a snapshot before each compaction. Be - # careful using this option, since Cassandra won't clean up the - # snapshots for you. Mostly useful if you're paranoid when there - # is a data format change. 
- snapshot_before_compaction: false - - # Whether or not a snapshot is taken of the data before keyspace truncation - # or dropping of column families. The STRONGLY advised default of true - # should be used to provide data safety. If you set this flag to false, you will - # lose data on truncation or drop. - auto_snapshot: true - - # Granularity of the collation index of rows within a partition. - # Increase if your rows are large, or if you have a very large - # number of rows per partition. The competing goals are these: - # - # - a smaller granularity means more index entries are generated - # and looking up rows withing the partition by collation column - # is faster - # - but, Cassandra will keep the collation index in memory for hot - # rows (as part of the key cache), so a larger granularity means - # you can cache more hot rows - column_index_size_in_kb: 64 - - # Per sstable indexed key cache entries (the collation index in memory - # mentioned above) exceeding this size will not be held on heap. - # This means that only partition information is held on heap and the - # index entries are read from disk. - # - # Note that this size refers to the size of the - # serialized index information and not the size of the partition. - column_index_cache_size_in_kb: 2 - - # Number of simultaneous compactions to allow, NOT including - # validation "compactions" for anti-entropy repair. Simultaneous - # compactions can help preserve read performance in a mixed read/write - # workload, by mitigating the tendency of small sstables to accumulate - # during a single long running compactions. The default is usually - # fine and if you experience problems with compaction running too - # slowly or too fast, you should look at - # compaction_throughput_mb_per_sec first. - # - # concurrent_compactors defaults to the smaller of (number of disks, - # number of cores), with a minimum of 2 and a maximum of 8. - # - # If your data directories are backed by SSD, you should increase this - # to the number of cores. - #concurrent_compactors: 1 - - # Throttles compaction to the given total throughput across the entire - # system. The faster you insert data, the faster you need to compact in - # order to keep the sstable count down, but in general, setting this to - # 16 to 32 times the rate you are inserting data is more than sufficient. - # Setting this to 0 disables throttling. Note that this account for all types - # of compaction, including validation compaction. - compaction_throughput_mb_per_sec: 16 - - # When compacting, the replacement sstable(s) can be opened before they - # are completely written, and used in place of the prior sstables for - # any range that has been written. This helps to smoothly transfer reads - # between the sstables, reducing page cache churn and keeping hot rows hot - sstable_preemptive_open_interval_in_mb: 50 - - # Throttles all outbound streaming file transfers on this node to the - # given total throughput in Mbps. This is necessary because Cassandra does - # mostly sequential IO when streaming data during bootstrap or repair, which - # can lead to saturating the network connection and degrading rpc performance. - # When unset, the default is 200 Mbps or 25 MB/s. 
- # stream_throughput_outbound_megabits_per_sec: 200
-
- # Throttles all streaming file transfer between the datacenters,
- # this setting allows users to throttle inter dc stream throughput in addition
- # to throttling all network stream traffic as configured with
- # stream_throughput_outbound_megabits_per_sec
- # When unset, the default is 200 Mbps or 25 MB/s
- # inter_dc_stream_throughput_outbound_megabits_per_sec: 200
-
- # How long the coordinator should wait for read operations to complete
+ storage_port: 7000
+ jmx_port: 7199
+ jmx_exporter_port: 7070
+
+# Reflects the base of the cassandra.yaml content.
+# This list is completed and/or overridden in cassandra::instance.
+cassandra::base_instance_configuration:
+ num_tokens: 16
+ allocate_tokens_for_local_replication_factor: 3
+ disk_optimization_strategy: ssd
+ concurrent_compactors: 4 # should be min(number of cores, number of disks)
+ internode_compression: dc # default: dc; possible values: all|dc|none
+ concurrent_reads: 64 # 16 x number of drives
+ concurrent_writes: 128 # 8 x number of cores
+ concurrent_counter_writes: 48
+ commitlog_sync: periodic # default: periodic
+ commitlog_sync_period_in_ms: 10000 # default: 10000
+ commitlog_total_space_in_mb: 16384 # default: 8192
+ commitlog_segment_size_in_mb: 256 # default: 32; raised because oversized mutations on the revision table exceed the default max_mutation_size_in_kb (half a segment)
+ partitioner: org.apache.cassandra.dht.Murmur3Partitioner
+ endpoint_snitch: GossipingPropertyFileSnitch
+ enable_user_defined_functions: true # needed by swh-storage
+ compaction_throughput_mb_per_sec: 160
 read_request_timeout_in_ms: 5000
- # How long the coordinator should wait for seq or index scans to complete
 range_request_timeout_in_ms: 10000
- # How long the coordinator should wait for writes to complete
 write_request_timeout_in_ms: 2000
- # How long the coordinator should wait for counter writes to complete
 counter_write_request_timeout_in_ms: 5000
- # How long a coordinator should continue to retry a CAS operation
- # that contends with other proposals for the same row
 cas_contention_timeout_in_ms: 1000
- # How long the coordinator should wait for truncates to complete
- # (This can be much longer, because unless auto_snapshot is disabled
- # we need to flush first so we can snapshot before removing the data.)
 truncate_request_timeout_in_ms: 60000
- # The default timeout for other, miscellaneous operations
 request_timeout_in_ms: 10000
-
- # How long before a node logs slow queries. Select queries that take longer than
- # this timeout to execute, will generate an aggregated log message, so that slow queries
- # can be identified. Set this value to zero to disable slow query logging.
- slow_query_log_timeout_in_ms: 500
-
- # Enable operation timeout information exchange between nodes to accurately
- # measure request timeouts. If disabled, replicas will assume that requests
- # were forwarded to them instantly by the coordinator, which means that
- # under overload conditions we will waste that much extra time processing
- # already-timed-out requests.
- #
- # Warning: before enabling this property make sure to ntp is installed
- # and the times are synchronized between the nodes.
- cross_node_timeout: false
-
- # Set keep-alive period for streaming
- # This node will send a keep-alive message periodically with this period.
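To make the hiera layering concrete, here is a sketch of the cassandra.yaml fragment these maps should yield for one instance, assuming cassandra::instance merges default_instance_configuration with base_instance_configuration (selection of keys illustrative; values taken from the maps above and the deployment files below):

    cluster_name: archive_production
    num_tokens: 16
    allocate_tokens_for_local_replication_factor: 3
    endpoint_snitch: GossipingPropertyFileSnitch
    storage_port: 7000
    native_transport_port: 9042
    read_request_timeout_in_ms: 5000
    slow_query_log_timeout_in_ms: 1000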
- # If the node does not receive a keep-alive message from the peer for - # 2 keep-alive cycles the stream session times out and fail - # Default value is 300s (5 minutes), which means stalled stream - # times out in 10 minutes by default - # streaming_keep_alive_period_in_secs: 300 - - # phi value that must be reached for a host to be marked down. - # most users should never need to adjust this. - # phi_convict_threshold: 8 - - # endpoint_snitch -- Set this to a class that implements - # IEndpointSnitch. The snitch has two functions: - # - # - it teaches Cassandra enough about your network topology to route - # requests efficiently - # - it allows Cassandra to spread replicas around your cluster to avoid - # correlated failures. It does this by grouping machines into - # "datacenters" and "racks." Cassandra will do its best not to have - # more than one replica on the same "rack" (which may not actually - # be a physical location) - # - # CASSANDRA WILL NOT ALLOW YOU TO SWITCH TO AN INCOMPATIBLE SNITCH - # ONCE DATA IS INSERTED INTO THE CLUSTER. This would cause data loss. - # This means that if you start with the default SimpleSnitch, which - # locates every node on "rack1" in "datacenter1", your only options - # if you need to add another datacenter are GossipingPropertyFileSnitch - # (and the older PFS). From there, if you want to migrate to an - # incompatible snitch like Ec2Snitch you can do it by adding new nodes - # under Ec2Snitch (which will locate them in a new "datacenter") and - # decommissioning the old ones. - # - # Out of the box, Cassandra provides: - # - # SimpleSnitch: - # Treats Strategy order as proximity. This can improve cache - # locality when disabling read repair. Only appropriate for - # single-datacenter deployments. - # - # GossipingPropertyFileSnitch - # This should be your go-to snitch for production use. The rack - # and datacenter for the local node are defined in - # cassandra-rackdc.properties and propagated to other nodes via - # gossip. If cassandra-topology.properties exists, it is used as a - # fallback, allowing migration from the PropertyFileSnitch. - # - # PropertyFileSnitch: - # Proximity is determined by rack and data center, which are - # explicitly configured in cassandra-topology.properties. - # - # Ec2Snitch: - # Appropriate for EC2 deployments in a single Region. Loads Region - # and Availability Zone information from the EC2 API. The Region is - # treated as the datacenter, and the Availability Zone as the rack. - # Only private IPs are used, so this will not work across multiple - # Regions. - # - # Ec2MultiRegionSnitch: - # Uses public IPs as broadcast_address to allow cross-region - # connectivity. (Thus, you should set seed addresses to the public - # IP as well.) You will need to open the storage_port or - # ssl_storage_port on the public IP firewall. (For intra-Region - # traffic, Cassandra will switch to the private IP after - # establishing a connection.) - # - # RackInferringSnitch: - # Proximity is determined by rack and data center, which are - # assumed to correspond to the 3rd and 2nd octet of each node's IP - # address, respectively. Unless this happens to match your - # deployment conventions, this is best used as an example of - # writing a custom Snitch class and is provided in that spirit. - # - # You can use a custom Snitch by setting this to the full class name - # of the snitch, which will be assumed to be on your classpath. 
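Given that the new stanza switches to GossipingPropertyFileSnitch with datacenter: "%{::subnet}" and rack: rack1, each node would carry a cassandra-rackdc.properties along these lines (the dc value is hypothetical — it is whatever the node's subnet fact renders to):

    dc=192.168.100.0_24
    rack=rack1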
- endpoint_snitch: SimpleSnitch - - # controls how often to perform the more expensive part of host score - # calculation - dynamic_snitch_update_interval_in_ms: 100 - # controls how often to reset all host scores, allowing a bad host to - # possibly recover - dynamic_snitch_reset_interval_in_ms: 600000 - # if set greater than zero and read_repair_chance is < 1.0, this will allow - # 'pinning' of replicas to hosts in order to increase cache capacity. - # The badness threshold will control how much worse the pinned host has to be - # before the dynamic snitch will prefer other replicas over it. This is - # expressed as a double which represents a percentage. Thus, a value of - # 0.2 means Cassandra would continue to prefer the static snitch values - # until the pinned host was 20% worse than the fastest. - dynamic_snitch_badness_threshold: 0.1 - - # request_scheduler -- Set this to a class that implements - # RequestScheduler, which will schedule incoming client requests - # according to the specific policy. This is useful for multi-tenancy - # with a single Cassandra cluster. - # NOTE: This is specifically for requests from the client and does - # not affect inter node communication. - # org.apache.cassandra.scheduler.NoScheduler - No scheduling takes place - # org.apache.cassandra.scheduler.RoundRobinScheduler - Round robin of - # client requests to a node with a separate queue for each - # request_scheduler_id. The scheduler is further customized by - # request_scheduler_options as described below. - request_scheduler: org.apache.cassandra.scheduler.NoScheduler - - # Scheduler Options vary based on the type of scheduler - # - # NoScheduler - # Has no options - # - # RoundRobin - # throttle_limit - # The throttle_limit is the number of in-flight - # requests per client. Requests beyond - # that limit are queued up until - # running requests can complete. - # The value of 80 here is twice the number of - # concurrent_reads + concurrent_writes. - # default_weight - # default_weight is optional and allows for - # overriding the default which is 1. - # weights - # Weights are optional and will default to 1 or the - # overridden default_weight. The weight translates into how - # many requests are handled during each turn of the - # RoundRobin, based on the scheduler id. - # - # request_scheduler_options: - # throttle_limit: 80 - # default_weight: 5 - # weights: - # Keyspace1: 1 - # Keyspace2: 5 - - # request_scheduler_id -- An identifier based on which to perform - # the request scheduling. Currently the only valid option is keyspace. - # request_scheduler_id: keyspace - - # Enable or disable inter-node encryption - # JVM defaults for supported SSL socket protocols and cipher suites can - # be replaced using custom encryption options. This is not recommended - # unless you have policies in place that dictate certain settings, or - # need to disable vulnerable ciphers or protocols in case the JVM cannot - # be updated. 
- # FIPS compliant settings can be configured at JVM level and should not - # involve changing encryption settings here: - # https://docs.oracle.com/javase/8/docs/technotes/guides/security/jsse/FIPS.html - # *NOTE* No custom encryption options are enabled at the moment - # The available internode options are : all, none, dc, rack - # - # If set to dc cassandra will encrypt the traffic between the DCs - # If set to rack cassandra will encrypt the traffic between the racks - # - # The passwords used in these options must match the passwords used when generating - # the keystore and truststore. For instructions on generating these files, see: - # http://download.oracle.com/javase/6/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore - # - server_encryption_options: - internode_encryption: none - keystore: conf/.keystore - keystore_password: cassandra - truststore: conf/.truststore - truststore_password: cassandra - # More advanced defaults below: - # protocol: TLS - # algorithm: SunX509 - # store_type: JKS - # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA] - # require_client_auth: false - # require_endpoint_verification: false - - # enable or disable client/server encryption. - client_encryption_options: - enabled: false - # If enabled and optional is set to true encrypted and unencrypted connections are handled. - optional: false - keystore: conf/.keystore - keystore_password: cassandra - # require_client_auth: false - # Set trustore and truststore_password if require_client_auth is true - # truststore: conf/.truststore - # truststore_password: cassandra - # More advanced defaults below: - # protocol: TLS - # algorithm: SunX509 - # store_type: JKS - # cipher_suites: [TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_128_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA] - - # internode_compression controls whether traffic between nodes is - # compressed. - # Can be: - # - # all - # all traffic is compressed - # - # dc - # traffic between different datacenters is compressed - # - # none - # nothing is compressed. - internode_compression: dc - - # Enable or disable tcp_nodelay for inter-dc communication. - # Disabling it will result in larger (but fewer) network packets being sent, - # reducing overhead from the TCP protocol itself, at the cost of increasing - # latency if you block for cross-datacenter responses. - inter_dc_tcp_nodelay: false - - # TTL for different trace types used during logging of the repair process. - tracetype_query_ttl: 86400 - tracetype_repair_ttl: 604800 - - # By default, Cassandra logs GC Pauses greater than 200 ms at INFO level - # This threshold can be adjusted to minimize logging if necessary - # gc_log_threshold_in_ms: 200 - - # If unset, all GC Pauses greater than gc_log_threshold_in_ms will log at - # INFO level - # UDFs (user defined functions) are disabled by default. - # As of Cassandra 3.0 there is a sandbox in place that should prevent execution of evil code. - enable_user_defined_functions: true - - # Enables scripted UDFs (JavaScript UDFs). - # Java UDFs are always enabled, if enable_user_defined_functions is true. - # Enable this option to be able to use UDFs with "language javascript" or any custom JSR-223 provider. 
- # This option has no effect, if enable_user_defined_functions is false. - enable_scripted_user_defined_functions: false - - # Enables materialized view creation on this node. - # Materialized views are considered experimental and are not recommended for production use. - enable_materialized_views: true - - # The default Windows kernel timer and scheduling resolution is 15.6ms for power conservation. - # Lowering this value on Windows can provide much tighter latency and better throughput, however - # some virtualized environments may see a negative performance impact from changing this setting - # below their system default. The sysinternals 'clockres' tool can confirm your system's default - # setting. - windows_timer_interval: 1 - - - # Enables encrypting data at-rest (on disk). Different key providers can be plugged in, but the default reads from - # a JCE-style keystore. A single keystore can hold multiple keys, but the one referenced by - # the "key_alias" is the only key that will be used for encrypt opertaions; previously used keys - # can still (and should!) be in the keystore and will be used on decrypt operations - # (to handle the case of key rotation). - # - # It is strongly recommended to download and install Java Cryptography Extension (JCE) - # Unlimited Strength Jurisdiction Policy Files for your version of the JDK. - # (current link: http://www.oracle.com/technetwork/java/javase/downloads/jce8-download-2133166.html) - # - # Currently, only the following file types are supported for transparent data encryption, although - # more are coming in future cassandra releases: commitlog, hints - transparent_data_encryption_options: - enabled: false - chunk_length_kb: 64 - cipher: AES/CBC/PKCS5Padding - key_alias: testing:1 - # CBC IV length for AES needs to be 16 bytes (which is also the default size) - # iv_length: 16 - key_provider: - - class_name: org.apache.cassandra.security.JKSKeyProvider - parameters: - - keystore: conf/.keystore - keystore_password: cassandra - store_type: JCEKS - key_password: cassandra - - - ##################### - # SAFETY THRESHOLDS # - ##################### - - # When executing a scan, within or across a partition, we need to keep the - # tombstones seen in memory so we can return them to the coordinator, which - # will use them to make sure other replicas also know about the deleted rows. - # With workloads that generate a lot of tombstones, this can cause performance - # problems and even exaust the server heap. - # (http://www.datastax.com/dev/blog/cassandra-anti-patterns-queues-and-queue-like-datasets) - # Adjust the thresholds here if you understand the dangers and want to - # scan more tombstones anyway. These thresholds may also be adjusted at runtime - # using the StorageService mbean. - tombstone_warn_threshold: 1000 - tombstone_failure_threshold: 100000 - - # Log WARN on any multiple-partition batch size exceeding this value. 5kb per batch by default. - # Caution should be taken on increasing the size of this threshold as it can lead to node instability. - batch_size_warn_threshold_in_kb: 5 - - # Fail any multiple-partition batch exceeding this value. 50kb (10x warn threshold) by default. 
- batch_size_fail_threshold_in_kb: 50 - - # Log WARN on any batches not of type LOGGED than span across more partitions than this limit - unlogged_batch_across_partitions_warn_threshold: 10 - - # Log a warning when compacting partitions larger than this value - compaction_large_partition_warning_threshold_mb: 100 - - # GC Pauses greater than gc_warn_threshold_in_ms will be logged at WARN level - # Adjust the threshold based on your application throughput requirement - # By default, Cassandra logs GC Pauses greater than 200 ms at INFO level - gc_warn_threshold_in_ms: 1000 - - # Maximum size of any value in SSTables. Safety measure to detect SSTable corruption - # early. Any value size larger than this threshold will result into marking an SSTable - # as corrupted. This should be positive and less than 2048. - # max_value_size_in_mb: 256 - - # Back-pressure settings # - # If enabled, the coordinator will apply the back-pressure strategy specified below to each mutation - # sent to replicas, with the aim of reducing pressure on overloaded replicas. - back_pressure_enabled: false - # The back-pressure strategy applied. - # The default implementation, RateBasedBackPressure, takes three arguments: - # high ratio, factor, and flow type, and uses the ratio between incoming mutation responses and outgoing mutation requests. - # If below high ratio, outgoing mutations are rate limited according to the incoming rate decreased by the given factor; - # if above high ratio, the rate limiting is increased by the given factor; - # such factor is usually best configured between 1 and 10, use larger values for a faster recovery - # at the expense of potentially more dropped mutations; - # the rate limiting is applied according to the flow type: if FAST, it's rate limited at the speed of the fastest replica, - # if SLOW at the speed of the slowest one. - # New strategies can be added. Implementors need to implement org.apache.cassandra.net.BackpressureStrategy and - # provide a public constructor accepting a Map. - back_pressure_strategy: - - class_name: org.apache.cassandra.net.RateBasedBackPressure - parameters: - - high_ratio: 0.90 - factor: 5 - flow: FAST - - # Coalescing Strategies # - # Coalescing multiples messages turns out to significantly boost message processing throughput (think doubling or more). - # On bare metal, the floor for packet processing throughput is high enough that many applications won't notice, but in - # virtualized environments, the point at which an application can be bound by network packet processing can be - # surprisingly low compared to the throughput of task processing that is possible inside a VM. It's not that bare metal - # doesn't benefit from coalescing messages, it's that the number of packets a bare metal network interface can process - # is sufficient for many applications such that no load starvation is experienced even without coalescing. - # There are other benefits to coalescing network messages that are harder to isolate with a simple metric like messages - # per second. By coalescing multiple tasks together, a network thread can process multiple messages for the cost of one - # trip to read from a socket, and all the task submission work can be done at the same time reducing context switching - # and increasing cache friendliness of network message processing. - # See CASSANDRA-8692 for details. - - # Strategy to use for coalescing messages in OutboundTcpConnection. - # Can be fixed, movingaverage, timehorizon, disabled (default). 
- # You can also specify a subclass of CoalescingStrategies.CoalescingStrategy by name. - # otc_coalescing_strategy: DISABLED - - # How many microseconds to wait for coalescing. For fixed strategy this is the amount of time after the first - # message is received before it will be sent with any accompanying messages. For moving average this is the - # maximum amount of time that will be waited as well as the interval at which messages must arrive on average - # for coalescing to be enabled. - # otc_coalescing_window_us: 200 - - # Do not try to coalesce messages if we already got that many messages. This should be more than 2 and less than 128. - # otc_coalescing_enough_coalesced_messages: 8 - - # How many milliseconds to wait between two expiration runs on the backlog (queue) of the OutboundTcpConnection. - # Expiration is done if messages are piling up in the backlog. Droppable messages are expired to free the memory - # taken by expired messages. The interval should be between 0 and 1000, and in most installations the default value - # will be appropriate. A smaller value could potentially expire messages slightly sooner at the expense of more CPU - # time and queue contention while iterating the backlog of messages. - # An interval of 0 disables any wait time, which is the behavior of former Cassandra versions. - # - # otc_backlog_expiration_interval_ms: 200 - -cassandra::clusters: - azure: - cluster_name: SWH on Azure - seed_provider: - - class_name: org.apache.cassandra.locator.SimpleSeedProvider - parameters: - - seeds: 192.168.200.27 # cassandra01.euwest.azure.internal.softwareheritage.org + slow_query_log_timeout_in_ms: 1000 diff --git a/data/deployments/production/common.yaml b/data/deployments/production/common.yaml index e3748902..87f1692b 100644 --- a/data/deployments/production/common.yaml +++ b/data/deployments/production/common.yaml @@ -1,26 +1,28 @@ swh::deploy::deposit::reverse_proxy::backend_http_host: "::1" swh::deploy::webapp::reverse_proxy::backend_http_host: "::1" elasticsearch::config::cluster::name: swh-logging-prod elasticsearch::config::discovery::seed_hosts: - esnode1.internal.softwareheritage.org - esnode2.internal.softwareheritage.org - esnode3.internal.softwareheritage.org elasticsearch::config::cluster::initial_master_nodes: - esnode1 - esnode2 - esnode3 elasticsearch::jvm_options::heap_size: 16g swh::postgresql::shared_buffers: 4GB swh::deploy::webapp::url: "https://archive.softwareheritage.org" swh::deploy::webapp::inbound_email::domain: "%{lookup('swh::deploy::webapp::inbound_email::production::domain')}" # e2e checks on vault swh::deploy::vault::e2e::storage: "http://saam.internal.softwareheritage.org:%{hiera('swh::remote_service::storage::port')}" swh::deploy::worker::loader_git::concurrency: 4 memcached::server::max_memory: 1224 + +cassandra::default_cluster_name: archive_production diff --git a/data/deployments/staging/common.yaml b/data/deployments/staging/common.yaml index edca5f1d..4e3be3b6 100644 --- a/data/deployments/staging/common.yaml +++ b/data/deployments/staging/common.yaml @@ -1,391 +1,393 @@ --- swh::deploy::environment: staging dns::search_domains: - internal.staging.swh.network swh::deploy::storage::db::host: db1.internal.staging.swh.network swh::deploy::storage::db::user: swh swh::deploy::storage::db::dbname: swh swh::deploy::indexer::storage::db::host: db1.internal.staging.swh.network swh::deploy::indexer::storage::db::user: swh-indexer swh::deploy::indexer::storage::db::dbname: swh-indexer swh::deploy::scheduler::db::host: 
db1.internal.staging.swh.network swh::deploy::scheduler::db::dbname: swh-scheduler swh::deploy::scheduler::db::user: swh-scheduler swh::deploy::deposit::db::host: db1.internal.staging.swh.network swh::deploy::deposit::db::dbuser: swh-deposit swh::deploy::deposit::db::dbname: swh-deposit swh::deploy::vault::db::host: db1.internal.staging.swh.network swh::deploy::vault::db::user: swh-vault swh::deploy::vault::db::dbname: swh-vault swh::deploy::worker::lister::db::host: db1.internal.staging.swh.network swh::deploy::worker::lister::db::user: swh-lister swh::deploy::worker::lister::db::name: swh-lister swh::deploy::scrubber::db::host: db1.internal.staging.swh.network swh::deploy::webapp::db::host: db1.internal.staging.swh.network # swh::deploy::webapp::db::password in private data swh::deploy::webapp::inbound_email::domain: "%{lookup('swh::deploy::webapp::inbound_email::staging::domain')}" swh::deploy::worker::instances: - indexer_content_mimetype - indexer_fossology_license - indexer_origin_intrinsic_metadata - indexer_origin_extrinsic_metadata - checker_deposit - loader_archive - loader_bzr - loader_cran - loader_debian - loader_deposit - loader_nixguix - loader_git - loader_mercurial - loader_npm - loader_pypi - loader_svn - vault_cooker - lister - loader_high_priority - loader_opam # only in staging - loader_cvs - loader_maven swh::deploy::lister::queues: - swh.lister.bitbucket.tasks.IncrementalBitBucketLister - swh.lister.bitbucket.tasks.FullBitBucketRelister - swh.lister.cgit.tasks.CGitListerTask - swh.lister.cran.tasks.CRANListerTask - swh.lister.debian.tasks.DebianListerTask - swh.lister.gitea.tasks.IncrementalGiteaLister - swh.lister.gitea.tasks.RangeGiteaLister - swh.lister.gitea.tasks.FullGiteaRelister - swh.lister.gitlab.tasks.IncrementalGitLabLister - swh.lister.gitlab.tasks.RangeGitLabLister - swh.lister.gitlab.tasks.FullGitLabRelister - swh.lister.gnu.tasks.GNUListerTask - swh.lister.launchpad.tasks.FullLaunchpadLister - swh.lister.launchpad.tasks.IncrementalLaunchpadLister - swh.lister.opam.tasks.OpamListerTask - swh.lister.npm.tasks.NpmListerTask - swh.lister.phabricator.tasks.FullPhabricatorLister - swh.lister.pypi.tasks.PyPIListerTask - swh.lister.sourceforge.tasks.FullSourceForgeLister - swh.lister.sourceforge.tasks.IncrementalSourceForgeLister # extra queues - swh.lister.maven.tasks.FullMavenLister - swh.lister.maven.tasks.IncrementalMavenLister #### Rabbitmq instance to use # swh::deploy::worker::task_broker::password in private data swh::deploy::worker::task_broker: "amqp://swhconsumer:%{hiera('swh::deploy::worker::task_broker::password')}@scheduler0.internal.staging.swh.network:5672/%2f" #### Storage/Indexer/Vault/Scheduler services to use in staging area swh::remote_service::storage0::url: "http://storage1.internal.staging.swh.network:%{hiera('swh::remote_service::storage::port')}/" swh::remote_service::storage::config::storage0: cls: remote url: "%{alias('swh::remote_service::storage0::url')}" swh::remote_service::storage::config: "%{alias('swh::remote_service::storage::config::storage0')}" swh::remote_service::storage::config::writable: &swh_remote_service_storage_config_writable "%{alias('swh::remote_service::storage::config::storage0')}" swh::remote_service::vault::config::vault0: cls: remote url: "http://vault.internal.staging.swh.network:%{hiera('swh::remote_service::vault::port')}/" swh::remote_service::vault::config: "%{alias('swh::remote_service::vault::config::vault0')}" swh::remote_service::vault::config::writable: 
"%{alias('swh::remote_service::vault::config::vault0')}" swh::remote_service::indexer::config::storage0: cls: remote url: "http://storage1.internal.staging.swh.network:%{hiera('swh::remote_service::indexer::port')}/" swh::remote_service::indexer::config: "%{alias('swh::remote_service::indexer::config::storage0')}" swh::remote_service::indexer::config::writable: "%{alias('swh::remote_service::indexer::config::storage0')}" swh::remote_service::scheduler::config::scheduler0: cls: remote url: "http://scheduler0.internal.staging.swh.network:%{hiera('swh::remote_service::scheduler::port')}/" swh::remote_service::scheduler::config: "%{alias('swh::remote_service::scheduler::config::scheduler0')}" swh::remote_service::scheduler::config::writable: "%{alias('swh::remote_service::scheduler::config::scheduler0')}" swh::remote_service::counters::url: "http://counters0.internal.staging.swh.network:%{hiera('swh::remote_service::counters::port')}/" swh::deploy::deposit::url: https://deposit.staging.swh.network swh::deploy::deposit::internal_url: "https://deposit-rp.internal.staging.swh.network" # do not save pack swh::deploy::worker::loader_git::save_data_path: "" swh::deploy::worker::loader_git::concurrency: 1 zookeeper::clusters: rocquencourt_staging: '2': storage1.internal.staging.swh.network kafka::broker::heap_opts: "-Xmx3G -Xms3G" swh::deploy::journal::brokers: - journal1.internal.staging.swh.network swh::deploy::deposit::vhost::letsencrypt_cert: deposit_staging swh::deploy::deposit::reverse_proxy::backend_http_host: deposit.internal.staging.swh.network swh::deploy::webapp::vhost::letsencrypt_cert: archive_staging swh::deploy::webapp::reverse_proxy::backend_http_host: webapp.internal.staging.swh.network swh::deploy::graphql::vhost::letsencrypt_cert: graphql_staging swh::deploy::graphql::vhost::ssl_protocol: "%{hiera('apache::ssl_protocol')}" swh::deploy::graphql::vhost::ssl_honorcipherorder: "%{hiera('apache::ssl_honorcipherorder')}" swh::deploy::graphql::vhost::ssl_cipher: "%{hiera('apache::ssl_cipher')}" swh::deploy::graphql::vhost::hsts_header: "%{hiera('apache::hsts_header')}" swh::deploy::graphql::vhost::access_log_format: combined_with_duration swh::deploy::graphql::icinga_check_string: 'GraphQL' swh::deploy::graphql::reverse_proxy::backend_http_host: graphql-worker0.internal.staging.swh.network swh::deploy::graphql::reverse_proxy::backend_http_port: "80" # swh::deploy::graphql::reverse_proxy::basic_auth::swh-stg in private data swh::deploy::graphql::reverse_proxy::basic_auth::users: - swh-stg swh::remote_service::objstorage::config::rw: cls: remote url: "http://storage1.internal.staging.swh.network:%{hiera('swh::remote_service::objstorage::port')}/" swh::remote_service::objstorage::config::ro: cls: filtered storage_conf: "%{alias('swh::remote_service::objstorage::config::rw')}" filters_conf: - type: readonly swh::deploy::objstorage::vhost::letsencrypt_cert: objstorage_staging swh::deploy::objstorage::reverse_proxy::backend_http_host: objstorage0.internal.staging.swh.network swh::deploy::objstorage::reverse_proxy::basic_auth::users: - swh-stg - enea-stg - snyk-stg-01 swh::deploy::objstorage::backend::public_server_name: objstorage.staging.swh.network objstorage.internal.staging.swh.network swh::remote_service::objstorage::config: "%{alias('swh::remote_service::objstorage::config::ro')}" swh::remote_service::objstorage::config::writable: "%{alias('swh::remote_service::objstorage::config::rw')}" swh::deploy::objstorage::backend::server_names: - 
"%{alias('swh::deploy::objstorage::backend::public_server_name')}" - "%{::swh_hostname.internal_fqdn}" - "%{::hostname}" - 127.0.0.1 - localhost - "::1" swh::deploy::reverse_proxy::services: - deposit - webapp - objstorage - graphql swh::postgresql::version: '12' swh::postgresql::port: 5433 swh::postgresql::cluster_name: "%{lookup('swh::postgresql::version')}/main" swh::postgresql::datadir_base: "%{lookup('swh::base_directory')}/postgres" swh::postgresql::datadir: "%{lookup('swh::postgresql::datadir_base')}/%{lookup('swh::postgresql::cluster_name')}" swh::postgresql::listen_addresses: - 0.0.0.0 swh::postgresql::network_accesses: - 192.168.100.0/24 # Monitoring - 192.168.130.0/24 # Staging services swh::postgresql::shared_buffers: 32GB postgresql::server::config_entries: shared_buffers: "%{alias('swh::postgresql::shared_buffers')}" cluster_name: "%{alias('swh::postgresql::cluster_name')}" swh::dbs: storage: name: swh user: swh scheduler: name: swh-scheduler user: swh-scheduler vault: name: swh-vault user: swh-vault lister: name: swh-lister user: swh-lister deposit: name: swh-deposit user: swh-deposit indexer::storage: name: swh-indexer user: swh-indexer webapp: name: swh-web user: swh-web scrubber: name: swh-scrubber user: swh-scrubber mirror: name: swh-mirror user: swh-mirror password: "%{lookup('swh::deploy::mirror::db::password')}" pgbouncer::auth_hba_file: "/etc/postgresql/%{lookup('swh::postgresql::cluster_name')}/pg_hba.conf" pgbouncer::common::listen_addresses: - 0.0.0.0 pgbouncer::databases: - source_db: swh host: localhost auth_user: postgres port: 5433 alias: staging-swh - source_db: swh-scheduler host: localhost auth_user: postgres port: 5433 alias: staging-swh-scheduler - source_db: swh-vault host: localhost auth_user: postgres port: 5433 alias: staging-swh-vault - source_db: swh-lister host: localhost auth_user: postgres port: 5433 alias: staging-swh-lister - source_db: swh-deposit host: localhost auth_user: postgres port: 5433 alias: staging-swh-deposit - source_db: swh-indexer host: localhost auth_user: postgres port: 5433 alias: staging-swh-indexer - source_db: swh-web host: localhost auth_user: postgres port: 5433 alias: staging-swh-web - source_db: swh-mirror host: localhost auth_user: postgres port: 5433 alias: swh-mirror - source_db: swh-scrubber host: localhost auth_user: postgres port: 5433 alias: staging-swh-scrubber # open objstorage api swh::deploy::objstorage::backend::listen::host: 0.0.0.0 swh::deploy::objstorage::backend::workers: 16 swh::deploy::objstorage::directory: "%{hiera('swh::deploy::storage::directory')}" swh::deploy::objstorage::slicing: 0:1/1:5 # Deploy the storage server as a public resource swh::deploy::storage::backend::listen::host: 0.0.0.0 swh::deploy::storage::backend::workers: 4 swh::deploy::storage::backend::max_requests: 100 swh::deploy::storage::backend::max_requests_jitter: 10 # Deploy the indexer storage server as a public resource swh::deploy::indexer::storage::backend::listen::host: 0.0.0.0 swh::deploy::indexer::storage::backend::workers: 4 nginx::worker_processes: 4 ## Reverse-proxy and frontend hitch::frontend: "[*]:443" hitch::proxy_support: true varnish::http_port: 80 apache::http_port: 9080 # Disable default vhost on port 80 apache::default_vhost: false # Elasticsearch elasticsearch::config::cluster::name: swh-search elasticsearch::config::discovery::seed_hosts: - search-esnode0.internal.staging.swh.network elasticsearch::config::cluster::initial_master_nodes: - search-esnode0 elasticsearch::jvm_options::heap_size: 16g 
elasticsearch::config::prometheus::indices: true swh::elasticsearch::search_nodes: - host: search-esnode0.internal.staging.swh.network port: 9200 swh::deploy::search::journal_client::service_types: - objects - indexed swh::deploy::search::journal_client::objects::consumer_group: swh.search.journal_client-v0.11 swh::deploy::search::journal_client::indexed::consumer_group: swh.search.journal_client.indexed-v0.11 swh::deploy::webapp::url: "https://webapp.staging.swh.network" swh::deploy::vault::e2e::storage: "%{alias('swh::remote_service::storage0::url')}" swh::config::keycloak::realm_name: SoftwareHeritageStaging # No historical file on staging swh::deploy::counters::cache_static_file: swh::deploy::counters::live_data_start: 1609462861 # 2021-01-01 swh::deploy::webapp::snapshot_e2e: uri: '/browse/snapshot/48dcf76ec1a3bd57ec117b1dace633691fdfd70d/branches/' regexp: - 'refs/tags/syslinux-3.20-pre2.*refs/tags/syslinux-3.20-pre3.*' swh::deploy::worker::loader_high_priority::queues: # bzr - save_code_now:swh.loader.bzr.tasks.LoadBazaar # cvs - save_code_now:swh.loader.cvs.tasks.LoadCvsRepository # git - save_code_now:swh.loader.git.tasks.UpdateGitRepository # mercurial - save_code_now:swh.loader.mercurial.tasks.LoadMercurial - save_code_now:swh.loader.mercurial.tasks.LoadArchiveMercurial # svn - save_code_now:swh.loader.svn.tasks.LoadSvnRepository - save_code_now:swh.loader.svn.tasks.MountAndLoadSvnRepository - save_code_now:swh.loader.svn.tasks.DumpMountAndLoadSvnRepository # archives - save_code_now:swh.loader.package.archive.tasks.LoadArchive swh::deploy::scheduler::swh-scheduler-runner-priority::config::task_types: - load-bzr - load-cvs - load-git - load-svn - load-archive-files - load-hg syncoid::public_keys::storage1: type: ssh-ed25519 key: "AAAAC3NzaC1lZDI1NTE5AAAAIB0y7dvB0cBluC+Dy+w51P6JCbB18whd/IekP5148XsS" syncoid::public_keys::db1: type: ssh-ed25519 key: "AAAAC3NzaC1lZDI1NTE5AAAAILRVodfvLudSiOdWOPDSoN5MIwZPbyZAyClfr/SQUK4w" swh::deploy::maven_index_exporter::url: maven-exporter.internal.staging.swh.network + +cassandra::default_cluster_name: archive_staging diff --git a/site-modules/profile/files/cassandra/99-cassandra.rules b/site-modules/profile/files/cassandra/99-cassandra.rules deleted file mode 100644 index 04aae5db..00000000 --- a/site-modules/profile/files/cassandra/99-cassandra.rules +++ /dev/null @@ -1,7 +0,0 @@ -# Readahead configuration for cassandra devices. -# -# https://docs.microsoft.com/en-us/azure/architecture/best-practices/cassandra#linux-read-ahead -# -# Managed by Puppet (class profile::cassandra::node), changes will be lost. 
- -ACTION=="add|change", KERNEL=="sd[c-z]|md*", ATTR{queue/rotational}="0", ATTR{bdi/read_ahead_kb}="8" diff --git a/site-modules/profile/files/cassandra/jvm.options b/site-modules/profile/files/cassandra/jvm.options deleted file mode 100644 index 01bb1685..00000000 --- a/site-modules/profile/files/cassandra/jvm.options +++ /dev/null @@ -1,256 +0,0 @@ -########################################################################### -# jvm.options # -# # -# - all flags defined here will be used by cassandra to startup the JVM # -# - one flag should be specified per line # -# - lines that do not start with '-' will be ignored # -# - only static flags are accepted (no variables or parameters) # -# - dynamic flags will be appended to these on cassandra-env # -########################################################################### - -###################### -# STARTUP PARAMETERS # -###################### - -# Uncomment any of the following properties to enable specific startup parameters - -# In a multi-instance deployment, multiple Cassandra instances will independently assume that all -# CPU processors are available to it. This setting allows you to specify a smaller set of processors -# and perhaps have affinity. -#-Dcassandra.available_processors=number_of_processors - -# The directory location of the cassandra.yaml file. -#-Dcassandra.config=directory - -# Sets the initial partitioner token for a node the first time the node is started. -#-Dcassandra.initial_token=token - -# Set to false to start Cassandra on a node but not have the node join the cluster. -#-Dcassandra.join_ring=true|false - -# Set to false to clear all gossip state for the node on restart. Use when you have changed node -# information in cassandra.yaml (such as listen_address). -#-Dcassandra.load_ring_state=true|false - -# Enable pluggable metrics reporter. See Pluggable metrics reporting in Cassandra 2.0.2. -#-Dcassandra.metricsReporterConfigFile=file - -# Set the port on which the CQL native transport listens for clients. (Default: 9042) -#-Dcassandra.native_transport_port=port - -# Overrides the partitioner. (Default: org.apache.cassandra.dht.Murmur3Partitioner) -#-Dcassandra.partitioner=partitioner - -# To replace a node that has died, restart a new node in its place specifying the address of the -# dead node. The new node must not have any data in its data directory, that is, it must be in the -# same state as before bootstrapping. -#-Dcassandra.replace_address=listen_address or broadcast_address of dead node - -# Allow restoring specific tables from an archived commit log. -#-Dcassandra.replayList=table - -# Allows overriding of the default RING_DELAY (30000ms), which is the amount of time a node waits -# before joining the ring. -#-Dcassandra.ring_delay_ms=ms - -# Set the port for the Thrift RPC service, which is used for client connections. (Default: 9160) -#-Dcassandra.rpc_port=port - -# Set the SSL port for encrypted communication. (Default: 7001) -#-Dcassandra.ssl_storage_port=port - -# Enable or disable the native transport server. See start_native_transport in cassandra.yaml. -# cassandra.start_native_transport=true|false - -# Enable or disable the Thrift RPC server. (Default: true) -#-Dcassandra.start_rpc=true/false - -# Set the port for inter-node communication. (Default: 7000) -#-Dcassandra.storage_port=port - -# Set the default location for the trigger JARs. (Default: conf/triggers) -#-Dcassandra.triggers_dir=directory - -# For testing new compaction and compression strategies. 
It allows you to experiment with different -# strategies and benchmark write performance differences without affecting the production workload. -#-Dcassandra.write_survey=true - -# To disable configuration via JMX of auth caches (such as those for credentials, permissions and -# roles). This will mean those config options can only be set (persistently) in cassandra.yaml -# and will require a restart for new values to take effect. -#-Dcassandra.disable_auth_caches_remote_configuration=true - -# To disable dynamic calculation of the page size used when indexing an entire partition (during -# initial index build/rebuild). If set to true, the page size will be fixed to the default of -# 10000 rows per page. -#-Dcassandra.force_default_indexing_page_size=true - -######################## -# GENERAL JVM SETTINGS # -######################## - -# enable assertions. highly suggested for correct application functionality. --ea - -# enable thread priorities, primarily so we can give periodic tasks -# a lower priority to avoid interfering with client workload --XX:+UseThreadPriorities - -# allows lowering thread priority without being root on linux - probably -# not necessary on Windows but doesn't harm anything. -# see http://tech.stolsvik.com/2010/01/linux-java-thread-priorities-workar --XX:ThreadPriorityPolicy=42 - -# Enable heap-dump if there's an OOM --XX:+HeapDumpOnOutOfMemoryError - -# Per-thread stack size. --Xss256k - -# Larger interned string table, for gossip's benefit (CASSANDRA-6410) --XX:StringTableSize=1000003 - -# Make sure all memory is faulted and zeroed on startup. -# This helps prevent soft faults in containers and makes -# transparent hugepage allocation more effective. --XX:+AlwaysPreTouch - -# Disable biased locking as it does not benefit Cassandra. --XX:-UseBiasedLocking - -# Enable thread-local allocation blocks and allow the JVM to automatically -# resize them at runtime. --XX:+UseTLAB --XX:+ResizeTLAB --XX:+UseNUMA - -# http://www.evanjones.ca/jvm-mmap-pause.html --XX:+PerfDisableSharedMem - -# Prefer binding to IPv4 network intefaces (when net.ipv6.bindv6only=1). See -# http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6342561 (short version: -# comment out this entry to enable IPv6 support). --Djava.net.preferIPv4Stack=true - -### Debug options - -# uncomment to enable flight recorder -#-XX:+UnlockCommercialFeatures -#-XX:+FlightRecorder - -# uncomment to have Cassandra JVM listen for remote debuggers/profilers on port 1414 -#-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=1414 - -# uncomment to have Cassandra JVM log internal method compilation (developers only) -#-XX:+UnlockDiagnosticVMOptions -#-XX:+LogCompilation - -################# -# HEAP SETTINGS # -################# - -# Heap size is automatically calculated by cassandra-env based on this -# formula: max(min(1/2 ram, 1024MB), min(1/4 ram, 8GB)) -# That is: -# - calculate 1/2 ram and cap to 1024MB -# - calculate 1/4 ram and cap to 8192MB -# - pick the max -# -# For production use you may wish to adjust this for your environment. -# If that's the case, uncomment the -Xmx and Xms options below to override the -# automatic calculation of JVM heap memory. -# -# It is recommended to set min (-Xms) and max (-Xmx) heap sizes to -# the same value to avoid stop-the-world GC pauses during resize, and -# so that we can lock the heap in memory on startup to prevent any -# of it from being swapped out. 
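(Worked example of the heap formula above, assuming a hypothetical 32 GB host: min(1/2 RAM, 1024 MB) = 1024 MB, min(1/4 RAM, 8192 MB) = 8192 MB, and the max of the two yields an 8 GB heap, which is why the file suggests setting -Xms/-Xmx explicitly for production.)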
-#-Xms4G -#-Xmx4G - -# Young generation size is automatically calculated by cassandra-env -# based on this formula: min(100 * num_cores, 1/4 * heap size) -# -# The main trade-off for the young generation is that the larger it -# is, the longer GC pause times will be. The shorter it is, the more -# expensive GC will be (usually). -# -# It is not recommended to set the young generation size if using the -# G1 GC, since that will override the target pause-time goal. -# More info: http://www.oracle.com/technetwork/articles/java/g1gc-1984535.html -# -# The example below assumes a modern 8-core+ machine for decent -# times. If in doubt, and if you do not particularly want to tweak, go -# 100 MB per physical CPU core. -#-Xmn800M - -################################### -# EXPIRATION DATE OVERFLOW POLICY # -################################### - -# Defines how to handle INSERT requests with TTL exceeding the maximum supported expiration date: -# * REJECT: this is the default policy and will reject any requests with expiration date timestamp after 2038-01-19T03:14:06+00:00. -# * CAP: any insert with TTL expiring after 2038-01-19T03:14:06+00:00 will expire on 2038-01-19T03:14:06+00:00 and the client will receive a warning. -# * CAP_NOWARN: same as previous, except that the client warning will not be emitted. -# -#-Dcassandra.expiration_date_overflow_policy=REJECT - -################# -# GC SETTINGS # -################# - -### CMS Settings - --XX:+UseParNewGC --XX:+UseConcMarkSweepGC --XX:+CMSParallelRemarkEnabled --XX:SurvivorRatio=8 --XX:MaxTenuringThreshold=1 --XX:CMSInitiatingOccupancyFraction=75 --XX:+UseCMSInitiatingOccupancyOnly --XX:CMSWaitDuration=10000 --XX:+CMSParallelInitialMarkEnabled --XX:+CMSEdenChunksRecordAlways -# some JVMs will fill up their heap when accessed via JMX, see CASSANDRA-6541 --XX:+CMSClassUnloadingEnabled - -### G1 Settings (experimental, comment previous section and uncomment section below to enable) - -## Use the Hotspot garbage-first collector. -#-XX:+UseG1GC -# -## Have the JVM do less remembered set work during STW, instead -## preferring concurrent GC. Reduces p99.9 latency. -#-XX:G1RSetUpdatingPauseTimePercent=5 -# -## Main G1GC tunable: lowering the pause target will lower throughput and vise versa. -## 200ms is the JVM default and lowest viable setting -## 1000ms increases throughput. Keep it smaller than the timeouts in cassandra.yaml. -#-XX:MaxGCPauseMillis=500 - -## Optional G1 Settings - -# Save CPU time on large (>= 16GB) heaps by delaying region scanning -# until the heap is 70% full. The default in Hotspot 8u40 is 40%. -#-XX:InitiatingHeapOccupancyPercent=70 - -# For systems with > 8 cores, the default ParallelGCThreads is 5/8 the number of logical cores. -# Otherwise equal to the number of cores when 8 or less. -# Machines with > 10 cores should try setting these to <= full cores. -#-XX:ParallelGCThreads=16 -# By default, ConcGCThreads is 1/4 of ParallelGCThreads. -# Setting both to the same value can reduce STW durations. 
-#-XX:ConcGCThreads=16
-
-### GC logging options -- uncomment to enable
-
--XX:+PrintGCDetails
--XX:+PrintGCDateStamps
--XX:+PrintHeapAtGC
--XX:+PrintTenuringDistribution
--XX:+PrintGCApplicationStoppedTime
--XX:+PrintPromotionFailure
-#-XX:PrintFLSStatistics=1
-#-Xloggc:/var/log/cassandra/gc.log
--XX:+UseGCLogFileRotation
--XX:NumberOfGCLogFiles=10
--XX:GCLogFileSize=10M
diff --git a/site-modules/profile/manifests/cassandra.pp b/site-modules/profile/manifests/cassandra.pp
new file mode 100644
index 00000000..406fca7a
--- /dev/null
+++ b/site-modules/profile/manifests/cassandra.pp
@@ -0,0 +1,132 @@
+# Install the base components of a 4.x Cassandra server
+# Configure all the instances declared for this node in the cassandra::nodes property
+#
+# Look at profile::cassandra::instance for more information about the
+# instance(s) configuration
+class profile::cassandra {
+
+  include profile::prometheus::jmx
+  $jmx_exporter_version = lookup('prometheus::jmx::version')
+
+  $cassandra_user = 'cassandra'
+  $cassandra_group = 'cassandra'
+  $cassandra_home = '/home/cassandra'
+
+  $cassandra_version = lookup('cassandra::version')
+  $cassandra_archive_name = "apache-cassandra-${cassandra_version}-bin.tar.gz"
+  $cassandra_bin_url = "https://dlcdn.apache.org/cassandra/${cassandra_version}/${cassandra_archive_name}"
+  $cassandra_bin_checksum_type = 'sha512'
+  $cassandra_bin_checksum = '188e131392ea0e48b46f24b1be297ef6335197f4480c9421328006507e069dce659ce3ce473906398273a5926e331960cbf824362e40cb4c74670cde95458349'
+
+  $systemd_service = 'cassandra@.service'
+
+  $download_path = "/opt/${cassandra_archive_name}"
+
+  $cassandra_install_directory = "/opt/cassandra-${cassandra_version}"
+
+  $cassandra_base_data_directory = lookup('cassandra::base_data_directory')
+  $cassandra_config_directory = lookup('cassandra::base_config_directory')
+  $cassandra_log_directory = lookup('cassandra::base_log_directory')
+
+  $cassandra_nodes = lookup('cassandra::nodes')
+  $node_definition = $cassandra_nodes["$::fqdn"]
+  $instances = $node_definition['instances']
+
+  $default_instance_config = lookup('cassandra::default_instance_configuration')
+  $clusters_config = lookup('cassandra::clusters')
+
+  group {$cassandra_group:
+    system => true,
+  }
+
+  user {$cassandra_user:
+    system => true,
+    gid    => $cassandra_group,
+    shell  => '/usr/sbin/nologin',
+    home   => $cassandra_home,
+  }
+
+  file { [
+    $cassandra_install_directory,
+    $cassandra_config_directory,
+  ]:
+    ensure => directory,
+    owner  => 'root',
+    group  => 'root',
+    mode   => '0755'
+  }
+
+  $config_files_to_copy = [
+    'jvm11-clients.options',
+    'jvm-clients.options',
+    'logback-tools.xml',
+  ]
+
+  $config_files_to_copy.each | $file_name | {
+    file { "${cassandra_config_directory}/${file_name}":
+      ensure  => present,
+      owner   => 'root',
+      group   => 'root',
+      mode    => '0644',
+      source  => "/opt/cassandra/conf/${file_name}",
+      require => [File[$cassandra_config_directory]],
+    }
+  }
+
+  file { [
+    $cassandra_base_data_directory,
+    $cassandra_log_directory,
+  ]:
+    ensure => directory,
+    owner  => $cassandra_user,
+    group  => $cassandra_group,
+    mode   => '0750'
+  }
+
+  ensure_packages(['openjdk-11-jdk', 'libnetty-java'])
+
+  archive { 'cassandra':
+    path            => $download_path,
+    extract         => true,
+    extract_command => 'tar xzf %s --strip-components=1 --no-same-owner --no-same-permissions',
+    source          => $cassandra_bin_url,
+    extract_path    => $cassandra_install_directory,
+    checksum_type   => $cassandra_bin_checksum_type,
+    checksum        => $cassandra_bin_checksum,
+    creates         => "${cassandra_install_directory}/bin/cassandra",
+    cleanup         => true,
+    user            => 'root',
+    group           => 'root',
+    require         => File[$cassandra_install_directory],
+  }
+  -> file {'/opt/cassandra':
+    ensure => link,
+    force  => true,
+    target => $cassandra_install_directory
+  }
+
+  ::systemd::unit_file {$systemd_service:
+    ensure  => present,
+    content => template('profile/cassandra/cassandra.service.erb'),
+  }
+
+  file {"${cassandra_config_directory}/jmx_exporter.yml":
+    ensure => present,
+    owner  => 'root',
+    group  => 'root',
+    mode   => '0644',
+    source => "https://raw.githubusercontent.com/prometheus/jmx_exporter/parent-${jmx_exporter_version}/example_configs/cassandra.yml",
+  }
+
+  $instances.each | $instance_name, $instance_config | {
+    $merged_instance_config = $default_instance_config + $instance_config
+    $cluster_config = $clusters_config[$merged_instance_config["cluster_name"]]
+    $merged_config = $cluster_config + $merged_instance_config
+
+    profile::cassandra::instance{$instance_name:
+      config => $merged_config
+    }
+  }
+
+}
+
diff --git a/site-modules/profile/manifests/cassandra/instance.pp b/site-modules/profile/manifests/cassandra/instance.pp
new file mode 100644
index 00000000..56027ad2
--- /dev/null
+++ b/site-modules/profile/manifests/cassandra/instance.pp
@@ -0,0 +1,112 @@
+# Configure a cassandra instance on a server
+# Several instances can coexist on the same server
+#
+# It assumes the profile::cassandra class
+# has been applied beforehand
+define profile::cassandra::instance (
+  $instance_name = $name,
+  $config        = {}
+) {
+
+  $service_name = "cassandra@${instance_name}.service"
+
+  $listen_network = lookup('internal_network')
+  $listen_address = ip_for_network($listen_network)
+
+  $cassandra_base_data_dir = lookup('cassandra::base_data_directory')
+  $instance_base_data_dir = "${cassandra_base_data_dir}/${instance_name}"
+  $cassandra_config_dir = lookup('cassandra::base_config_directory')
+  $cassandra_log_dir = lookup('cassandra::base_log_directory')
+
+  $base_data_dir = "${instance_base_data_dir}/data"
+  $commitlog_dir = "${instance_base_data_dir}/commitlog"
+
+  $data_dir = "${base_data_dir}/data"
+  $hints_dir = "${data_dir}/hints"
+  $saved_caches_dir = "${data_dir}/saved_caches"
+
+  $config_dir = "${cassandra_config_dir}/${instance_name}"
+  $log_dir = "${cassandra_log_dir}/${instance_name}"
+
+  $jmx_exporter_path = $::profile::prometheus::jmx::jar_path
+
+  $base_configuration = lookup('cassandra::base_instance_configuration')
+  $instance_configuration = {
+    cluster_name           => $config["cluster_name"],
+    data_file_directories  => [ $base_data_dir, ],
+    commitlog_directory    => $commitlog_dir,
+    hints_directory        => $hints_dir,
+    saved_caches_directory => $saved_caches_dir,
+    listen_address         => $listen_address,
+    native_transport_port  => $config['native_transport_port'],
+    storage_port           => $config['storage_port'],
+    seed_provider          => $config['seed_provider']
+  }
+
+  $computed_configuration = $base_configuration + $instance_configuration
+
+  file {[
+    $instance_base_data_dir,
+    $base_data_dir,
+    # $commitlog_dir,
+    $config_dir,
+    $log_dir,
+  ] :
+    ensure  => directory,
+    owner   => $::profile::cassandra::cassandra_user,
+    group   => $::profile::cassandra::cassandra_group,
+    require => [
+      # File[$::profile::cassandra::cassandra_base_data_directory],
+      # File[$::profile::cassandra::cassandra_config_directory],
+      # File[$::profile::cassandra::cassandra_log_directory],
+    ]
+  }
+
+  ::systemd::dropin_file { "${service_name}.d/parameters.conf":
+    ensure   => present,
+    unit     => "cassandra@${instance_name}.service",
+    filename => 'parameters.conf',
+    content  =>
template('profile/cassandra/instance-parameters.conf.erb'), + } + + service {$service_name: + enable => true, + } + + $config_files_to_copy = [ + 'jvm11-server.options', + 'jvm-server.options', + 'logback.xml', + 'cassandra-env.sh', + ] + + $config_files_to_copy.each | $file_name | { + file { "${config_dir}/${file_name}": + ensure => present, + owner => 'root', + group => 'root', + mode => '0644', + source => "/opt/cassandra/conf/${file_name}", + require => [File[$config_dir]], + } + } + + file { "${config_dir}/cassandra.yaml": + ensure => present, + owner => 'root', + group => 'root', + mode => '0644', + content => inline_yaml($computed_configuration), + require => [File[$config_dir]], + } + + file { "${config_dir}/cassandra-rackdc.properties": + ensure => present, + owner => 'root', + group => 'root', + mode => '0644', + content => template('profile/cassandra/cassandra-rackdc.properties.erb'), + require => [File[$config_dir]], + } + +} diff --git a/site-modules/profile/manifests/cassandra/node.pp b/site-modules/profile/manifests/cassandra/node.pp deleted file mode 100644 index 05ac5fcd..00000000 --- a/site-modules/profile/manifests/cassandra/node.pp +++ /dev/null @@ -1,120 +0,0 @@ -# Definition of a cassandra node -class profile::cassandra::node { - include profile::cassandra::apt_config - - $basedir = '/srv/cassandra' - $commitlogdir = "${basedir}/commitlog" - $datadir = "${basedir}/data" - $hintsdir = "${basedir}/hints" - - file {$basedir: - ensure => 'directory', - owner => 'cassandra', - group => 'cassandra', - } - - $baseline_settings = lookup('cassandra::baseline_settings') - - $cluster = lookup('cassandra::cluster') - $cluster_settings = lookup('cassandra::clusters', Hash)[$cluster] - - $listen_network = lookup('cassandra::listen_network', Optional[String], 'first', undef) - $listen_address = lookup('cassandra::listen_address', Optional[String], 'first', undef) - $actual_listen_address = pick($listen_address, ip_for_network($listen_network)) - - $listen_settings = { - listen_address => $actual_listen_address, - rpc_address => $actual_listen_address - } - - $exporter_version = lookup('cassandra::exporter::version') - $exporter_filename = "cassandra-exporter-agent-${exporter_version}.jar" - $exporter_url = "https://github.com/instaclustr/cassandra-exporter/releases/download/v${exporter_version}/${exporter_filename}" - - $exporter_base_directory = '/opt/prometheus-cassandra-exporter' - $exporter_path = "${exporter_base_directory}/${exporter_filename}" - $exporter_config = "/etc/cassandra/cassandra-exporter.options" - - file {$exporter_base_directory: - ensure => 'directory', - mode => '0644', - owner => 'root', - group => 'root', - } - - # Use wget to work around https://tickets.puppetlabs.com/browse/PUP-6380 - exec {'download-cassandra-exporter': - command => "wget --quiet ${exporter_url} -O ${exporter_path}", - path => ['/sbin', '/usr/sbin', '/bin', '/usr/bin'], - creates => $exporter_path, - require => File[$exporter_base_directory], - } - - $exporter_network = lookup('cassandra::exporter::listen_network', Optional[String], 'first', undef) - $exporter_address = lookup('cassandra::exporter::listen_address', Optional[String], 'first', undef) - $actual_exporter_address = pick($exporter_address, ip_for_network($exporter_network)) - - $exporter_port = lookup('cassandra::exporter::listen_port') - - $exporter_target = "${actual_exporter_address}:${exporter_port}" - - file {$exporter_config: - ensure => 'present', - owner => 'root', - group => 'root', - content => 
template('profile/cassandra/cassandra-exporter.options.erb'), - notify => Service['cassandra'], - } - - ::systemd::unit_file {'cassandra.service': - content => template('profile/cassandra/cassandra.service.erb'), - notify => Service['cassandra'], - require => [ - Exec['download-cassandra-exporter'], - File[$exporter_config], - ], - } - - ::profile::prometheus::export_scrape_config {'cassandra': - target => $exporter_target, - labels => { - cluster => $cluster, - } - } - - package {'openjdk-8-jre-headless': - ensure => 'installed', - } - -> class {'::cassandra': - baseline_settings => $baseline_settings, - commitlog_directory => $commitlogdir, - data_file_directories => [$datadir], - hints_directory => $hintsdir, - settings => $cluster_settings + $listen_settings, - } - - file {'/etc/cassandra/jvm.options': - ensure => 'present', - owner => 'root', - group => 'root', - mode => '0644', - source => 'puppet:///modules/profile/cassandra/jvm.options', - require => Package['cassandra'], - notify => Service['cassandra'], - } - - file {'/etc/udev/rules.d/99-cassandra.rules': - ensure => 'present', - owner => 'root', - group => 'root', - mode => '0644', - source => 'puppet:///modules/profile/cassandra/99-cassandra.rules', - notify => Exec['cassandra-reload-udev-rules'], - } - - exec {'cassandra-reload-udev-rules': - command => 'udevadm control --reload-rules', - refreshonly => true, - path => ['/usr/local/sbin', '/usr/local/bin', '/usr/sbin', '/usr/bin', '/sbin', '/bin'], - } -} diff --git a/site-modules/profile/templates/cassandra/cassandra-rackdc.properties.erb b/site-modules/profile/templates/cassandra/cassandra-rackdc.properties.erb new file mode 100644 index 00000000..2578c65c --- /dev/null +++ b/site-modules/profile/templates/cassandra/cassandra-rackdc.properties.erb @@ -0,0 +1,2 @@ +dc=<%= @config["datacenter"] %> +rack=<%= @config["rack"] %> diff --git a/site-modules/profile/templates/cassandra/cassandra.service.erb b/site-modules/profile/templates/cassandra/cassandra.service.erb index 90f2365c..88c1c7be 100644 --- a/site-modules/profile/templates/cassandra/cassandra.service.erb +++ b/site-modules/profile/templates/cassandra/cassandra.service.erb @@ -1,22 +1,20 @@ -# Managed by puppet (class profile::cassandra::node); Changes will be lost. +# Managed by puppet (class profile::cassandra); Changes will be lost. 
 [Unit]
 Description=Cassandra
 After=network.target
 
 [Service]
-User=cassandra
-Group=cassandra
-Environment=JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
-Environment=JVM_EXTRA_OPTS=-javaagent:<%= @exporter_path %>=@<%= @exporter_config %>
-EnvironmentFile=-/etc/default/cassandra
-ExecStart=/usr/sbin/cassandra -f
+User=<%= @cassandra_user %>
+Group=<%= @cassandra_group %>
+
+ExecStart=/opt/cassandra/bin/cassandra -f
 
 LimitNOFILE=100000
 LimitMEMLOCK=infinity
 LimitNPROC=32768
 LimitAS=infinity
 
 Restart=on-failure
 SuccessExitStatus=143
 
 [Install]
 WantedBy=multi-user.target
diff --git a/site-modules/profile/templates/cassandra/cassandra.yaml.erb b/site-modules/profile/templates/cassandra/cassandra.yaml.erb
new file mode 100644
index 00000000..eb60b5d6
--- /dev/null
+++ b/site-modules/profile/templates/cassandra/cassandra.yaml.erb
@@ -0,0 +1,76 @@
+cluster_name: <%= @config["cluster_name"] %>
+num_tokens: 16
+allocate_tokens_for_local_replication_factor: 3
+data_file_directories:
+  - <%= @base_data_dir %>
+
+commitlog_directory: <%= @commitlog_dir %>
+
+hints_directory: <%= @hints_dir %>
+
+saved_caches_directory: <%= @saved_caches_dir %>
+
+# local_system_data_file_directory: {{ cassandra_data_dir_system }}
+
+disk_optimization_strategy: ssd
+
+listen_address: <%= @listen_address %>
+native_transport_port: <%= @config["native_transport_port"] %>
+storage_port: <%= @config["storage_port"] %>
+
+concurrent_compactors: 4 # should be min(number of cores, number of disks)
+
+internode_compression: dc # default: dc; possible values: all|dc|none
+
+concurrent_reads: 64 # 16 x number of drives
+concurrent_writes: 128 # 8 x number of cores
+concurrent_counter_writes: 48
+
+commitlog_sync: periodic # default periodic
+commitlog_sync_period_in_ms: 10000 # default 10000
+commitlog_total_space_in_mb: 16384 # default 8192
+commitlog_segment_size_in_mb: 256 # default 32 (raised due to oversized mutations on the revision table)
+
+partitioner: org.apache.cassandra.dht.Murmur3Partitioner
+endpoint_snitch: GossipingPropertyFileSnitch
+
+seed_provider: <%= @config["seed_provider"].to_yaml().delete_prefix("---") %>
+
+# needed by swh-storage
+enable_user_defined_functions: true
+
+# TODO: test the effects of these options
+# disk_failure_policy:
+# cdc_enabled
+#end
+
+# Trying to reduce cassandra_compaction_pendingtasks
+compaction_throughput_mb_per_sec: 160
+
+# https://forge.softwareheritage.org/source/cassandra-replayer-deployment/browse/master/playbooks/templates/cassandra.yaml$854
+# How long the coordinator should wait for read operations to complete.
+# Lowest acceptable value is 10 ms.
+read_request_timeout_in_ms: 5000
+# How long the coordinator should wait for seq or index scans to complete.
+# Lowest acceptable value is 10 ms.
+range_request_timeout_in_ms: 10000
+# How long the coordinator should wait for writes to complete.
+# Lowest acceptable value is 10 ms.
+write_request_timeout_in_ms: 2000
+# How long the coordinator should wait for counter writes to complete.
+# Lowest acceptable value is 10 ms.
+counter_write_request_timeout_in_ms: 5000
+# How long a coordinator should continue to retry a CAS operation
+# that contends with other proposals for the same row.
+# Lowest acceptable value is 10 ms.
+cas_contention_timeout_in_ms: 1000
+# How long the coordinator should wait for truncates to complete
+# (This can be much longer, because unless auto_snapshot is disabled
+# we need to flush first so we can snapshot before removing the data.)
+# Lowest acceptable value is 10 ms.
+truncate_request_timeout_in_ms: 60000
+# The default timeout for other, miscellaneous operations.
+# Lowest acceptable value is 10 ms.
+request_timeout_in_ms: 10000
+
+slow_query_log_timeout_in_ms: 1000
diff --git a/site-modules/profile/templates/cassandra/instance-parameters.conf.erb b/site-modules/profile/templates/cassandra/instance-parameters.conf.erb
new file mode 100644
index 00000000..3b41df23
--- /dev/null
+++ b/site-modules/profile/templates/cassandra/instance-parameters.conf.erb
@@ -0,0 +1,13 @@
+# Managed by puppet (define profile::cassandra::instance); Changes will be lost.
+
+[Unit]
+Description=Cassandra <%= @instance_name %> instance
+After=network.target
+
+[Service]
+Environment=JVM_EXTRA_OPTS="-javaagent:<%= @jmx_exporter_path %>=<%= @config["jmx_exporter_port"] %>:/etc/cassandra/jmx_exporter.yml -Dcassandra.jmx.local.port=<%= @config["jmx_port"] %> -Dcom.sun.management.jmxremote.authenticate=false"
+Environment=CASSANDRA_CONF=<%= @config_dir %>
+Environment=CASSANDRA_LOG_DIR=<%= @log_dir %>
+
+[Install]
+WantedBy=multi-user.target
diff --git a/site-modules/role/manifests/swh_cassandra_node.pp b/site-modules/role/manifests/swh_cassandra_node.pp
index 1c62cc18..4e564504 100644
--- a/site-modules/role/manifests/swh_cassandra_node.pp
+++ b/site-modules/role/manifests/swh_cassandra_node.pp
@@ -1,5 +1,6 @@
 # Deployment of a cassandra node
 class role::swh_cassandra_node inherits role::swh_base {
   # include profile::cassandra::node
+  include profile::cassandra
   include profile::docker
 }
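
The hiera data consumed by the new profile (cassandra::nodes, cassandra::clusters, cassandra::default_instance_configuration and cassandra::base_instance_configuration) is looked up by the manifests above but not defined in this change. A minimal sketch of the layout those lookups expect might be the following; this is a hypothetical example in which the host name, ports, datacenter/rack values and instance name are invented, and only the key names follow the code:

    # Hypothetical hiera sketch, not part of this change.
    cassandra::clusters:
      archive_staging:
        datacenter: dc1
        rack: rack1
        seed_provider:
          - class_name: org.apache.cassandra.locator.SimpleSeedProvider
            parameters:
              - seeds: cassandra1.internal.staging.swh.network

    cassandra::default_instance_configuration:
      cluster_name: "%{lookup('cassandra::default_cluster_name')}"
      native_transport_port: 9042
      storage_port: 7000

    cassandra::base_instance_configuration:
      num_tokens: 16
      endpoint_snitch: GossipingPropertyFileSnitch

    cassandra::nodes:
      cassandra1.internal.staging.swh.network:
        instances:
          instance1:
            jmx_port: 7199
            jmx_exporter_port: 9500

With data shaped like this, profile::cassandra merges each instance hash over cassandra::default_instance_configuration and then merges that result over the matching cassandra::clusters entry (the right-hand operand of Puppet's + operator wins on duplicate keys), so per-instance values such as jmx_port take precedence over both the defaults and the cluster-wide settings; profile::cassandra::instance in turn layers its computed settings over cassandra::base_instance_configuration before rendering them to cassandra.yaml with inline_yaml.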