diff --git a/site-modules/profile/manifests/swh/deploy/worker/indexer_content_mimetype.pp b/site-modules/profile/manifests/swh/deploy/worker/indexer_content_mimetype.pp index c1b77e9e..1832abac 100644 --- a/site-modules/profile/manifests/swh/deploy/worker/indexer_content_mimetype.pp +++ b/site-modules/profile/manifests/swh/deploy/worker/indexer_content_mimetype.pp @@ -1,13 +1,14 @@ # Deployment for swh-indexer-mimetype class profile::swh::deploy::worker::indexer_content_mimetype { include ::profile::swh::deploy::indexer Package[$::profile::swh::deploy::base_indexer::packages] ~> ::profile::swh::deploy::worker::instance {'indexer_content_mimetype': - ensure => present, - sentry_name => 'indexer', - require => [ + ensure => present, + sentry_name => 'indexer', + send_task_events => true, + require => [ Class['profile::swh::deploy::indexer'] ], } } diff --git a/site-modules/profile/manifests/swh/deploy/worker/indexer_fossology_license.pp b/site-modules/profile/manifests/swh/deploy/worker/indexer_fossology_license.pp index 3e4caf2e..43c56577 100644 --- a/site-modules/profile/manifests/swh/deploy/worker/indexer_fossology_license.pp +++ b/site-modules/profile/manifests/swh/deploy/worker/indexer_fossology_license.pp @@ -1,20 +1,21 @@ # Deployment for swh-indexer-fossology-license class profile::swh::deploy::worker::indexer_fossology_license { include ::profile::swh::deploy::indexer $packages = ['fossology-nomossa'] package {$packages: ensure => 'present', require => Apt::Source['softwareheritage'], } Package[$::profile::swh::deploy::base_indexer::packages] ~> ::profile::swh::deploy::worker::instance {'indexer_fossology_license': - ensure => present, - sentry_name => 'indexer', - require => [ + ensure => present, + sentry_name => 'indexer', + send_task_events => true, + require => [ Class['profile::swh::deploy::indexer'], Package[$packages], ], } } diff --git a/site-modules/profile/manifests/swh/deploy/worker/indexer_origin_intrinsic_metadata.pp 
b/site-modules/profile/manifests/swh/deploy/worker/indexer_origin_intrinsic_metadata.pp index 93e3004d..81406042 100644 --- a/site-modules/profile/manifests/swh/deploy/worker/indexer_origin_intrinsic_metadata.pp +++ b/site-modules/profile/manifests/swh/deploy/worker/indexer_origin_intrinsic_metadata.pp @@ -1,13 +1,14 @@ # Deployment for swh-indexer-origin-intrinsic-metadata class profile::swh::deploy::worker::indexer_origin_intrinsic_metadata { include ::profile::swh::deploy::indexer Package[$::profile::swh::deploy::base_indexer::packages] ~> ::profile::swh::deploy::worker::instance {'indexer_origin_intrinsic_metadata': - ensure => present, - sentry_name => 'indexer', - require => [ + ensure => present, + sentry_name => 'indexer', + send_task_events => true, + require => [ Class['profile::swh::deploy::indexer'], ], } } diff --git a/site-modules/profile/manifests/swh/deploy/worker/instance.pp b/site-modules/profile/manifests/swh/deploy/worker/instance.pp index 1c3bdd5d..695fa6cf 100644 --- a/site-modules/profile/manifests/swh/deploy/worker/instance.pp +++ b/site-modules/profile/manifests/swh/deploy/worker/instance.pp @@ -1,84 +1,99 @@ # Instance of a worker + +# @param send_task_events +# True for workers whose tasks are still scheduled by scheduler-runner: they must +# report their status to the listener, which updates the scheduler accordingly. +# False (the default) for workers whose recurring tasks are scheduled by the +# next-gen scheduler-runner: their status is updated through a journal client, so +# task events are not needed. 
define profile::swh::deploy::worker::instance ( $ensure = present, $instance_name = $title, $sentry_name = $title, $limit_no_file = undef, $private_tmp = undef, $merge_policy = 'deep', + Boolean $send_task_events = false, ) { include ::profile::swh::deploy::worker::base + # Pass --events to the celery worker only when task events must be sent + if $send_task_events { + $celery_worker_extra_args = '--events' + } else { + $celery_worker_extra_args = '' + } + $service_basename = "swh-worker@${instance_name}" $service_name = "${service_basename}.service" $concurrency = lookup("swh::deploy::worker::${instance_name}::concurrency") $max_tasks_per_child = lookup("swh::deploy::worker::${instance_name}::max_tasks_per_child", Integer, first, 5) $loglevel = lookup("swh::deploy::worker::${instance_name}::loglevel") $config_file = lookup("swh::deploy::worker::${instance_name}::config_file") $config = lookup("swh::deploy::worker::${instance_name}::config", Hash, $merge_policy) $sentry_dsn = lookup("swh::deploy::${sentry_name}::sentry_dsn", Optional[String], 'first', undef) $sentry_environment = lookup("swh::deploy::${sentry_name}::sentry_environment", Optional[String], 'first', undef) $sentry_swh_package = lookup("swh::deploy::${sentry_name}::sentry_swh_package", Optional[String], 'first', undef) $celery_hostname = $::profile::swh::deploy::worker::base::celery_hostname case $ensure { 'present', 'running': { # Uses variables # - $concurrency # - $loglevel # - $max_tasks_per_child # - $celery_hostname # - $sentry_{dsn,environment,swh_package} ::systemd::dropin_file {"${service_basename}/parameters.conf": ensure => present, unit => $service_name, filename => 'parameters.conf', content => template('profile/swh/deploy/worker/parameters.conf.erb'), } file {$config_file: ensure => 'present', owner => 'swhworker', group => 'swhworker', mode => '0640', content => inline_template("<%= @config.to_yaml %>\n"), } if $ensure == 'running' { $service_ensure = 'running' } else { $service_ensure = undef 
} service {$service_basename: ensure => $service_ensure, enable => true, require => [ File[$config_file], ] } profile::cron::d {"swh-worker-${instance_name}-autorestart": command => "chronic /usr/local/sbin/swh-worker-ping-restart ${instance_name}@${celery_hostname} ${instance_name}", target => 'swh-worker', minute => 'fqdn_rand/15', require => File['/usr/local/sbin/swh-worker-ping-restart'], } } default: { ::systemd::dropin_file {"${service_basename}/parameters.conf": ensure => absent, unit => $service_name, filename => 'parameters.conf', } file {$config_file: ensure => absent, } } } } diff --git a/site-modules/profile/manifests/swh/deploy/worker/lister.pp b/site-modules/profile/manifests/swh/deploy/worker/lister.pp index 62e94ff5..25435903 100644 --- a/site-modules/profile/manifests/swh/deploy/worker/lister.pp +++ b/site-modules/profile/manifests/swh/deploy/worker/lister.pp @@ -1,17 +1,18 @@ # Deployment for swh-lister class profile::swh::deploy::worker::lister { $packages = ['python3-swh.lister', 'r-base-core', 'r-cran-jsonlite'] package {$packages: ensure => present, } ::profile::swh::deploy::worker::instance {'lister': - ensure => present, - require => [ + ensure => present, + send_task_events => true, + require => [ Package['python3-swh.lister'], ], - merge_policy => 'first', # do not merge configuration, take the first - # encountered configuration + merge_policy => 'first', # do not merge configuration, take the first + # encountered configuration } } diff --git a/site-modules/profile/manifests/swh/deploy/worker/loader_high_priority.pp b/site-modules/profile/manifests/swh/deploy/worker/loader_high_priority.pp index cca9f1d8..6e87722c 100644 --- a/site-modules/profile/manifests/swh/deploy/worker/loader_high_priority.pp +++ b/site-modules/profile/manifests/swh/deploy/worker/loader_high_priority.pp @@ -1,14 +1,15 @@ # Deployment for high priority loader class profile::swh::deploy::worker::loader_high_priority { include ::profile::swh::deploy::base_loader_git 
include ::profile::swh::deploy::base_loader_mercurial include ::profile::swh::deploy::base_loader_svn $packages = $::profile::swh::deploy::base_loader_git::packages + $::profile::swh::deploy::base_loader_mercurial::packages + $::profile::swh::deploy::base_loader_svn::packages ::profile::swh::deploy::worker::instance {'loader_high_priority': - ensure => present, - require => Package[$packages], + ensure => present, + send_task_events => true, + require => Package[$packages], } } diff --git a/site-modules/profile/manifests/swh/deploy/worker/vault_cooker.pp b/site-modules/profile/manifests/swh/deploy/worker/vault_cooker.pp index 6ab94214..6a7aa92f 100644 --- a/site-modules/profile/manifests/swh/deploy/worker/vault_cooker.pp +++ b/site-modules/profile/manifests/swh/deploy/worker/vault_cooker.pp @@ -1,25 +1,26 @@ # Deployment of a vault cooker class profile::swh::deploy::worker::vault_cooker { include ::profile::swh::deploy::base_vault $instance_name = 'vault_cooker' $config = lookup("swh::deploy::worker::${instance_name}::config", Hash, 'deep') if $config['graph'] { $extra_packages = [ "python3-swh.graph.client", ] package {$extra_packages: ensure => 'present', } } ::profile::swh::deploy::worker::instance {$instance_name: - ensure => present, - sentry_name => 'vault', - require => [ + ensure => present, + sentry_name => 'vault', + send_task_events => true, + require => [ Package[$extra_packages], Package[$::profile::swh::deploy::base_vault::packages], ], } } diff --git a/site-modules/profile/templates/swh/deploy/worker/parameters.conf.erb b/site-modules/profile/templates/swh/deploy/worker/parameters.conf.erb index 9686f2d8..1b45d870 100644 --- a/site-modules/profile/templates/swh/deploy/worker/parameters.conf.erb +++ b/site-modules/profile/templates/swh/deploy/worker/parameters.conf.erb @@ -1,22 +1,25 @@ # Managed by puppet - modifications will be overwritten # In defined class profile::swh::deploy::worker::instance [Service] Environment=CONCURRENCY=<%= @concurrency %> 
Environment=MAX_TASKS_PER_CHILD=<%= @max_tasks_per_child %> Environment=LOGLEVEL=<%= @loglevel %> <%- if !@sentry_dsn.nil? and !@sentry_dsn.empty? -%> Environment=SWH_SENTRY_DSN=<%= @sentry_dsn %> <%- end -%> <%- if !@sentry_environment.nil? and !@sentry_environment.empty? -%> Environment=SWH_SENTRY_ENVIRONMENT=<%= @sentry_environment %> <%- end -%> <%- if !@sentry_swh_package.nil? and !@sentry_swh_package.empty? -%> Environment=SWH_MAIN_PACKAGE=<%= @sentry_swh_package %> <%- end -%> <% if @limit_no_file %> LimitNOFILE=<%= @limit_no_file %> <% end %> <% if @private_tmp %> PrivateTmp=<%= @private_tmp %> <% end %> +<%- if !@celery_worker_extra_args.nil? and !@celery_worker_extra_args.empty? -%> +Environment=CELERY_WORKER_EXTRA_ARGS=<%= @celery_worker_extra_args %> +<%- end -%> diff --git a/site-modules/profile/templates/swh/deploy/worker/swh-worker@.service.erb b/site-modules/profile/templates/swh/deploy/worker/swh-worker@.service.erb index ef0dde59..66c7b9bf 100644 --- a/site-modules/profile/templates/swh/deploy/worker/swh-worker@.service.erb +++ b/site-modules/profile/templates/swh/deploy/worker/swh-worker@.service.erb @@ -1,28 +1,36 @@ +# Managed by puppet - modifications will be overwritten +# In class profile::swh::deploy::worker::base + [Unit] Description=Software Heritage Worker (%i) After=network.target [Service] User=swhworker Group=swhworker Type=simple +# The following environment variables can be overridden in the respective +# swh-worker@<service>.service.d/parameters.conf Environment=SWH_CONFIG_FILENAME=/etc/softwareheritage/%i.yml Environment=SWH_LOG_TARGET=journal Environment=CONCURRENCY=10 Environment=MAX_TASKS_PER_CHILD=5 Environment=LOGLEVEL=info Environment=CELERY_HOSTNAME=<%= @celery_hostname %> -ExecStart=/usr/bin/python3 -m celery worker -n %i@${CELERY_HOSTNAME} --app=swh.scheduler.celery_backend.config.app --pool=prefork --events --concurrency=${CONCURRENCY} --maxtasksperchild=${MAX_TASKS_PER_CHILD} -Ofair --loglevel=${LOGLEVEL} 
--without-gossip --without-mingle --without-heartbeat +# CELERY_WORKER_EXTRA_ARGS is deliberately expanded without braces in ExecStart: systemd +# then splits it on whitespace, so an empty value yields no argument instead of an empty one. +Environment=CELERY_WORKER_EXTRA_ARGS= +ExecStart=/usr/bin/python3 -m celery worker -n %i@${CELERY_HOSTNAME} --app=swh.scheduler.celery_backend.config.app --pool=prefork $CELERY_WORKER_EXTRA_ARGS --concurrency=${CONCURRENCY} --maxtasksperchild=${MAX_TASKS_PER_CHILD} -Ofair --loglevel=${LOGLEVEL} --without-gossip --without-mingle --without-heartbeat KillMode=process KillSignal=SIGTERM TimeoutStopSec=15m OOMPolicy=kill Restart=always RestartSec=10 [Install] WantedBy=multi-user.target