Defined Type: systemd::monitor

Defined in:
modules/systemd/manifests/monitor.pp

Overview

Parameters:

  • notes_url (Stdlib::HTTPUrl)

    the url to the wiki tech page used to debug a failed unit

  • ensure (Wmflib::Ensure) (defaults to: 'present')

    the ensure state passed to the NRPE check; the plugin script itself is always installed

  • check_interval (Integer[1]) (defaults to: 10)

    how often, in seconds, to check the unit

  • retries (Integer[1]) (defaults to: 2)

    how many times to retry the check before going critical

  • contact_group (String) (defaults to: 'admin')

    the monitoring contact group to alert

  • critical (Boolean) (defaults to: false)

    if true, this will be a paging alert



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'modules/systemd/manifests/monitor.pp', line 8

define systemd::monitor (
    Stdlib::HTTPUrl $notes_url,
    Wmflib::Ensure  $ensure         = 'present',
    Integer[1]      $check_interval = 10,
    Integer[1]      $retries        = 2,
    String          $contact_group  = 'admin',
    Boolean         $critical       = false,
) {
    # Location of the NRPE plugin; used both for the file resource below and
    # as the executable in the NRPE command line.
    $plugin_path = '/usr/local/lib/nagios/plugins/check_systemd_unit_status'

    # T225268 - the plugin script is always provisioned: its ensure is fixed
    # to present and does not follow $ensure. It is declared through
    # ensure_resource() rather than a plain file resource, presumably so that
    # several systemd::monitor instances on one host do not clash - confirm
    # against the stdlib documentation.
    ensure_resource('file', $plugin_path, {
        ensure => present,
        source => 'puppet:///modules/systemd/check_systemd_unit_status',
        mode   => '0555',
        owner  => 'root',
        group  => 'root',
    })

    # One NRPE check per monitored unit; the resource title is passed to the
    # plugin as its only argument.
    nrpe::monitor_service { "check_${title}_status":
        ensure         => $ensure,
        description    => "Check unit status of ${title}",
        nrpe_command   => "${plugin_path} ${title}",
        notes_url      => $notes_url,
        contact_group  => $contact_group,
        critical       => $critical,
        check_interval => $check_interval,
        retries        => $retries,
    }
}