Puppet Class: pybal::monitoring

Defined in:
modules/pybal/manifests/monitoring.pp

Overview

Parameters:

  • config_host (String)
  • config_source (Enum['etcd', 'http'])
  • etcd_port (Wmflib::UserIpPort)
  • services (Hash[String, Wmflib::Service])


4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'modules/pybal/manifests/monitoring.pp', line 4

# @summary Installs the PyBal check plugins and declares the NRPE and
#   Prometheus-based monitoring checks for a PyBal host.
#
# @param config_host
#   Host passed as the first argument to check_established_connections.
#   Only used when $config_source is 'etcd'.
# @param config_source
#   Where PyBal reads its configuration from. The etcd connection check is
#   only declared when this is 'etcd'.
# @param etcd_port
#   Port passed as the second argument to check_established_connections.
#   Only used when $config_source is 'etcd'.
# @param services
#   Services keyed by name. For each one, the size of its
#   ['ip'][$::site] entry is added up to obtain the connection count passed
#   as the third argument to check_established_connections.
class pybal::monitoring(
    String $config_host,
    Enum['etcd', 'http'] $config_source,
    Wmflib::UserIpPort $etcd_port,
    Hash[String, Wmflib::Service] $services,
) {
    require ::pybal::configuration

    # Packages needed on the host; presumably runtime dependencies of the
    # check scripts installed below -- verify against the scripts themselves.
    require_package([
        'libmonitoring-plugin-perl',
        'libwww-perl',
        'python-prometheus-client',
        'python-requests',
    ])

    # Plugin used by the 'pybal_backends' NRPE check.
    file { '/usr/local/lib/nagios/plugins/check_pybal':
        ensure => present,
        source => 'puppet:///modules/pybal/check_pybal',
        owner  => 'root',
        group  => 'root',
        mode   => '0555',
    }

    nrpe::monitor_service { 'pybal_backends':
        description  => 'PyBal backends health check',
        nrpe_command => '/usr/local/lib/nagios/plugins/check_pybal --url http://localhost:9090/alerts',
        require      => File['/usr/local/lib/nagios/plugins/check_pybal'],
        notes_url    => 'https://wikitech.wikimedia.org/wiki/PyBal',
    }

    # Plugin used by the 'pybal_ipvs_diff' NRPE check; installed without the
    # '.py' extension of its source file.
    file { '/usr/local/lib/nagios/plugins/check_pybal_ipvs_diff':
        ensure => present,
        source => 'puppet:///modules/pybal/check_pybal_ipvs_diff.py',
        owner  => 'root',
        group  => 'root',
        mode   => '0555',
    }

    nrpe::monitor_service { 'pybal_ipvs_diff':
        description    => 'PyBal IPVS diff check',
        nrpe_command   => "/usr/local/lib/nagios/plugins/check_pybal_ipvs_diff --req-timeout=10.0 --prometheus-url http://${::ipaddress}:9100/metrics",
        check_interval => 5,
        timeout        => 60,
        require        => File['/usr/local/lib/nagios/plugins/check_pybal_ipvs_diff'],
        notes_url      => 'https://wikitech.wikimedia.org/wiki/PyBal',
    }

    if $config_source == 'etcd' {
        # Get the configuration of all services for this LVS host
        # then sum all values.
        # The fold is seeded with 0 so that an empty $services hash yields 0
        # rather than undef, which would otherwise leave the last argument of
        # the NRPE command below empty.
        # NOTE(review): size() is applied directly to $service['ip'][$::site];
        # this assumes every service passed in has an entry for the local
        # site -- confirm against the caller.
        $n_etcd_connections = map($services) |$name,$service| {
            size($service['ip'][$::site])
        }.reduce(0) |$memo,$value| { $memo + $value }

        # The File resource required here is not declared in this class; it
        # must be provided elsewhere in the catalog.
        nrpe::monitor_service { 'pybal_etcd_connections':
            description    => 'PyBal connections to etcd',
            nrpe_command   => "/usr/lib/nagios/plugins/check_established_connections ${config_host} ${etcd_port} ${n_etcd_connections}",
            check_interval => 5,
            timeout        => 60,
            require        => File['/usr/lib/nagios/plugins/check_established_connections'],
            notes_url      => 'https://wikitech.wikimedia.org/wiki/PyBal',
        }
    }


    # Label selector restricting both metrics in the query below to this
    # host's instance on port 9090.
    $prometheus_labels = "{instance=\"${::hostname}:9090\"}"
    monitoring::check_prometheus { 'pybal_bgp_sessions':
        description     => 'PyBal BGP sessions are established',
        dashboard_links => ["https://grafana.wikimedia.org/dashboard/db/pybal-bgp?var-datasource=${::site} prometheus/ops"],
        query           => "scalar(pybal_bgp_session_established${prometheus_labels} and ignoring (local_asn, peer) pybal_bgp_enabled${prometheus_labels} == 1)",
        method          => 'le',
        warning         => 0,
        critical        => 0,
        nan_ok          => true,
        prometheus_url  => "http://prometheus.svc.${::site}.wmnet/ops",
        notes_link      => 'https://wikitech.wikimedia.org/wiki/PyBal',
    }
}