Defined Type: cassandra::instance::monitoring

Defined in:
modules/cassandra/manifests/instance/monitoring.pp

Overview

Define: cassandra::instance::monitoring

Configures monitoring for a single Cassandra instance; the resource title is the instance name.

Usage

cassandra::instance::monitoring { 'instance-name':

instances      => ...
contact_group  => ...

}

Parameters:

  • contact_group (String) (defaults to: 'admins,team-services')
  • monitor_enabled (Boolean) (defaults to: true)
  • tls_use_pki (Boolean) (defaults to: false)
  • instances (Hash) (defaults to: {})
  • tls_cluster_name (Optional[String]) (defaults to: undef)
  • tls_port (Optional[Integer]) (defaults to: 7001)
  • cql_port (Optional[Integer]) (defaults to: 9042)


10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'modules/cassandra/manifests/instance/monitoring.pp', line 10

# Configures monitoring for one Cassandra instance. The resource title is
# the instance name and must be a key of the resolved instances hash.
#
# Checks are only declared when $tls_cluster_name is set.
#
# @param contact_group
#   Passed as contact_group to the monitoring::service checks.
# @param monitor_enabled
#   When false, the monitoring::service checks are ensured 'absent'.
#   The prometheus::blackbox checks are not affected by this flag.
# @param tls_use_pki
#   When true, prometheus::blackbox::check::tcp probes are declared for the
#   TLS and CQL ports and the monitoring::service checks are ensured 'absent'.
# @param instances
#   Hash keyed by instance name; the entry for this instance is read for
#   its 'listen_address'. When empty, $cassandra::instances is used instead.
# @param tls_cluster_name
#   When unset, no checks are declared at all.
# @param tls_port
#   Port used by the "-ssl" checks.
# @param cql_port
#   Port used by the "-cql" checks.
define cassandra::instance::monitoring (
    String            $contact_group    = 'admins,team-services',
    Boolean           $monitor_enabled  = true,
    Boolean           $tls_use_pki      = false,
    Hash              $instances        = {},
    Optional[String]  $tls_cluster_name = undef,
    Optional[Integer] $tls_port         = 7001,
    Optional[Integer] $cql_port         = 9042,
) {

    include cassandra

    # Fall back to the instances declared on the cassandra class when the
    # caller did not pass any.
    $_instances = $instances.empty ? {
        true    => $cassandra::instances,
        default => $instances,
    }
    $instance_name = $title

    # Validate against the *resolved* hash (not the raw parameter, which is
    # empty whenever the fallback above is in effect), and do so before the
    # instance entry is dereferenced so an unknown instance produces this
    # message rather than an error about indexing undef.
    if ! has_key($_instances, $instance_name) {
        fail("instance ${instance_name} not found in ${_instances}")
    }

    $this_instance  = $_instances[$instance_name]
    $listen_address = $this_instance['listen_address']

    # The 'default' instance uses the unsuffixed service name.
    $service_name = $instance_name ? {
        'default' => 'cassandra',
        default   => "cassandra-${instance_name}"
    }

    $ensure_monitor = $monitor_enabled.bool2str('present', 'absent')

    # SSL cert expiration monitoring (T120662)
    if $tls_cluster_name {
        # With PKI certificates the blackbox probes below replace the
        # monitoring::service checks, so the latter are removed.
        $ensure_nagios_monitor = $tls_use_pki ? {
            true  => 'absent',
            false => $ensure_monitor,
        }
        if $tls_use_pki {
            # The TLS certificates provided by PKI are automatically
            # renewed by puppet, and reloaded by Cassandra automatically.
            # This alert is needed to warn the admins in case something goes
            # wrong and the new cert is not picked up as expected.
            prometheus::blackbox::check::tcp { "${service_name}-ssl":
                # The blackbox probe doesn't support one servername
                # for each instance, so we fallback to a CN: cassandra
                # to have a single config supported by all PKI-enabled
                # instances.
                server_name             => 'cassandra',
                port                    => $tls_port,
                force_tls               => true,
                certificate_expiry_days => 5,
                ip4                     => $listen_address,
                ip_families             => ['ip4'],
                instance_label          => "${facts['hostname']}-${instance_name}",
            }

            prometheus::blackbox::check::tcp { "${service_name}-cql":
                # The blackbox probe doesn't support one servername
                # for each instance, so we fallback to a CN: cassandra
                # to have a single config supported by all PKI-enabled
                # instances.
                server_name             => 'cassandra',
                port                    => $cql_port,
                force_tls               => true,
                certificate_expiry_days => 5,
                ip4                     => $listen_address,
                ip_families             => ['ip4'],
                instance_label          => "${facts['hostname']}-${instance_name}",
            }
        }

        # CQL query interface monitoring (T93886)
        monitoring::service { "${service_name}-cql":
            ensure        => $ensure_nagios_monitor,
            description   => "${service_name} CQL ${listen_address}:${cql_port}",
            check_command => "check_tcp_ip!${listen_address}!${cql_port}",
            contact_group => $contact_group,
            notes_url     => 'https://phabricator.wikimedia.org/T93886',
        }

        monitoring::service { "${service_name}-ssl":
            ensure        => $ensure_nagios_monitor,
            description   => "${service_name} SSL ${listen_address}:${tls_port}",
            check_command => "check_ssl_on_host_port!${facts['hostname']}-${instance_name}!${listen_address}!${tls_port}",
            contact_group => $contact_group,
            notes_url     => 'https://wikitech.wikimedia.org/wiki/Cassandra#Installing_and_generating_certificates',
        }
    }
}