Puppet Class: profile::elasticsearch::cirrus

Defined in:
modules/profile/manifests/elasticsearch/cirrus.pp

Overview

SPDX-License-Identifier: Apache-2.0

This class configures elasticsearch

Parameters:

For documentation of parameters, see the elasticsearch profile.

expose_http

For historical reasons we expose HTTP endpoints. For new clusters we want to disable that, and clean up the old ones. During the transition, let's make this configurable.

Parameters:

  • cluster (String) (defaults to: lookup('cluster'))
  • ferm_srange (String) (defaults to: lookup('profile::elasticsearch::cirrus::ferm_srange'))
  • ferm_ro_srange (String) (defaults to: lookup('profile::elasticsearch::cirrus::ferm_ro_srange', {default_value => ''}))
  • expose_http (Boolean) (defaults to: lookup('profile::elasticsearch::cirrus::expose_http'))
  • storage_device (String) (defaults to: lookup('profile::elasticsearch::cirrus::storage_device'))
  • enable_remote_search (Boolean) (defaults to: lookup('profile::elasticsearch::cirrus::enable_remote_search'))
  • ssl_provider (Profile::Pki::Provider) (defaults to: lookup('profile::elasticsearch::cirrus::ssl_provider'))


13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# File 'modules/profile/manifests/elasticsearch/cirrus.pp', line 13

class profile::elasticsearch::cirrus(
    String $cluster = lookup('cluster'),
    String $ferm_srange = lookup('profile::elasticsearch::cirrus::ferm_srange'),
    String $ferm_ro_srange = lookup('profile::elasticsearch::cirrus::ferm_ro_srange', {default_value => ''}),
    Boolean $expose_http = lookup('profile::elasticsearch::cirrus::expose_http'),
    String $storage_device = lookup('profile::elasticsearch::cirrus::storage_device'),
    Boolean $enable_remote_search = lookup('profile::elasticsearch::cirrus::enable_remote_search'),
    Profile::Pki::Provider $ssl_provider = lookup('profile::elasticsearch::cirrus::ssl_provider'),
) {
    # Builds on the base elasticsearch profile. For every instance listed in
    # $::profile::elasticsearch::filtered_instances this class declares the
    # firewall rules, the TLS proxies, hot-threads logging and the prometheus
    # exporters; it also sets the block device read-ahead and installs the
    # T264053 mitigation.
    include ::profile::elasticsearch

    # syslog logstash transport type depends on this. See T225125.
    include ::profile::rsyslog::udp_json_logback_compat

    # nginx, which terminates tls for elasticsearch, needs `/etc/ssl/dhparam.pem` to be in place in order to function.
    class { '::sslcert::dhparam': }

    package {'wmf-elasticsearch-search-plugins':
        ensure  => present,
        require => [Class['Java'], Package['elasticsearch-oss']],
    }

    # Since the elasticsearch service is dynamically named after the cluster
    # name, and because there can be multiple elasticsearch services on the
    # same node we need to use collectors.
    Package['wmf-elasticsearch-search-plugins'] -> Service <| tag == 'elasticsearch_services' |>

    # Alternatively we could pass these again?
    # certificate_name and tls_port aren't even
    # elasticsearch::instance params,

    $::profile::elasticsearch::filtered_instances.each |$instance_title, $instance_params| {
        $cluster_name = $instance_params['cluster_name']
        $http_port = $instance_params['http_port']
        $tls_port = $instance_params['tls_port']
        $tls_ro_port = $instance_params['tls_ro_port']

        # Plain HTTP is only opened in the firewall when explicitly requested.
        if $expose_http {
            ferm::service { "elastic-http-${http_port}":
                proto   => 'tcp',
                port    => $http_port,
                notrack => true,
                srange  => $ferm_srange,
            }
        }

        ferm::service { "elastic-https-${tls_port}":
            proto  => 'tcp',
            port   => $tls_port,
            srange => $ferm_srange,
        }

        # Certificate-related parameters of the TLS proxy depend on the provider.
        # NOTE(review): $proxy_cert_params is only assigned for 'acme_chief' and
        # 'cfssl'. If Profile::Pki::Provider admits other values, confirm they
        # are intentionally unsupported here.
        if $ssl_provider == 'acme_chief' {
            $proxy_cert_params = {
                acme_chief    => true,
                acme_certname => $cluster,
                server_name   => $instance_params['certificate_name'],
            }
        } elsif $ssl_provider == 'cfssl' {
            # The same names are requested on the certificate and used as
            # server aliases of the proxy.
            $cert_hosts = [$instance_params['certificate_name'], "search.svc.${::site}.wmnet"]
            $cfssl_paths = profile::pki::get_cert('discovery', $facts['networking']['fqdn'], {
                hosts => $cert_hosts,
            })

            $proxy_cert_params = {
                cfssl_paths    => $cfssl_paths,
                server_aliases => $cert_hosts,
            }
        }

        $proxy_params = merge($proxy_cert_params, {
            upstream_port => $http_port,
            tls_port      => $tls_port,
            enable_http2  => false,
        })

        elasticsearch::tlsproxy { $cluster_name:
            * => $proxy_params,
        }

        # Optional second proxy on its own port, flagged read_only and
        # restricted to a dedicated source range.
        if $tls_ro_port {
            if empty($ferm_ro_srange) {
                fail('Read only port specified without a read only srange')
            }

            ferm::service { "elastic-ro-https-${tls_ro_port}":
                proto  => 'tcp',
                port   => $tls_ro_port,
                srange => $ferm_ro_srange,
            }

            elasticsearch::tlsproxy { "${cluster_name}-ro":
                * => merge($proxy_params, {
                    tls_port  => $tls_ro_port,
                    read_only => true,
                })
            }
        }

        elasticsearch::log::hot_threads_cluster { $cluster_name:
            http_port => $http_port,
        }

        # Also limit this check to only the master nodes to reduce duplication
        # of this check on all nodes until we find a better way to run this check
        # only on icinga nodes
        # The structured networking fact is used (as for the cfssl certificate
        # above) so the membership test does not depend on legacy facts.
        if $facts['networking']['fqdn'] in $instance_params['unicast_hosts'] {
            elasticsearch::cross_cluster_settings { $instance_title:
                http_port            => $http_port,
                settings             => $::profile::elasticsearch::configured_instances,
                enable_remote_search => $enable_remote_search,
            }

            icinga::monitor::elasticsearch::cirrus_settings_check { $instance_title:
                port                 => $http_port,
                settings             => $::profile::elasticsearch::configured_instances,
                enable_remote_search => $enable_remote_search,
            }
        }
    }

    # Set read_ahead_kb on the storage device whenever udev sees it added or changed.
    $read_ahead_kb = 16
    udev::rule { 'elasticsearch-readahead':
        content => "SUBSYSTEM==\"block\", KERNEL==\"${storage_device}\", ACTION==\"add|change\", ATTR{bdi/read_ahead_kb}=\"${read_ahead_kb}\"",
    }

    ## BEGIN Temporary mitigation put in place for T264053
    # Source code lives here: https://phabricator.wikimedia.org/P5883
    package {'elasticsearch-madvise':
        ensure => present,
    }

    # Add elastic bin to root's PATH
    file_line { 'elastic_bin_bashrc':
        ensure => present,
        path   => '/root/.bashrc',
        line   => "PATH=\${PATH}:/usr/share/elasticsearch/bin  # Managed by puppet",
    }

    # Wrapper script to run elasticsearch-madvise-random once per elasticsearch process, passing PID
    file { '/usr/local/bin/elasticsearch-disable-readahead.sh':
        ensure => file,
        owner  => 'root',
        group  => 'root',
        mode   => '0555',
        source => 'puppet:///modules/elasticsearch/cirrus/elasticsearch-disable-readahead.sh',
    }

    # Run the wrapper every 30 mins
    systemd::timer::job { 'elasticsearch-disable-readahead':
        description => 'Disables readahead on all open files every 30 minutes to alleviate Cirrussearch / elasticsearch IO load spikes',
        command     => '/usr/local/bin/elasticsearch-disable-readahead.sh',
        user        => 'root',
        interval    => [{'start' => 'OnUnitActiveSec', 'interval' => '30min'}, {'start' => 'OnBootSec', 'interval' => '1min'}],
    }
    ## END   Temporary mitigation put in place for T264053

    # Install prometheus data collection
    # reduce() threads the next free exporter port through the instances:
    # each instance takes two consecutive ports (starting at 9108) and the
    # lambda returns the first port for the following instance.
    $::profile::elasticsearch::filtered_instances.reduce(9108) |$prometheus_port, $kv_pair| {
        $instance_params = $kv_pair[1]
        $http_port = $instance_params['http_port']
        # indices_to_monitor is optional; fall back to an empty list.
        $indices_to_monitor = $instance_params['indices_to_monitor'] ? {
            undef   => [],
            default => $instance_params['indices_to_monitor']
        }

        profile::prometheus::elasticsearch_exporter { "${facts['networking']['hostname']}:${http_port}":
            prometheus_port    => $prometheus_port,
            elasticsearch_port => $http_port,
        }
        profile::prometheus::wmf_elasticsearch_exporter { "${facts['networking']['hostname']}:${http_port}":
            prometheus_port    => $prometheus_port + 1,
            elasticsearch_port => $http_port,
            indices_to_monitor => $indices_to_monitor,
        }
        $prometheus_port + 2
    }

    motd::script { 'cluster_memberships':
        ensure   => present,
        priority => 96,
        source   => 'puppet:///modules/elasticsearch/elastic.motd',
    }
}