Puppet Class: profile::graphite::base
- Defined in:
- modules/profile/manifests/graphite/base.pp
Overview
SPDX-License-Identifier: Apache-2.0
Class: profile::graphite::base
Base class for setting up a graphite instance.
Sets up graphite + carbon listeners, with 8 carbon listeners running on localhost feeding data into graphite. Also sets up basic icinga checks.
Parameters
- storage_dir
-
Location to store the whisper files used by graphite in
- provide_vhost
-
If enabled, configure an Apache vhost config (which is provided by a different profile if running with CAS auth)
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 |
# File 'modules/profile/manifests/graphite/base.pp', line 20
class profile::graphite::base(
$storage_dir = '/var/lib/carbon',
$hostname = 'graphite.wikimedia.org',
$cors_origins = [ 'https://grafana.wikimedia.org', 'https://grafana-next.wikimedia.org', 'https://grafana-rw.wikimedia.org', 'https://grafana-next-rw.wikimedia.org' ],
$c_relay_settings = {},
$cluster_servers = lookup('profile::graphite::base::cluster_servers'),
$uwsgi_processes = lookup('profile::graphite::base::uwsgi_processes'),
$uwsgi_max_request_duration_seconds = undef, # lint:ignore:wmf_styleguide
$uwsgi_max_request_rss_megabytes = undef, # lint:ignore:wmf_styleguide
$provide_vhost = true, # lint:ignore:wmf_styleguide
Hash $ldap_config = lookup('ldap'),
) {
include ::passwords::graphite
$carbon_storage_dir = $storage_dir
class { '::graphite':
# First match wins with storage schemas
# lint:ignore:arrow_alignment
storage_schemas => {
# Retain daily metrics for 25 years. Per metric size: 109528 bytes
'daily' => {
pattern => '(^daily\..*|.*\.daily$)',
retentions => '1d:25y',
},
# Retain aggregated data at 1 hour resolution for 1 year and at
# 1 day resolution for 5 years. Per metric size: 127060 bytes
'hourly' => {
pattern => '(^hourly\..*|.*\.hourly$)',
retentions => '1h:1y,1d:5y',
},
# Decom https://phabricator.wikimedia.org/T335242
'coal' => {
pattern => '^coal\..*',
retentions => '1m:1y',
},
# Retain aggregated data at a one-minute resolution for one week; at
# five-minute resolution for two weeks; at 15-minute resolution for
# one month; one-hour resolution for one year, and 1d for five years.
# Per metric size: 331000 bytes
# Note that the different schemas are written to configuration file
# in alphabetical order and matched in that order. The "default"
# schema has to be the last one in the list, thus the "zzdefault"
# name.
'zzdefault' => {
pattern => '.*',
retentions => '1m:7d,5m:14d,15m:30d,1h:1y,1d:5y',
},
},
# lint:endignore
# Aggregation methods for whisper files.
# lint:ignore:arrow_alignment
storage_aggregation => {
# Cassandra "count" metrics are treated like gauges, not like
# statsd "counters". See also T121789 for rationale.
'10_cassandra_count' => {
pattern => '^cassandra\.[^.]+\.org\.apache\.cassandra\..*\.count$',
xFilesFactor => 0.01,
aggregationMethod => 'average',
},
'min' => {
pattern => '\.min$',
xFilesFactor => 0.01,
aggregationMethod => 'min',
},
'max' => {
pattern => '\.max$',
xFilesFactor => 0.01,
aggregationMethod => 'max',
},
'count' => {
pattern => '\.count$',
xFilesFactor => 0,
aggregationMethod => 'sum',
},
'sum' => {
pattern => '\.sum$',
xFilesFactor => 0,
aggregationMethod => 'sum',
},
# statsite extended counters
'lower' => {
pattern => '\.lower$',
xFilesFactor => 0.01,
aggregationMethod => 'min',
},
'upper' => {
pattern => '\.upper$',
xFilesFactor => 0.01,
aggregationMethod => 'max',
},
# Like storage_schemas, this hash is written in order in the
# configuration file and read in order by graphite
# (lib/carbon/storage.py). Therefore put default as last item for
# matching to work correctly.
'zzdefault' => {
pattern => '.*',
xFilesFactor => 0.01,
},
},
# lint:endignore
# All metric data goes through a single carbon-relay instance, which
# forwards each data point to one of eight carbon-cache instances, using
# a consistent hash ring to distribute the load.
#
# Why is this necessary? Because carbon-cache is CPU-bound, and the Python
# GIL prevents it from utilizing multiple processor cores efficiently.
#
# cf. "Single node, multiple carbon-caches"
# <http://bitprophet.org/blog/2013/03/07/graphite/>
#
# If we need to scale up, the next step is multi-node.
# <http://tinyurl.com/graphite-cluster-setup>.
carbon_settings => {
'cache' => {
line_receiver_interface => '127.0.0.1', # Only the relay binds to 0.0.0.0.
pickle_receiver_interface => '127.0.0.1',
max_cache_size => 'inf',
max_creates_per_minute => '100',
max_updates_per_second_on_shutdown => '1000',
},
## Carbon caches ##
'cache:a' => {
line_receiver_port => 2103,
pickle_receiver_port => 2104,
cache_query_port => 7102,
},
'cache:b' => {
line_receiver_port => 2203,
pickle_receiver_port => 2204,
cache_query_port => 7202,
},
'cache:c' => {
line_receiver_port => 2303,
pickle_receiver_port => 2304,
cache_query_port => 7302,
},
'cache:d' => {
line_receiver_port => 2403,
pickle_receiver_port => 2404,
cache_query_port => 7402,
},
'cache:e' => {
line_receiver_port => 2503,
pickle_receiver_port => 2504,
cache_query_port => 7502,
},
'cache:f' => {
line_receiver_port => 2603,
pickle_receiver_port => 2604,
cache_query_port => 7602,
},
'cache:g' => {
line_receiver_port => 2703,
pickle_receiver_port => 2704,
cache_query_port => 7702,
},
'cache:h' => {
line_receiver_port => 2803,
pickle_receiver_port => 2804,
cache_query_port => 7802,
},
## Carbon relay ##
'relay' => {
pickle_receiver_interface => '0.0.0.0',
# disabled, see ::graphite::carbon_c_relay
line_receiver_port => '0',
relay_method => 'consistent-hashing',
max_queue_size => '500000',
destinations => [
'127.0.0.1:2104:a',
'127.0.0.1:2204:b',
'127.0.0.1:2304:c',
'127.0.0.1:2404:d',
'127.0.0.1:2504:e',
'127.0.0.1:2604:f',
'127.0.0.1:2704:g',
'127.0.0.1:2804:h',
],
},
},
storage_dir => $carbon_storage_dir,
whisper_lock_writes => true,
c_relay_settings => $c_relay_settings,
}
class { '::graphite::web':
admin_user => $::passwords::graphite::user,
admin_pass => $::passwords::graphite::pass,
remote_user_auth => true,
secret_key => $::passwords::graphite::secret_key,
storage_dir => $carbon_storage_dir,
documentation_url => '//wikitech.wikimedia.org/wiki/Graphite',
cluster_servers => $cluster_servers,
cors_origins => $cors_origins,
uwsgi_max_request_duration_seconds => $uwsgi_max_request_duration_seconds,
uwsgi_max_request_rss_megabytes => $uwsgi_max_request_rss_megabytes,
uwsgi_processes => $uwsgi_processes,
}
if $provide_vhost {
httpd::site { $hostname:
content => template('role/graphite/graphite.apache.erb'),
}
}
ferm::service { 'graphite-http':
proto => 'tcp',
port => [80],
}
# This check goes to the backend, which is http.
monitoring::service { 'graphite_render':
description => "${hostname} render",
check_command => "check_http_url!${hostname}!/render",
notes_url => 'https://wikitech.wikimedia.org/wiki/Graphite#Operations_troubleshooting',
}
# Probe the API, the "carbon" hierarchy should always exist
monitoring::service { 'graphite_api':
description => "${hostname} api",
check_command => "check_http_url!${hostname}!/metrics/find?query=carbon.*",
notes_url => 'https://wikitech.wikimedia.org/wiki/Graphite#Operations_troubleshooting',
}
}
|