9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
|
# File 'modules/graphite/manifests/monitoring/graphite.pp', line 9
class graphite::monitoring::graphite (
    # Base URL of the Graphite instance; handed to every check declared below.
    Stdlib::HTTPUrl $graphite_url      = 'https://graphite.wikimedia.org/',
    # Glob selecting the carbon-cache agents under carbon.agents.* whose
    # counters are checked. Exposed as a parameter so that replacing the
    # Graphite host needs a single change (or a hiera override) instead of
    # coordinated edits to three separate metric expressions.
    String[1]       $carbon_agent_glob = 'graphite1005-*',
) {
    # Threshold checks on the carbon relays (dropped metrics) and on
    # carbon-cache (write errors, queue overflow, metric creation rate).
    monitoring::graphite_threshold {
        # Per-expression defaults: applied to every check in this declaration
        # unless the check sets the attribute itself.
        default:
            graphite_url    => $graphite_url,
            percentage      => 80,
            nagios_critical => false,
            notes_link      => 'https://wikitech.wikimedia.org/wiki/Graphite#Operations_troubleshooting';

        # is the frontend relay dropping metrics? (summed over all destinations)
        'carbon-frontend-relay_drops':
            description     => 'carbon-frontend-relay metric drops',
            dashboard_links => [
                'https://grafana.wikimedia.org/d/000000020/graphite-eqiad?orgId=1&viewPanel=21',
                'https://grafana.wikimedia.org/d/000000337/graphite-codfw?orgId=1&viewPanel=21',
            ],
            metric          => 'sumSeries(transformNull(perSecond(carbon.relays.graphite*_frontend.destinations.*.dropped)))',
            from            => '5minutes',
            warning         => 25,
            critical        => 100;

        # is the local relay dropping metrics? (summed over all destinations)
        'carbon-local-relay_drops':
            description     => 'carbon-local-relay metric drops',
            dashboard_links => [
                'https://grafana.wikimedia.org/d/000000020/graphite-eqiad?orgId=1&viewPanel=29',
                'https://grafana.wikimedia.org/d/000000337/graphite-codfw?orgId=1&viewPanel=29',
            ],
            metric          => 'sumSeries(transformNull(perSecond(carbon.relays.graphite*_local.destinations.*.dropped)))',
            from            => '5minutes',
            warning         => 25,
            critical        => 100;

        # is carbon-cache able to write to disk (e.g. permissions)
        'carbon-cache_write_error':
            description     => 'carbon-cache write error',
            dashboard_links => ['https://grafana.wikimedia.org/d/000000020/graphite-eqiad?orgId=1&viewPanel=30'],
            # double-quoted on purpose: the agent glob is interpolated
            metric          => "secondYAxis(sumSeries(carbon.agents.${carbon_agent_glob}.errors))",
            from            => '10minutes',
            warning         => 1,
            critical        => 8;

        # are carbon-cache queues overflowing their capacity?
        'carbon-cache_overflow':
            description     => 'carbon-cache queues overflow',
            dashboard_links => ['https://grafana.wikimedia.org/d/000000020/graphite-eqiad?orgId=1&viewPanel=8'],
            metric          => "secondYAxis(sumSeries(carbon.agents.${carbon_agent_glob}.cache.overflow))",
            from            => '10minutes',
            warning         => 1,
            critical        => 8;

        # are we creating too many metrics?
        'carbon-cache_many_creates':
            description     => 'carbon-cache too many creates',
            dashboard_links => ['https://grafana.wikimedia.org/d/000000020/graphite-eqiad?orgId=1&viewPanel=9'],
            metric          => "sumSeries(carbon.agents.${carbon_agent_glob}.creates)",
            from            => '30min',
            warning         => 500,
            critical        => 1000;
    }
}
|