Puppet Class: graphite::monitoring::graphite

Defined in:
modules/graphite/manifests/monitoring/graphite.pp

Overview



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'modules/graphite/manifests/monitoring/graphite.pp', line 9

# @summary Threshold checks on Graphite's own carbon relay and cache metrics.
#
# Declares five monitoring::graphite_threshold resources in one resource
# expression. The attributes shared by every check (percentage and
# nagios_critical) are set once in the per-expression default body; each
# titled body only lists what is specific to that check.
class graphite::monitoring::graphite {
    monitoring::graphite_threshold {
        default:
            percentage      => 80,
            nagios_critical => false,
        ;

        # Per-second drop rate of the frontend relays, summed over every
        # matching relay/destination series (null datapoints transformed first).
        'carbon-frontend-relay_drops':
            description     => 'carbon-frontend-relay metric drops',
            dashboard_links => [
                'https://grafana.wikimedia.org/dashboard/db/graphite-eqiad?orgId=1&panelId=21&fullscreen',
                'https://grafana.wikimedia.org/dashboard/db/graphite-codfw?orgId=1&panelId=21&fullscreen',
            ],
            metric          => 'sumSeries(transformNull(perSecond(carbon.relays.graphite*_frontend.destinations.*.dropped)))',
            from            => '5minutes',
            warning         => 25,
            critical        => 100,
        ;

        # Same drop-rate check, for the local relays.
        'carbon-local-relay_drops':
            description     => 'carbon-local-relay metric drops',
            dashboard_links => [
                'https://grafana.wikimedia.org/dashboard/db/graphite-eqiad?orgId=1&panelId=29&fullscreen',
                'https://grafana.wikimedia.org/dashboard/db/graphite-codfw?orgId=1&panelId=29&fullscreen',
            ],
            metric          => 'sumSeries(transformNull(perSecond(carbon.relays.graphite*_local.destinations.*.dropped)))',
            from            => '5minutes',
            warning         => 25,
            critical        => 100,
        ;

        # Can carbon-cache write to disk at all (e.g. permission problems)?
        'carbon-cache_write_error':
            description     => 'carbon-cache write error',
            dashboard_links => ['https://grafana.wikimedia.org/dashboard/db/graphite-eqiad?orgId=1&panelId=30&fullscreen'],
            metric          => 'secondYAxis(sumSeries(carbon.agents.graphite1004-*.errors))',
            from            => '10minutes',
            warning         => 1,
            critical        => 8,
        ;

        # Are the carbon-cache queues exceeding their capacity?
        'carbon-cache_overflow':
            description     => 'carbon-cache queues overflow',
            dashboard_links => ['https://grafana.wikimedia.org/dashboard/db/graphite-eqiad?orgId=1&panelId=8&fullscreen'],
            metric          => 'secondYAxis(sumSeries(carbon.agents.graphite1004-*.cache.overflow))',
            from            => '10minutes',
            warning         => 1,
            critical        => 8,
        ;

        # Is the number of newly created metrics too high?
        'carbon-cache_many_creates':
            description     => 'carbon-cache too many creates',
            dashboard_links => ['https://grafana.wikimedia.org/dashboard/db/graphite-eqiad?orgId=1&panelId=9&fullscreen'],
            metric          => 'sumSeries(carbon.agents.graphite1004-*.creates)',
            from            => '30min',
            warning         => 500,
            critical        => 1000,
        ;
    }
}