Puppet Class: profile::prometheus::alerts

Defined in:
modules/profile/manifests/prometheus/alerts.pp

Overview

SPDX-License-Identifier: Apache-2.0

Class: profile::prometheus::alerts

Install Icinga alerts based on Prometheus metrics. NOTE: include this class from only one host; Icinga will generate a separate set of alerts for every host that includes this class.

Parameters:

  • datacenters (Array[String]) (defaults to: lookup('datacenters'))


8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'modules/profile/manifests/prometheus/alerts.pp', line 8

class profile::prometheus::alerts (
    Array[String] $datacenters = lookup('datacenters'),
) {
    # NOTE(review): $datacenters is not referenced anywhere in this class body;
    # it is kept so the class interface stays unchanged.

    # The two Prometheus 'ops' endpoints used below, one per datacenter.
    # Each MirrorMaker alert set takes the endpoint of the source cluster's
    # datacenter as source_prometheus_url and the endpoint of the destination
    # cluster's datacenter as prometheus_url.
    $prometheus_eqiad = 'http://prometheus.svc.eqiad.wmnet/ops'
    $prometheus_codfw = 'http://prometheus.svc.codfw.wmnet/ops'

    # MirrorMaker alerting: watch throughput and dropped messages.

    # main-eqiad -> jumbo-eqiad. Both ends are in eqiad, so the same
    # Prometheus endpoint is used for source and destination.
    profile::kafka::mirror::alerts { 'main-eqiad_to_jumbo-eqiad':
        source_prometheus_url => $prometheus_eqiad,
        prometheus_url        => $prometheus_eqiad,
        topic_blacklist       => '.*(change-prop|\.job\.|changeprop).*',
        # Temporarily routed to the Data Platform SREs; switch back to
        # admins soon.
        contact_group         => 'team-data-platform',
    }

    # Cross-DC mirroring between main-eqiad and main-codfw, one alert set
    # per direction.

    # main-eqiad -> main-codfw.
    profile::kafka::mirror::alerts { 'main-eqiad_to_main-codfw':
        source_prometheus_url => $prometheus_eqiad,
        prometheus_url        => $prometheus_codfw,
    }

    # main-codfw -> main-eqiad. main-eqiad receives the bulk of the MediaWiki
    # traffic and currently pulls messages from main-codfw at a very low
    # rate, so the warning threshold is set low: the aim is only to notice
    # if throughput drops to zero.
    # NOTE(review): the previous comment here said this alerts the analytics
    # admins "until alerts are more stable", but no contact_group is set on
    # this resource, so whatever default the defined type has applies --
    # confirm the intended recipients.
    profile::kafka::mirror::alerts { 'main-codfw_to_main-eqiad':
        source_prometheus_url => $prometheus_codfw,
        prometheus_url        => $prometheus_eqiad,
        warning_throughput    => 3,
    }
}