Puppet Class: role::prometheus::global

Defined in:
modules/role/manifests/prometheus/global.pp

Overview



1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# File 'modules/role/manifests/prometheus/global.pp', line 1

# @summary Role for the global Prometheus server.
#
# Declares a Prometheus instance named 'global' whose single extra scrape job
# ('federate-ops') pulls a selected subset of metrics from the per-datacenter
# Prometheus services listed under 'static_configs'.
#
# Besides the server itself, the class declares:
#   - the rules file rules_global.yml for the 'global' instance,
#   - a prometheus::web resource whose proxy_pass points at the local server
#     (port 9904),
#   - a ferm::service for TCP port 80 with source range $DOMAIN_NETWORKS,
#   - an rsync module for the metrics directory, currently ensured absent.
#
# The class takes no parameters; the rsync allow-list is read from the
# 'prometheus_nodes' hiera key.
class role::prometheus::global {

    system::role { 'prometheus::global':
        description => 'Prometheus server (global)',
    }

    include ::profile::base::firewall

    # Pull selected metrics from all DC-local Prometheus servers.
    # This list is handed to prometheus::server below as scrape_configs_extra.
    $federation_jobs = [
      {
        'job_name'        => 'federate-ops',
        'honor_labels'    => true,
        # Presumably the federate endpoint of each site's 'ops' Prometheus
        # instance -- confirm against the DC-local server configuration.
        'metrics_path'    => '/ops/federate',
        'params'          => {
          # Pull the union of metrics matching the following queries.
          # Note: all regexps are implicitly anchored with ^$
          # (so the explicit leading '^' in the patterns below is redundant
          # but harmless).
          'match[]' => [
            # Up status for targets and exporters
            '{__name__=~"up"}',
            '{__name__=~"^([a-z_]+)_up"}',
            # Self-monitoring job
            '{job="prometheus"}',
            # Per-cluster aggregated metrics
            '{__name__=~"^cluster.*:.*"}',
            '{__name__=~"^instance.*:.*"}',
            # Version stats for auditing purposes
            '{__name__="node_uname_info"}',
            '{__name__="node_exporter_build_info"}',
            '{__name__="varnish_version"}',
            '{__name__="mysql_version_info"}',
            '{__name__="mysqld_exporter_build_info"}',
            '{__name__="memcached_version"}',
            # Service-level aggregated metrics
            '{__name__=~"^.*:mysql_.*"}',
            '{__name__=~"^.*:memcached_.*"}',
            '{__name__=~"^.*:varnish_.*"}',
            '{__name__=~"^.*:trafficserver_.*"}',
            # blackbox_exporter probes results
            '{__name__=~"^probe_.*"}',
            # Bird prefix export count
            '{__name__="bird_protocol_prefix_export_count"}',
            # Swift container/account stats
            '{__name__=~"^swift_account_stats.*"}',
            '{__name__=~"^swift_container_stats.*"}',
            # IPsec Status metrics
            '{__name__="ipsec_status"}',
            # Global authdns metrics
            '{__name__=~"^gdnsd_.*"}',
          ],
        },
        # One Prometheus service address per site; a new site has to be added
        # here before its metrics are federated.
        'static_configs' => [
          { 'targets' => [
              'prometheus.svc.eqiad.wmnet',
              'prometheus.svc.codfw.wmnet',
              'prometheus.svc.ulsfo.wmnet',
              'prometheus.svc.esams.wmnet',
              'prometheus.svc.eqsin.wmnet',
            ],
          },
        ],
        # The following labels are external_labels added for Thanos to pick up.
        # Discard them here for backwards compatibility, and because 'replica'
        # would flip depending on which Prometheus replica replies, creating
        # new metrics in the process.
        'metric_relabel_configs' => [
          { 'regex'         => '(prometheus|replica)',
            'action'        => 'labeldrop',
          },
        ],
      },
    ]

    # Rules file for the 'global' instance, shipped from this module's files.
    prometheus::rule { 'rules_global.yml':
        instance => 'global',
        source   => 'puppet:///modules/role/prometheus/rules_global.yml',
    }

    prometheus::server { 'global':
        # 2.25 years. The extra .25 is to allow for year-over-year comparisons
        # (19656h = 819 days = 117 weeks).
        storage_retention    => '19656h',
        # Loopback only; port 9904 matches the prometheus::web proxy_pass
        # declared below.
        listen_address       => '127.0.0.1:9904',
        scrape_configs_extra => $federation_jobs,
    }

    # Proxies to the server declared above (same port as its listen_address).
    prometheus::web { 'global':
        proxy_pass => 'http://localhost:9904/global',
    }

    # '$DOMAIN_NETWORKS' is single-quoted, so Puppet passes it through
    # literally instead of interpolating it; presumably it is expanded on the
    # ferm side -- confirm against the ferm configuration.
    ferm::service { 'prometheus-web-global':
        proto  => 'tcp',
        port   => '80',
        srange => '$DOMAIN_NETWORKS',
    }

    # Used for migrations / hardware refresh, but not continuously
    # The module below is therefore declared with ensure => absent; presumably
    # it is switched to present for the duration of a migration -- confirm.
    include rsync::server
    # NOTE(review): hiera() is deprecated in newer Puppet releases in favour of
    # lookup(); left unchanged because the Puppet version in use is not visible
    # from this file -- confirm before migrating.
    $prometheus_nodes = hiera('prometheus_nodes')
    rsync::server::module { 'prometheus-global':
        ensure      => absent,
        path        => '/srv/prometheus/global/metrics',
        uid         => 'prometheus',
        gid         => 'prometheus',
        hosts_allow => $prometheus_nodes,
        auto_ferm   => true,
    }
}