Puppet Class: profile::maps::alerts

Defined in:
modules/profile/manifests/maps/alerts.pp

Overview

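Defines the monitoring checks for Maps: a Graphite threshold check on tile generation and Prometheus checks on OSM synchronization lag in eqiad and codfw.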

Parameters:

  • graphite_url (Any) (defaults to: hiera('graphite_url'))


2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'modules/profile/manifests/maps/alerts.pp', line 2

# Monitoring checks for Maps: one Graphite threshold check on Tilerator tile
# generation, plus one Prometheus check per datacenter (eqiad, codfw) on OSM
# synchronization lag.
#
# @param graphite_url
#   Passed through as the graphite_url of the tile generation check.
#   Defaults to the 'graphite_url' Hiera key.
class profile::maps::alerts($graphite_url = hiera('graphite_url')) {
    monitoring::graphite_threshold { 'tilerator-tile-generation':
        description     => 'Maps tiles generation',
        dashboard_links => ['https://grafana.wikimedia.org/dashboard/db/maps-performances?panelId=8&fullscreen&orgId=1'],
        # transformNull(..., 0) makes missing datapoints count as zero.
        metric          => 'transformNull(sumSeries(tilerator.gen.*.*.*.done.sample_rate),0)',
        # Tilerator should be generating tiles at least 2 hours per day.
        # Values need to be adjusted if synchronization frequency is changed.
        under           => true,
        warning         => 10,
        critical        => 5,
        from            => '1day',
        percentage      => 90,
        graphite_url    => $graphite_url,
        notes_link      => 'https://wikitech.wikimedia.org/wiki/Maps/Runbook',
    }

    # The two OSM lag checks share the same query and thresholds; they differ
    # only in title, description, dashboard panel and Prometheus endpoint.
    monitoring::check_prometheus { 'maps-osm-sync-lag-eqiad':
        description     => 'Maps - OSM synchronization lag - eqiad',
        dashboard_links => ['https://grafana.wikimedia.org/dashboard/db/maps-performances?panelId=11&fullscreen&orgId=1'],
        # Largest (now - osm_sync_timestamp) across the "maps" cluster, in seconds.
        query           => 'scalar(max(time()-osm_sync_timestamp{cluster="maps"}))',
        warning         => 49 * 3600, # 49 hours
        critical        => 3 * 24 * 3600, # 3 days
        prometheus_url  => 'http://prometheus.svc.eqiad.wmnet/ops',
        notes_link      => 'https://wikitech.wikimedia.org/wiki/Maps/Runbook',
    }

    # NOTE(review): the title ends in 'codf' rather than 'codfw'. Left unchanged
    # here in case other resources reference it by title; confirm and rename.
    monitoring::check_prometheus { 'maps-osm-sync-lag-codf':
        description     => 'Maps - OSM synchronization lag - codfw',
        dashboard_links => ['https://grafana.wikimedia.org/dashboard/db/maps-performances?panelId=12&fullscreen&orgId=1'],
        # NOTE(review): a previous comment here said the check was restricted to
        # maps2001 while data was being reloaded on maps2004, but the query has
        # no instance filter and is identical to the eqiad one. Confirm whether
        # a restriction is still intended.
        query           => 'scalar(max(time()-osm_sync_timestamp{cluster="maps"}))',
        warning         => 49 * 3600, # 49 hours
        critical        => 3 * 24 * 3600, # 3 days
        prometheus_url  => 'http://prometheus.svc.codfw.wmnet/ops',
        notes_link      => 'https://wikitech.wikimedia.org/wiki/Maps/Runbook',
    }
}