Defined Type: mariadb::monitor_replication
- Defined in:
- modules/mariadb/manifests/monitor_replication.pp
Overview
MariaDB 10 multi-source replication TODO: Revisit the is_critical part. We probably want pages for DB problems for at least a group of people
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
# File 'modules/mariadb/manifests/monitor_replication.pp', line 4
define mariadb::monitor_replication(
$is_critical = false,
$contact_group = 'admins',
$lag_warn = 60,
$lag_crit = 300,
$socket = '/run/mysqld/mysqld.sock',
$multisource = false,
$warn_stopped = true,
$source_dc = mediawiki::state('primary_dc'),
) {
include passwords::nagios::mysql
$password = $passwords::nagios::mysql::mysql_check_pass
$check_command = "/usr/local/lib/nagios/plugins/check_mariadb --sock=${socket} --user=nagios --pass=${password}"
$check_set = $multisource ? {
true => "--set=default_master_connection=${name}",
false => ''
}
$check_warn = $warn_stopped ? {
true => '--warn-stopped',
false => '--no-warn-stopped'
}
$check_mariadb = "${check_command} ${check_set} ${check_warn}"
nrpe::monitor_service { "mariadb_replica_io_state_${name}":
description => "MariaDB Replica IO: ${name}",
nrpe_command => "${check_mariadb} --check=slave_io_state",
critical => $is_critical,
contact_group => $contact_group,
notes_url => 'https://wikitech.wikimedia.org/wiki/MariaDB/troubleshooting#Depooling_a_replica',
}
nrpe::monitor_service { "mariadb_replica_sql_state_${name}":
description => "MariaDB Replica SQL: ${name}",
nrpe_command => "${check_mariadb} --check=slave_sql_state",
critical => $is_critical,
contact_group => $contact_group,
notes_url => 'https://wikitech.wikimedia.org/wiki/MariaDB/troubleshooting#Depooling_a_replica',
}
# check the lag towards the $source_dc's master
nrpe::monitor_service { "mariadb_replica_sql_lag_${name}":
description => "MariaDB Replica Lag: ${name}",
nrpe_command => "${check_mariadb} --check=slave_sql_lag \
--shard=${name} --datacenter=${source_dc} \
--sql-lag-warn=${lag_warn} \
--sql-lag-crit=${lag_crit}",
retries => 10,
critical => $is_critical,
contact_group => $contact_group,
notes_url => 'https://wikitech.wikimedia.org/wiki/MariaDB/troubleshooting#Depooling_a_replica',
}
}
|