Puppet Class: profile::ntp

Defined in:
modules/profile/manifests/ntp.pp

Summary

profile to configure ntp

Overview

Parameters:

  • monitoring_hosts (Array[Stdlib::Host]) (defaults to: lookup('monitoring_hosts'))
  • ntp_peers (Hash[Wmflib::Sites, Array[Stdlib::Fqdn]]) (defaults to: lookup('ntp_peers'))


4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# File 'modules/profile/manifests/ntp.pp', line 4

class profile::ntp (
    Array[Stdlib::Host]                      $monitoring_hosts = lookup('monitoring_hosts'),
    Hash[Wmflib::Sites, Array[Stdlib::Fqdn]] $ntp_peers        = lookup('ntp_peers'),
){
    include network::constants

    # needed by the check that watches for changes to the ntp.conf file
    ensure_packages(['python3-pystemd'])

    # every peer of every site, collapsed into one flat list
    $wmf_all_peers = $ntp_peers.values.flatten

    ### Per-site settings. Core sites (eqiad, codfw) and edge sites (everything
    ### else) differ in three values, all chosen together in the case below:
    #
    # $wmf_servers_plus_self - the full list of peer servers applicable at
    #     this site (which, on any given server, still includes that server
    #     itself).  Core sites peer with all global peers at all sites; edge
    #     sites only peer with the core DCs and themselves.
    #
    # minsane <N> - the number of acceptably-working pool-servers + peers we
    #     must be syncing with to consider *ourselves* to be a reliable source
    #     for others.  These numbers can be bikeshedded a bit, but the default
    #     of 1 is lower than we'd like.  Setting it too high can break time
    #     sync in some otherwise-survivable scenarios.  The cores have more
    #     local peers between them and greater reliability in general, so they
    #     can tolerate a slightly higher number than the edges.
    #
    # orphan <stratum> - if no internet servers are reachable, our servers will
    #     operate as an orphaned peer island and maintain some kind of stable
    #     sync with each other.  Without this, if all of our global servers
    #     lost their upstreams, within a few minutes we'd have no time syncing
    #     happening at all ("peer" only protects you from *some* servers losing
    #     upstreams, not all).  A plausible scenario here would be some global
    #     screwup of pool.ntp.org DNS ops.  So set cores to do the orphan job.
    case $::site {
        'eqiad', 'codfw': {
            $wmf_servers_plus_self = $wmf_all_peers
            $minsane               = 3
            $orphan                = 12
        }
        default: {
            $wmf_servers_plus_self = [$ntp_peers['eqiad'], $ntp_peers['codfw'], $ntp_peers[$::site]].flatten
            $minsane               = 2
            $orphan                = 13
        }
    }

    # a server can't peer with itself, so drop this host's own FQDN
    $wmf_servers = $wmf_servers_plus_self.delete($facts['networking']['fqdn'])

    # pool.ntp.org zone used for the upstream pool; 'us' unless listed here
    $pool_zone = $::site ? {
        'esams' => 'nl',
        'eqsin' => 'sg',
        'drmrs' => 'fr',
        'magru' => 'br',
        default => 'us',
    }

    $wmf_server_upstream_pools = ["0.${pool_zone}.pool.ntp.org"]

    # maxclock - This needs to be the sum of:
    #     * The count of servers in wmf_servers for this host
    #     * The number (4) we want to use from the "pool" DNS lookup
    #     * One extra to account for the dummy "0.X.pool.ntp.org" entry
    $maxclock = $wmf_servers.length + 4 + 1

    # ACLs are generated from "external networks" automatically, with
    # 10.0.0.0/8 added on top. production_networks cannot be used here
    # since that would also include 127.0.0.0/8 and ::1/128.
    $time_acl = $network::constants::external_networks + ['10.0.0.0/8']

    ntp::daemon { 'server':
        servers      => $wmf_servers,
        pools        => $wmf_server_upstream_pools,
        time_acl     => $time_acl,
        query_acl    => $monitoring_hosts,
        # the extra "tos" config for our servers, built from the values above
        extra_config => "tos minsane ${minsane} orphan ${orphan} maxclock ${maxclock}",
    }

    ferm::service { 'ntp':
        proto  => 'udp',
        port   => 123,
        srange => '($PRODUCTION_NETWORKS $FRACK_NETWORKS $MGMT_NETWORKS $NETWORK_INFRA)',
    }

    monitoring::service { 'ntp peers':
        description    => 'NTP peers and stratum check',
        check_command  => 'check_ntp_peer_and_stratum!0.05!0.1!5!10',   # -W stratum 5, -C stratum 10
        check_interval => 5, # min
        retry_interval => 5, # min
        notes_url      => 'https://wikitech.wikimedia.org/wiki/NTP',
    }

    nrpe::plugin { 'check_ntp_service':
        source => 'puppet:///modules/profile/monitoring/check_service_restart.py',
    }

    # service unit => config file; one restart check is declared per entry
    $services_to_check = {
        'ntpsec.service' => '/etc/ntpsec/ntp.conf',
    }
    each($services_to_check) |$unit, $watched_file| {
        nrpe::monitor_service { "check_service_restart_${unit}":
            description    => "Check if ${unit} has been restarted after ${watched_file} was changed",
            nrpe_command   => "/usr/local/lib/nagios/plugins/check_ntp_service --service ${unit} --file ${watched_file} --critical 2",
            sudo_user      => 'root',
            check_interval => 60, # 60mins
            retry_interval => 30, # 30mins
            notes_url      => 'https://wikitech.wikimedia.org/wiki/NTP#Monitoring',
        }
    }

}