Puppet Class: profile::hadoop::worker

Defined in:
modules/profile/manifests/hadoop/worker.pp

Overview

Class profile::hadoop::worker

Configure a Analytics Hadoop worker node.

Parameters

[*monitoring_enabled*]
  If production monitoring needs to be enabled or not.

[*use_kerberos*]
  Make Puppet use Kerberos authentication when executing hdfs commands.

Parameters:

  • cluster_name (String) (defaults to: lookup('profile::hadoop::common::hadoop_cluster_name'))
  • monitoring_enabled (Boolean) (defaults to: lookup('profile::hadoop::worker::monitoring_enabled', { 'default_value' => false }))
  • ferm_srange (String) (defaults to: lookup('profile::hadoop::worker::ferm_srange', { 'default_value' => '$DOMAIN_NETWORKS' }))
  • check_mountpoints_disk_space (Boolean) (defaults to: lookup('profile::hadoop::worker::check_mountpoints_disk_space', { 'default_value' => true }))


13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'modules/profile/manifests/hadoop/worker.pp', line 13

class profile::hadoop::worker(
    String $cluster_name                  = lookup('profile::hadoop::common::hadoop_cluster_name'),
    Boolean $monitoring_enabled           = lookup('profile::hadoop::worker::monitoring_enabled', { 'default_value' => false }),
    String $ferm_srange                   = lookup('profile::hadoop::worker::ferm_srange', { 'default_value' => '$DOMAIN_NETWORKS' }),
    Boolean $check_mountpoints_disk_space = lookup('profile::hadoop::worker::check_mountpoints_disk_space', { 'default_value' => true }),
) {
    require ::profile::analytics::cluster::packages::common
    require ::profile::hadoop::common
    require ::profile::java

    if $monitoring_enabled {
        # Prometheus exporters
        require ::profile::hadoop::monitoring::datanode
        require ::profile::hadoop::monitoring::nodemanager
    }

    class { '::bigtop::hadoop::worker': }

    # The HDFS journalnodes are co-located for convenience,
    # but it is not a strict requirement.
    if $::fqdn in $::bigtop::hadoop::journalnode_hosts {
        if $monitoring_enabled {
            require profile::hadoop::monitoring::journalnode
        }
        class { 'bigtop::hadoop::journalnode': }
    }


    # This allows Hadoop daemons to talk to each other.
    ferm::service{ 'hadoop-access':
        proto  => 'tcp',
        port   => '1024:65535',
        srange => $ferm_srange,
    }

    # Needed to ease enabling Kerberos and Linux containers
    file { '/usr/local/sbin/set_yarn_dir_ownership':
        ensure => 'present',
        owner  => 'root',
        group  => 'root',
        mode   => '0550',
        source => 'puppet:///modules/profile/hadoop/worker/set_yarn_dir_ownership',
    }

    if $monitoring_enabled {
        # Icinga process alerts for DataNode and NodeManager
        nrpe::monitor_service { 'hadoop-hdfs-datanode':
            description   => 'Hadoop DataNode',
            nrpe_command  => '/usr/lib/nagios/plugins/check_procs -c 1:1 -C java -a "org.apache.hadoop.hdfs.server.datanode.DataNode"',
            contact_group => 'admins,analytics',
            require       => Class['bigtop::hadoop::worker'],
            notes_url     => 'https://wikitech.wikimedia.org/wiki/Analytics/Systems/Cluster/Hadoop/Alerts#HDFS_Datanode_process',
        }
        nrpe::monitor_service { 'hadoop-yarn-nodemanager':
            description   => 'Hadoop NodeManager',
            nrpe_command  => '/usr/lib/nagios/plugins/check_procs -c 1:1 -C java -a "org.apache.hadoop.yarn.server.nodemanager.NodeManager"',
            contact_group => 'admins,analytics',
            require       => Class['bigtop::hadoop::worker'],
            notes_url     => 'https://wikitech.wikimedia.org/wiki/Analytics/Systems/Cluster/Hadoop/Alerts#Yarn_Nodemanager_process',
        }

        if $::fqdn in $::bigtop::hadoop::journalnode_hosts {
            nrpe::monitor_service { 'hadoop-hdfs-journalnode':
                description   => 'Hadoop JournalNode',
                nrpe_command  => '/usr/lib/nagios/plugins/check_procs -c 1:1 -C java -a "org.apache.hadoop.hdfs.qjournal.server.JournalNode"',
                contact_group => 'admins,analytics',
                require       => Class['bigtop::hadoop'],
                notes_url     => 'https://wikitech.wikimedia.org/wiki/Analytics/Systems/Cluster/Hadoop/Alerts#HDFS_Journalnode_process',
            }
        }

        if $check_mountpoints_disk_space {
            # Alert on datanode mount disk space.  These mounts are ignored by the
            # base module's check_disk via the base::monitoring::host::nrpe_check_disk_options
            # override in worker.yaml hieradata.
            nrpe::monitor_service { 'disk_space_hadoop_worker':
                description   => 'Disk space on Hadoop worker',
                nrpe_command  => '/usr/lib/nagios/plugins/check_disk --units GB -w 32 -c 16 -e -l  -r "/var/lib/hadoop/data"',
                contact_group => 'admins,analytics',
                notes_url     => 'https://wikitech.wikimedia.org/wiki/Analytics/Systems/Cluster/Hadoop/Administration',
            }
        }

        monitoring::check_prometheus { 'analytics_hadoop_hdfs_datanode':
            description     => 'HDFS DataNode JVM Heap usage',
            dashboard_links => ["https://grafana.wikimedia.org/d/000000585/hadoop?orgId=1&viewPanel=1&var-hadoop_cluster=${cluster_name}"],
            query           => "scalar(avg_over_time(jvm_memory_bytes_used{hadoop_cluster=\"${cluster_name}\",instance=\"${::hostname}:51010\",area=\"heap\"}[60m])/avg_over_time(jvm_memory_bytes_max{hadoop_cluster=\"${cluster_name}\",instance=\"${::hostname}:51010\",area=\"heap\"}[60m]))",
            warning         => 0.9,
            critical        => 0.95,
            contact_group   => 'analytics',
            prometheus_url  => "http://prometheus.svc.${::site}.wmnet/analytics",
            notes_link      => 'https://wikitech.wikimedia.org/wiki/Analytics/Systems/Cluster/Hadoop/Administration',
        }

        monitoring::check_prometheus { 'analytics_hadoop_yarn_nodemanager':
            description     => 'YARN NodeManager JVM Heap usage',
            dashboard_links => ["https://grafana.wikimedia.org/d/000000585/hadoop?orgId=1&viewPanel=17&var-hadoop_cluster=${cluster_name}"],
            query           => "scalar(avg_over_time(jvm_memory_bytes_used{hadoop_cluster=\"${cluster_name}\",instance=\"${::hostname}:8141\",area=\"heap\"}[60m])/avg_over_time(jvm_memory_bytes_max{hadoop_cluster=\"${cluster_name}\",instance=\"${::hostname}:8141\",area=\"heap\"}[60m]))",
            warning         => 0.9,
            critical        => 0.95,
            contact_group   => 'analytics',
            prometheus_url  => "http://prometheus.svc.${::site}.wmnet/analytics",
            notes_link      => 'https://wikitech.wikimedia.org/wiki/Analytics/Systems/Cluster/Hadoop/Administration',
        }
    }
}