Puppet Class: profile::hadoop::worker

Defined in:
modules/profile/manifests/hadoop/worker.pp

Overview

SPDX-License-Identifier: Apache-2.0

Class profile::hadoop::worker

Configure an Analytics Hadoop worker node.

Parameters

[*cluster_name*]
  Name of the Hadoop cluster, looked up from the common Hadoop configuration.

[*monitoring_enabled*]
  Whether to enable production monitoring (Prometheus exporters plus
  Icinga/NRPE process and disk-space alerts).

[*ferm_srange*]
  Source range of the ferm rule that allows Hadoop daemons to talk to
  each other.

[*check_mountpoints_disk_space*]
  Whether to alert on disk space usage of the DataNode mountpoints
  (only checked when monitoring is enabled).
Parameters:

  • cluster_name (String) (defaults to: lookup('profile::hadoop::common::hadoop_cluster_name'))
  • monitoring_enabled (Boolean) (defaults to: lookup('profile::hadoop::worker::monitoring_enabled', { 'default_value' => false }))
  • ferm_srange (String) (defaults to: lookup('profile::hadoop::worker::ferm_srange', { 'default_value' => '$DOMAIN_NETWORKS' }))
  • check_mountpoints_disk_space (Boolean) (defaults to: lookup('profile::hadoop::worker::check_mountpoints_disk_space', { 'default_value' => true }))
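For illustration, each lookup-backed parameter above maps to a Hiera key of
the same name. A hypothetical hieradata override might look like this (the
values are examples only, not taken from this repository):

  profile::hadoop::common::hadoop_cluster_name: 'analytics-hadoop'
  profile::hadoop::worker::monitoring_enabled: true
  profile::hadoop::worker::ferm_srange: '$ANALYTICS_NETWORKS'
  profile::hadoop::worker::check_mountpoints_disk_space: true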


# File 'modules/profile/manifests/hadoop/worker.pp', line 14

class profile::hadoop::worker (
    String $cluster_name                  = lookup('profile::hadoop::common::hadoop_cluster_name'),
    Boolean $monitoring_enabled           = lookup('profile::hadoop::worker::monitoring_enabled', { 'default_value' => false }),
    String $ferm_srange                   = lookup('profile::hadoop::worker::ferm_srange', { 'default_value' => '$DOMAIN_NETWORKS' }),
    Boolean $check_mountpoints_disk_space = lookup('profile::hadoop::worker::check_mountpoints_disk_space', { 'default_value' => true }),
) {
    require ::profile::analytics::cluster::packages::common
    require ::profile::hadoop::common
    require ::profile::java

    if $monitoring_enabled {
        # Prometheus exporters
        require ::profile::hadoop::monitoring::datanode
        require ::profile::hadoop::monitoring::nodemanager
    }

    # Look up in the common hadoop config whether or not this cluster is
    # configured to use multiple spark shufflers.
    $yarn_use_multi_spark_shufflers = $profile::hadoop::common::hadoop_config['yarn_use_multi_spark_shufflers'] ? {
        undef   => false,
        default => $profile::hadoop::common::hadoop_config['yarn_use_multi_spark_shufflers'],
    }

    # Look up in the common hadoop config the hash of spark versions in use.
    # We only need the versions here, not their ports.
    $yarn_multi_spark_shuffler_versions = $profile::hadoop::common::hadoop_config['yarn_multi_spark_shuffler_versions'] ? {
        undef   => [],
        default => $profile::hadoop::common::hadoop_config['yarn_multi_spark_shuffler_versions'].keys,
    }
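    # Illustration with hypothetical hiera values: if hadoop_config contained
    #   'yarn_use_multi_spark_shufflers'     => true,
    #   'yarn_multi_spark_shuffler_versions' => { '3.1' => 7001, '3.3' => 7002 },
    # the two selectors above would yield true and ['3.1', '3.3'] respectively.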

    class { '::bigtop::hadoop::worker':
        yarn_use_multi_spark_shufflers     => $yarn_use_multi_spark_shufflers,
        yarn_multi_spark_shuffler_versions => $yarn_multi_spark_shuffler_versions,
    }

    # The HDFS journalnodes are co-located for convenience,
    # but it is not a strict requirement.
    if $::fqdn in $::bigtop::hadoop::journalnode_hosts {
        if $monitoring_enabled {
            require profile::hadoop::monitoring::journalnode
        }
        class { 'bigtop::hadoop::journalnode': }
    }

    # This allows Hadoop daemons to talk to each other on any unprivileged TCP port.
    ferm::service{ 'hadoop-access':
        proto  => 'tcp',
        port   => '1024:65535',
        srange => $ferm_srange,
    }

    # Needed to ease enabling Kerberos and Linux containers
    file { '/usr/local/sbin/set_yarn_dir_ownership':
        ensure => 'file',
        owner  => 'root',
        group  => 'root',
        mode   => '0550',
        source => 'puppet:///modules/profile/hadoop/worker/set_yarn_dir_ownership',
    }

    if $monitoring_enabled {
        # Icinga process alerts for DataNode and NodeManager
        nrpe::monitor_service { 'hadoop-hdfs-datanode':
            description   => 'Hadoop DataNode',
            nrpe_command  => '/usr/lib/nagios/plugins/check_procs -c 1:1 -C java -a "org.apache.hadoop.hdfs.server.datanode.DataNode"',
            contact_group => 'admins,team-data-platform',
            require       => Class['bigtop::hadoop::worker'],
            notes_url     => 'https://wikitech.wikimedia.org/wiki/Analytics/Systems/Cluster/Hadoop/Alerts#HDFS_Datanode_process',
        }
        nrpe::monitor_service { 'hadoop-yarn-nodemanager':
            description   => 'Hadoop NodeManager',
            nrpe_command  => '/usr/lib/nagios/plugins/check_procs -c 1:1 -C java -a "org.apache.hadoop.yarn.server.nodemanager.NodeManager"',
            contact_group => 'admins,team-data-platform',
            require       => Class['bigtop::hadoop::worker'],
            notes_url     => 'https://wikitech.wikimedia.org/wiki/Analytics/Systems/Cluster/Hadoop/Alerts#Yarn_Nodemanager_process',
        }
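
        # Note: check_procs with '-c 1:1' makes each check go critical unless
        # exactly one matching java process is running.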

        if $::fqdn in $::bigtop::hadoop::journalnode_hosts {
            nrpe::monitor_service { 'hadoop-hdfs-journalnode':
                description   => 'Hadoop JournalNode',
                nrpe_command  => '/usr/lib/nagios/plugins/check_procs -c 1:1 -C java -a "org.apache.hadoop.hdfs.qjournal.server.JournalNode"',
                contact_group => 'admins,team-data-platform',
                require       => Class['bigtop::hadoop'],
                notes_url     => 'https://wikitech.wikimedia.org/wiki/Analytics/Systems/Cluster/Hadoop/Alerts#HDFS_Journalnode_process',
            }
        }

        if $check_mountpoints_disk_space {
            # Alert on datanode mount disk space.  These mounts are ignored by the
            # base module's check_disk via the base::monitoring::host::nrpe_check_disk_options
            # override in worker.yaml hieradata.
            nrpe::monitor_service { 'disk_space_hadoop_worker':
                description   => 'Disk space on Hadoop worker',
                nrpe_command  => '/usr/lib/nagios/plugins/check_disk --units GB -w 32 -c 16 -e -l -r "/var/lib/hadoop/data"',
                contact_group => 'admins,team-data-platform',
                notes_url     => 'https://wikitech.wikimedia.org/wiki/Analytics/Systems/Cluster/Hadoop/Administration',
            }
        }
    }
}
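
As a usage sketch (assuming the role/profile pattern used in this repository;
the role class name below is hypothetical): a worker node would normally pull
in this class through a role, with parameter overrides supplied via the Hiera
keys listed above.

class role::analytics::hadoop::worker {
    include ::profile::hadoop::worker
}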