Puppet Class: bigtop::hadoop::nodemanager

Defined in:
modules/bigtop/manifests/hadoop/nodemanager.pp

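Overview

Installs the hadoop-yarn-nodemanager and hadoop-mapreduce packages (plus zookeeper when it is not already declared), optionally installs one spark-<version>-yarn-shuffle package per configured Spark version, renders /etc/default/hadoop-yarn-nodemanager from a template, and declares the hadoop-yarn-nodemanager systemd service with a TasksMax override.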

Parameters:

  • yarn_use_multi_spark_shufflers (Boolean) (defaults to: false)
  • yarn_multi_spark_shuffler_versions (Array[Bigtop::Spark::Version]) (defaults to: [])


15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# File 'modules/bigtop/manifests/hadoop/nodemanager.pp', line 15

# @summary Installs the YARN NodeManager packages and declares its systemd service.
#
# Class['bigtop::hadoop'] is ordered before this class, and the packages
# declared here require User['yarn'], so both must exist in the catalog.
#
# @param yarn_use_multi_spark_shufflers
#   When true, one Spark YARN shuffle package is ensured for every entry of
#   $yarn_multi_spark_shuffler_versions.
#
# @param yarn_multi_spark_shuffler_versions
#   Spark versions; each entry is interpolated into the package name
#   "spark-<version>-yarn-shuffle". Ignored unless
#   $yarn_use_multi_spark_shufflers is true.
#
class bigtop::hadoop::nodemanager (
    Boolean $yarn_use_multi_spark_shufflers                           = false,
    Array[Bigtop::Spark::Version] $yarn_multi_spark_shuffler_versions = [],
) {
    # Everything in bigtop::hadoop is applied before anything in this class.
    Class['bigtop::hadoop'] -> Class['bigtop::hadoop::nodemanager']

    package { ['hadoop-yarn-nodemanager', 'hadoop-mapreduce']:
        ensure  => 'installed',
        require => User['yarn'],
    }

    # Iterating an empty array declares nothing, so no separate emptiness
    # check is needed on the version list.
    if $yarn_use_multi_spark_shufflers {
        $yarn_multi_spark_shuffler_versions.each |$version| {
            ensure_packages("spark-${version}-yarn-shuffle")
        }
    }

    # Some NodeManager defaults can be overridden.
    # NOTE(review): $nofiles_ulimit is not referenced again in this class;
    # presumably the ERB template below reads it from scope - confirm.
    $nofiles_ulimit = $bigtop::hadoop::yarn_nodemanager_nofiles_ulimit
    file { '/etc/default/hadoop-yarn-nodemanager':
        content => template('bigtop/hadoop/hadoop-yarn-nodemanager.default.erb'),
        owner   => 'root',
        group   => 'root',
        mode    => '0644',
    }

    # Some Hadoop jobs need Zookeeper libraries, but for some reason they
    # are not installed via package dependencies.  Install the zookeeper
    # package here explicitly.  This avoids
    # java.lang.NoClassDefFoundError: org/apache/zookeeper/KeeperException
    # errors.
    # The defined() guard skips the declaration when Package['zookeeper'] has
    # already been declared earlier in the catalog, whatever its parameters.
    # NOTE(review): the previous wording called this "the CDH zookeeper
    # package", presumably a leftover from a CDH-based module - confirm.
    if !defined(Package['zookeeper']) {
        package { 'zookeeper':
            ensure => 'installed'
        }
    }

    # NodeManager (YARN TaskTracker)
    # The override was added for https://phabricator.wikimedia.org/T281792
    # The TasksMax value has been calculated as 80% of the lowest kernel.pid_max
    # among the Hadoop workers. The value 'infinity' may also be used for more
    # flexibility, but due to other important daemons requiring threads on the host
    # (HDFS Datanode and Journalnode) we want to be careful.
    # NOTE(review): 26214 is roughly 0.8 * 32768, which suggests a lowest
    # pid_max of 32768 - re-check if the workers' kernel.pid_max changes.
    systemd::service { 'hadoop-yarn-nodemanager':
        ensure         => 'present',
        restart        => true,
        override       => true,
        content        => "[Service]\nTasksMax=26214\n",
        service_params => {
            ensure     => 'running',
            alias      => 'nodemanager',
            hasstatus  => true,
            enable     => true,
            hasrestart => true,
        },
        require        => [
            Package['hadoop-yarn-nodemanager', 'hadoop-mapreduce'],
            File['/etc/default/hadoop-yarn-nodemanager'],
        ],
    }
}