Puppet Class: bigtop::hadoop::namenode

Defined in:
modules/bigtop/manifests/hadoop/namenode.pp

Overview

SPDX-License-Identifier: Apache-2.0

Class bigtop::hadoop::namenode

Installs and configures the Hadoop NameNode. This will format the NameNode if it is not already formatted. It will also create a common HDFS directory hierarchy.

Note: If you are using HA NameNode (indicated by setting bigtop::hadoop::nameservice_id), your JournalNodes should be running before this class is applied.

Parameters:

  • standby (Any) (defaults to: false)
  • excluded_hosts (Any) (defaults to: [])


12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'modules/bigtop/manifests/hadoop/namenode.pp', line 12

# Installs the hadoop-hdfs-namenode package, formats (or bootstraps) the
# NameNode storage directory if it has no current/VERSION file yet, and keeps
# the hadoop-hdfs-namenode service running.  When HA is enabled and ZooKeeper
# hosts are configured on bigtop::hadoop, it also installs, initialises and
# runs the ZKFC (automatic failover) daemon.
#
# Parameters:
#   $standby        - when true, the NameNode is initialised with
#                     'hdfs namenode -bootstrapStandby' instead of
#                     'hdfs namenode -format'.
#   $excluded_hosts - not referenced directly in this manifest; presumably
#                     consumed by the hosts.exclude.erb template
#                     (NOTE(review): confirm against the template).
class bigtop::hadoop::namenode(
    $standby = false,
    $excluded_hosts = [],
) {
    Class['bigtop::hadoop'] -> Class['bigtop::hadoop::namenode']

    # ZKFC is only managed when HA is enabled AND ZooKeeper hosts are known.
    # Evaluated once so the package and service sections below cannot drift
    # apart.
    $zkfc_enabled = ($::bigtop::hadoop::ha_enabled and $::bigtop::hadoop::zookeeper_hosts)

    file { "${::bigtop::hadoop::config_directory}/hosts.exclude":
        ensure  => present,
        content => template('bigtop/hadoop/hosts.exclude.erb'),
    }

    # install namenode daemon package
    package { 'hadoop-hdfs-namenode':
        ensure  => 'installed',
        require => [
          File["${::bigtop::hadoop::config_directory}/hosts.exclude"],
          User['hdfs']
        ],
    }

    if $zkfc_enabled {
        # Another class may already manage the zookeeper package; only
        # declare it here if nobody else has.
        if !defined(Package['zookeeper']) {
            package { 'zookeeper':
                ensure => 'installed'
            }
        }

        package { 'hadoop-hdfs-zkfc':
            ensure  => 'installed',
            require => User['hdfs'],
        }
    }

    # Ensure that the namenode directory has the correct permissions.
    file { $::bigtop::hadoop::dfs_name_dir:
        ensure  => 'directory',
        owner   => 'hdfs',
        group   => 'hdfs',
        mode    => '0700',
        require => Package['hadoop-hdfs-namenode'],
    }

    # A standby NameNode is bootstrapped rather than formatted from scratch.
    if $standby {
        $namenode_format_command = '/usr/bin/hdfs namenode -bootstrapStandby -nonInteractive'
    } else {
        $namenode_format_command = '/usr/bin/hdfs namenode -format -nonInteractive'
    }

    # If $dfs_name_dir/current/VERSION doesn't exist, assume
    # NameNode has not been formatted.  Format it before
    # the namenode service is started.
    kerberos::exec { 'hadoop-namenode-format':
        command => $namenode_format_command,
        creates => "${::bigtop::hadoop::dfs_name_dir_main}/current/VERSION",
        user    => 'hdfs',
        require => [File[$::bigtop::hadoop::dfs_name_dir], File["${::bigtop::hadoop::config_directory}/hosts.exclude"]],
    }

    service { 'hadoop-hdfs-namenode':
        ensure     => 'running',
        enable     => true,
        hasstatus  => true,
        hasrestart => true,
        alias      => 'namenode',
        require    => Exec['hadoop-namenode-format'],
    }

    if $zkfc_enabled {
        # Create a znode in ZooKeeper inside of which the automatic failover
        # system stores its data. The command will create a znode in ZooKeeper
        # and it needs to be executed only when the znode is not present.

        # Catch-all if the zookeeper_hosts is not an array.
        $zookeeper_hosts = $::bigtop::hadoop::zookeeper_hosts
        $zookeeper_hosts_string = inline_template(
            '<%= Array(@zookeeper_hosts).join(",") %>'
        )

        kerberos::exec { 'hadoop-hdfs-zkfc-init':
            # If the znode created by -formatZK already exists, and for
            # some buggy reason it happens to run, -formatZK will prompt
            # the user to confirm if the znode should be reformatted.
            # Puppet isn't able to answer this question on its own.
            # Default to answering with 'N' if the command asks.
            # This should never happen, but just in case it does,
            # We don't want this eternally unanswered prompt to fill up
            # puppet logs and disks.
            command => 'echo N | hdfs zkfc -formatZK',
            user    => 'hdfs',
            require => [
                Service['hadoop-hdfs-namenode'],
                Package['zookeeper'],
            ],
            # NOTE. zkCli.sh from debian uses different install path than
            # from CDH.  Add both possibilities to PATH.
            path    => '/bin:/usr/bin:/usr/share/zookeeper/bin:/usr/lib/zookeeper/bin',
            # Don't attempt to run this command if the znode already exists
            # or if a Java Exception is returned by the zkCli tool containing
            # the ERROR log (for example when the Zookeeper node is down).
            # 'grep -E' is used instead of the obsolescent 'egrep' alias,
            # which newer GNU grep releases warn about on stderr.
            unless  => "zkCli.sh -server ${zookeeper_hosts_string} \
                stat /hadoop-ha/${::bigtop::hadoop::cluster_name} 2>&1 \
                | grep -Eq 'ctime|ERROR'",
        }

        # Supporting daemon to enable automatic-failover via health-check.
        # Stores its state in zookeeper.
        service { 'hadoop-hdfs-zkfc':
            ensure     => 'running',
            enable     => true,
            hasstatus  => true,
            hasrestart => true,
            # The package is required explicitly so the service is never
            # managed before the package declared above is installed,
            # regardless of the ordering mode Puppet is configured with.
            require    => [
                Package['hadoop-hdfs-zkfc'],
                Exec['hadoop-hdfs-zkfc-init'],
                Service['hadoop-hdfs-namenode'],
            ],
        }
    }
}