Puppet Class: profile::hive::client

Defined in:
modules/profile/manifests/hive/client.pp

Overview

SPDX-License-Identifier: Apache-2.0

Class profile::hive::client

Installs base configs and packages for hive client nodes.

Parameters:

  • zookeeper_clusters (Hash[String, Any]) (defaults to: lookup('zookeeper_clusters'))
  • hive_services (Hash[String, Any]) (defaults to: lookup('hive_services'))
  • hive_service_name (String) (defaults to: lookup('profile::hive::client::hive_service_name'))
  • config_files_group_ownership (Optional[String]) (defaults to: lookup('profile::hive::client::config_files_group_ownership', { 'default_value' => undef }))
  • hive_metastore_jdbc_password (Optional[String]) (defaults to: lookup('profile::hive::client::hive_metastore_jdbc_password', { 'default_value' => undef }))
  • deploy_jdbc_settings (Boolean) (defaults to: lookup('profile::hive::client::deploy_jdbc_settings', { 'default_value' => false }))
  • hive_log4j_version (Integer[1,2]) (defaults to: lookup('profile::hive::client::log4j_version', default_value => 2))
  • hive_metastore_host (Optional[Stdlib::Host]) (defaults to: lookup('profile::hive::client::hive_metastore_host', { 'default_value' => undef }))


5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'modules/profile/manifests/hive/client.pp', line 5

# @summary Installs base configs and packages for hive client nodes.
#
# Most settings are read from the entry of $hive_services selected by
# $hive_service_name and handed to the bigtop::hive class. A beeline wrapper
# script and its /etc/beeline.ini configuration are installed as well.
#
# @param zookeeper_clusters
#   Hash keyed by ZooKeeper cluster name. The keys of the selected cluster's
#   'hosts' entry are passed to bigtop::hive as zookeeper_hosts.
# @param hive_services
#   Hash keyed by hive service name. The entry selected by $hive_service_name
#   supplies server, metastore, Kerberos and JDBC settings.
# @param hive_service_name
#   Key of the entry in $hive_services to use. Compilation fails if it is
#   not present.
# @param config_files_group_ownership
#   Passed unchanged to bigtop::hive.
# @param hive_metastore_jdbc_password
#   Passed to bigtop::hive as jdbc_password.
# @param deploy_jdbc_settings
#   Passed unchanged to bigtop::hive.
# @param hive_log4j_version
#   Passed unchanged to bigtop::hive. Note that the hiera key is
#   'profile::hive::client::log4j_version' (without the 'hive_' prefix).
# @param hive_metastore_host
#   When set, takes precedence over the 'metastore_host' value found in the
#   selected $hive_services entry.
class profile::hive::client(
    Hash[String, Any] $zookeeper_clusters          = lookup('zookeeper_clusters'),
    Hash[String, Any] $hive_services               = lookup('hive_services'),
    String $hive_service_name                      = lookup('profile::hive::client::hive_service_name'),
    Optional[String] $config_files_group_ownership = lookup('profile::hive::client::config_files_group_ownership', { 'default_value' => undef }),
    Optional[String] $hive_metastore_jdbc_password = lookup('profile::hive::client::hive_metastore_jdbc_password', { 'default_value' => undef }),
    Boolean $deploy_jdbc_settings                  = lookup('profile::hive::client::deploy_jdbc_settings', { 'default_value' => false }),
    Integer[1,2] $hive_log4j_version               = lookup('profile::hive::client::log4j_version', { 'default_value' => 2 }),
    Optional[Stdlib::Host] $hive_metastore_host    = lookup('profile::hive::client::hive_metastore_host', { 'default_value' => undef }),
) {
    require profile::hadoop::common

    # Fail early with a readable message instead of an obscure evaluation
    # error on the first nested key access below.
    unless $hive_service_name in $hive_services {
        fail("profile::hive::client: hive service '${hive_service_name}' is not defined in hive_services")
    }

    # All per-service settings below are read from this single entry.
    $hive_service = $hive_services[$hive_service_name]

    $hiveserver_host = $hive_service['server_host']
    $hiveserver_port = $hive_service['server_port']

    # In a multi-metastore setup, we want to force the hive server to use the
    # co-located metastore rather than the one referenced by the metastore_host
    # value, since the latter could be a DNS CNAME. Example:
    #
    # analytics-hive.eqiad.wmnet -> resolves to -> an-coord1003
    #
    # If we have a metastore on an-coord1004 and
    # metastore_host=analytics-hive.eqiad.wmnet, then the hive server on
    # an-coord1004 would point to the metastore on an-coord1003.
    # This would work, but if an-coord1003 went down, failing over the DNS
    # CNAME wouldn't be enough: the hive server on an-coord1004 would still
    # point to the metastore on an-coord1003 (and a restart would be needed
    # to pick up the new settings).
    $metastore_host = $hive_metastore_host ? {
        undef   => $hive_service['metastore_host'],
        default => $hive_metastore_host,
    }

    $zookeeper_cluster_name = $hive_service['zookeeper_cluster_name']
    $hive_server_opts = $hive_service['server_opts']
    $hive_metastore_opts = $hive_service['metastore_opts']
    $java_home = $hive_service['java_home']
    $hive_metastore_sasl_enabled = $hive_service['metastore_sasl_enabled']
    $hive_metastore_kerberos_keytab_file = $hive_service['metastore_kerberos_keytab_file']
    $hive_metastore_kerberos_principal = $hive_service['metastore_kerberos_principal']
    $hive_server2_authentication = $hive_service['server_authentication']
    $hive_server2_authentication_kerberos_principal = $hive_service['server_authentication_kerberos_principal']
    $hive_server2_authentication_kerberos_keytab = $hive_service['server_authentication_kerberos_keytab']
    $hive_metastore_jdbc_host = $hive_service['metastore_jdbc_host']
    $hive_metastore_jdbc_port = $hive_service['metastore_jdbc_port']
    $hive_metastore_jdbc_user = $hive_service['metastore_jdbc_user']
    $hive_metastore_database = $hive_service['metastore_jdbc_database']

    # Fall back to the DB-backed token store when the service entry does not
    # name a delegation token store class.
    $hive_cluster_delegation_token_store_class = $hive_service['hive_cluster_delegation_token_store_class'] ? {
        undef   => 'org.apache.hadoop.hive.thrift.DBTokenStore',
        default => $hive_service['hive_cluster_delegation_token_store_class'],
    }

    # Passed through as-is; undef when the service entry does not set it.
    $hive_metastore_disallow_incompatible_col_type_changes = $hive_service['hive_metastore_disallow_incompatible_col_type_changes']

    # The WMF webrequest table uses HCatalog's JSON Serde.
    # Automatically include this in Hive client classpaths.
    $hcatalog_jar = 'file:///usr/lib/hive-hcatalog/share/hcatalog/hive-hcatalog-core.jar'
    $auxpath = $hcatalog_jar

    # If given a $zookeeper_cluster_name to use for query locking,
    # look up the hosts from $zookeeper_clusters.
    $zookeeper_hosts = $zookeeper_cluster_name ? {
        undef   => undef,
        default => keys($zookeeper_clusters[$zookeeper_cluster_name]['hosts']),
    }

    # You must set at least:
    #   metastore_host
    class { 'bigtop::hive':
        # Hive uses Zookeeper for table locking.
        zookeeper_hosts                                       => $zookeeper_hosts,
        # We set support concurrency to false by default.
        # If someone needs to use it in their hive job, they
        # may manually set it to true via
        # set hive.support.concurrency = true;
        support_concurrency                                   => false,
        # Set this pretty high, to avoid limiting the number
        # of substitution variables a Hive script can use.
        variable_substitute_depth                             => 10000,
        auxpath                                               => $auxpath,
        # Default to using Snappy for parquet formatted tables.
        parquet_compression                                   => 'SNAPPY',
        hive_server_opts                                      => $hive_server_opts,
        hive_metastore_opts                                   => $hive_metastore_opts,
        metastore_host                                        => $metastore_host,
        java_home                                             => $java_home,
        # Precaution for CVE-2018-1284
        hive_server_udf_blacklist                             => 'xpath,xpath_string,xpath_boolean,xpath_number,xpath_double,xpath_float,xpath_long,xpath_int,xpath_short',

        # Optional security configs
        hive_metastore_sasl_enabled                           => $hive_metastore_sasl_enabled,
        hive_metastore_kerberos_keytab_file                   => $hive_metastore_kerberos_keytab_file,
        hive_metastore_kerberos_principal                     => $hive_metastore_kerberos_principal,
        hive_server2_authentication                           => $hive_server2_authentication,
        hive_server2_authentication_kerberos_principal        => $hive_server2_authentication_kerberos_principal,
        hive_server2_authentication_kerberos_keytab           => $hive_server2_authentication_kerberos_keytab,
        jdbc_host                                             => $hive_metastore_jdbc_host,
        jdbc_port                                             => $hive_metastore_jdbc_port,
        jdbc_username                                         => $hive_metastore_jdbc_user,
        jdbc_password                                         => $hive_metastore_jdbc_password,
        jdbc_database                                         => $hive_metastore_database,
        jdbc_driver                                           => 'com.mysql.jdbc.Driver',
        deploy_jdbc_settings                                  => $deploy_jdbc_settings,
        config_files_group_ownership                          => $config_files_group_ownership,
        hive_cluster_delegation_token_store_class             => $hive_cluster_delegation_token_store_class,
        hive_metastore_disallow_incompatible_col_type_changes => $hive_metastore_disallow_incompatible_col_type_changes,

        # Optional logging configuration
        hive_log4j_version                                    => $hive_log4j_version,
    }

    # Set up a wrapper script for beeline, the command line
    # interface to HiveServer2, and install it at
    # /usr/local/bin/beeline. The wrapper's settings are rendered
    # into /etc/beeline.ini.

    # NOTE(review): mode 0555 marks this ini file as executable; 0444 may
    # have been intended - confirm before changing.
    file { '/etc/beeline.ini':
        content => epp('profile/hive/client/beeline.ini.epp',
        {
            hiveserver_host    => $hiveserver_host,
            hiveserver_port    => $hiveserver_port,
            kerberos_principal => $hive_server2_authentication_kerberos_principal
        }),
        mode    => '0555',
    }

    file { '/usr/local/bin/beeline':
        source => 'puppet:///modules/profile/hive/client/beeline_wrapper.py',
        mode   => '0755',
    }
}