Puppet Class: profile::hadoop::worker::clients

Defined in:
modules/profile/manifests/hadoop/worker/clients.pp

Overview

SPDX-License-Identifier: Apache-2.0

Class profile::hadoop::worker::clients

Configure an Analytics Hadoop worker node with extra client tools to connect to Hive and use Sqoop/Spark2/etc.



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'modules/profile/manifests/hadoop/worker/clients.pp', line 7

class profile::hadoop::worker::clients {

    # Hive client: nice to have for jobs whose application master is
    # launched on an arbitrary worker node, so that they have access
    # to hive-site.xml and the other Hive jars.
    # This installs the hive-hcatalog package on worker nodes to get the
    # hcatalog jars, including the Hive JsonSerde used for
    # JSON-backed Hive tables.
    include ::profile::hive::client

    # Spark 2 is excluded from bullseye installs: it is only included
    # when debian::codename::lt('bullseye') is true.
    if debian::codename::lt('bullseye') {
        # Spark 2 is manually packaged by us, it is not part of CDH.
        include ::profile::hadoop::spark2
    }

    # Spark 3 is included unconditionally (no release check here).
    # It is provided in our custom conda-analytics package, via pyspark
    # installed in the conda environment in /opt/conda-analytics.
    include ::profile::hadoop::spark3

    # Sqoop needs to be on worker nodes if Airflow is to
    # launch sqoop jobs.
    # NOTE(review): this is a resource-like class declaration rather than
    # an `include`, unlike the other classes above; presumably intentional,
    # but a second resource-like declaration of this class elsewhere in the
    # catalog would conflict -- confirm.
    class { '::bigtop::sqoop': }
}