Defined Type: profile::analytics::refinery::job::spark_job

Defined in:
modules/profile/manifests/analytics/refinery/job/spark_job.pp

Overview

SPDX-License-Identifier: Apache-2.0

Define profile::analytics::refinery::job::spark_job

Renders a spark-submit wrapper script and sets up a systemd timer to run it.

Properties

jar

Path to Spark job .jar file

main_class

Spark job main class name.

spark_submit

Path to spark-submit executable to run. Default: /usr/bin/spark3-submit

job_name

Name of this spark job; will be used for script, cron job, and Spark --name. Default: $title

spark_opts

Extra Spark CLI opts to be passed to spark-submit

job_opts

CLI opts to append to the spark-submit command; these will be passed to your main function as args.

log_file

Default: /var/log/refinery/$job_name.log

interval

Systemd time interval. Default: '*-*-* *:00:00' (hourly)

Parameters:

  • jar (Any)
  • main_class (Any)
  • spark_submit (Any) (defaults to: '/usr/bin/spark3-submit')
  • job_name (Any) (defaults to: $title)
  • spark_opts (Any) (defaults to: undef)
  • job_opts (Any) (defaults to: undef)
  • log_file (Any) (defaults to: "/var/log/refinery/${job_name}.log")
  • user (Any) (defaults to: 'analytics')
  • interval (Any) (defaults to: '*-*-* *:00:00')
  • environment (Any) (defaults to: undef)
  • ensure (Any) (defaults to: 'present')
  • send_mail (Any) (defaults to: true)
  • use_keytab (Any) (defaults to: false)


36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# File 'modules/profile/manifests/analytics/refinery/job/spark_job.pp', line 36

define profile::analytics::refinery::job::spark_job(
    $jar,
    $main_class,
    $spark_submit        = '/usr/bin/spark3-submit',
    $job_name            = $title,
    $spark_opts          = undef,
    $job_opts            = undef,
    $log_file            = "/var/log/refinery/${job_name}.log",
    $user                = 'analytics',
    $interval            = '*-*-* *:00:00',
    $environment         = undef,
    $ensure              = 'present',
    $send_mail           = true,
    $use_keytab          = false,
)
{
    # Installs a wrapper script at /usr/local/bin/${job_name} and declares a
    # kerberos::systemd_timer (titled $title) that runs it on $interval as $user.
    #
    # NOTE(review): $jar, $main_class, $spark_submit, $spark_opts, $job_opts and
    # $log_file, as well as the locals $refinery_path and
    # $spark_keytab_extra_opts, are not referenced again in this manifest.
    # Presumably the spark_job.sh.erb template reads them from this scope, so
    # do not rename or remove them without checking the template.
    require ::profile::analytics::refinery
    $refinery_path = $profile::analytics::refinery::path

    # With $use_keytab set, build the extra options naming the per-host
    # principal and the user's keytab; otherwise leave the variable undefined.
    # The host name comes from the structured networking fact rather than the
    # legacy flat 'fqdn' fact, which is deprecated in favour of structured facts.
    if $use_keytab {
        $spark_keytab_extra_opts = "--principal ${user}/${facts['networking']['fqdn']}@WIKIMEDIA --keytab /etc/security/keytabs/${user}/${user}.keytab"
    } else {
        $spark_keytab_extra_opts = undef
    }

    # The wrapper script is named after $job_name, which defaults to $title.
    $script = "/usr/local/bin/${job_name}"

    # Root-owned, read/execute-only wrapper rendered from the ERB template.
    file { $script:
        ensure  => $ensure,
        content => template('profile/analytics/refinery/job/spark_job.sh.erb'),
        owner   => 'root',
        group   => 'root',
        mode    => '0555',
    }

    # NOTE(review): the timer's log file is /var/log/refinery/${title}.log,
    # whereas $log_file defaults to a path built from $job_name. The two differ
    # whenever job_name is overridden -- confirm which one is intended.
    kerberos::systemd_timer { $title:
        ensure                  => $ensure,
        description             => "Spark job for ${title}",
        command                 => $script,
        interval                => $interval,
        user                    => $user,
        environment             => $environment,
        send_mail               => $send_mail,
        logfile_basedir         => '/var/log/refinery',
        logfile_name            => "${title}.log",
        logfile_owner           => $user,
        logfile_group           => $user,
        logfile_perms           => 'all',
        syslog_force_stop       => true,
        # Only need to match equality here, not startswith.
        syslog_match_startswith => false,
        syslog_identifier       => $title,
        # The script must exist before the timer that executes it.
        require                 => File[$script],
    }
}