Defined Type: profile::analytics::refinery::job::spark_job

Defined in:
modules/profile/manifests/analytics/refinery/job/spark_job.pp

Overview

Define profile::analytics::refinery::job::spark_job

Renders a spark2-submit wrapper script and sets up a systemd timer to run it.

Properties

jar

Path to Spark job .jar file

class

Spark job main class name.

job_name

Name of this Spark job; will be used for script, cron job, and Spark --name. Default: $title

spark_opts

Extra Spark CLI opts to be passed to spark2-submit

job_opts

CLI opts to append to the spark2-submit command; these will be passed to your main function as args.

log_file

Default: /var/log/refinery/$job_name.log

interval

Systemd time interval. Default: '*-*-* *:00:00' (hourly)

Parameters:

  • jar (Any)
  • class (Any)
  • job_name (Any) (defaults to: $title)
  • spark_opts (Any) (defaults to: undef)
  • job_opts (Any) (defaults to: undef)
  • log_file (Any) (defaults to: "/var/log/refinery/${job_name}.log")
  • user (Any) (defaults to: 'analytics')
  • interval (Any) (defaults to: '*-*-* *:00:00')
  • environment (Any) (defaults to: undef)
  • ensure (Any) (defaults to: 'present')
  • monitoring_enabled (Any) (defaults to: true)
  • use_kerberos (Any) (defaults to: false)
  • use_keytab (Any) (defaults to: false)


31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'modules/profile/manifests/analytics/refinery/job/spark_job.pp', line 31

define profile::analytics::refinery::job::spark_job(
    $jar,
    $class,
    $job_name            = $title,
    $spark_opts          = undef,
    $job_opts            = undef,
    $log_file            = "/var/log/refinery/${job_name}.log",
    $user                = 'analytics',
    $interval            = '*-*-* *:00:00',
    $environment         = undef,
    $ensure              = 'present',
    $monitoring_enabled  = true,
    $use_kerberos        = false,
    $use_keytab          = false,
) {
    # Installs a wrapper script rendered from an ERB template at
    # /usr/local/bin/$job_name and declares a kerberos::systemd_timer that
    # runs that script as $user on the $interval schedule.

    # The refinery profile is required up front because its path is read below.
    require ::profile::analytics::refinery

    # Not referenced again in this manifest. Presumably consumed by the ERB
    # template, as are $jar, $class, $spark_opts, $job_opts and $log_file --
    # TODO confirm against spark_job.sh.erb.
    $refinery_path = $profile::analytics::refinery::path

    # Keytab options are built only when BOTH $use_kerberos and $use_keytab
    # are set; in every other case the variable is undef.
    $spark_keytab_extra_opts = ($use_kerberos and $use_keytab) ? {
        true    => "--principal ${user}/${facts['fqdn']}@WIKIMEDIA --keytab /etc/security/keytabs/${user}/${user}.keytab",
        default => undef,
    }

    $script = "/usr/local/bin/${job_name}"

    # Wrapper script: owned by root, read + execute for everyone.
    file { $script:
        ensure  => $ensure,
        owner   => 'root',
        group   => 'root',
        mode    => '0555',
        content => template('profile/analytics/refinery/job/spark_job.sh.erb'),
    }

    # NOTE(review): the timer, its syslog identifier and its log file name are
    # keyed on $title, while the script path is keyed on $job_name. They are
    # the same by default; confirm the split is intended when $job_name is
    # overridden.
    kerberos::systemd_timer { $title:
        ensure                    => $ensure,
        require                   => File[$script],

        # What to run, when, and as whom.
        description               => "Spark job for ${title}",
        command                   => $script,
        interval                  => $interval,
        user                      => $user,
        environment               => $environment,
        use_kerberos              => $use_kerberos,

        # Monitoring.
        monitoring_enabled        => $monitoring_enabled,
        monitoring_contact_groups => 'analytics',

        # Log file location and ownership.
        logfile_basedir           => '/var/log/refinery',
        logfile_name              => "${title}.log",
        logfile_owner             => $user,
        logfile_group             => $user,
        logfile_perms             => 'all',

        # Syslog routing. Only need to match equality here, not startswith.
        syslog_identifier         => $title,
        syslog_match_startswith   => false,
        syslog_force_stop         => true,
    }
}