Defined Type: profile::analytics::refinery::job::spark_job
- Defined in:
- modules/profile/manifests/analytics/refinery/job/spark_job.pp
Overview
SPDX-License-Identifier: Apache-2.0
Define profile::analytics::refinery::job::spark_job
Renders a spark-submit wrapper script and sets up a systemd timer to run it.
Properties
- jar
-
Path to Spark job .jar file
- main_class
-
Spark job main class name.
- spark_submit
-
Path to spark-submit executable to run. Default: /usr/bin/spark3-submit
- job_name
-
Name of this spark job; will be used for script, cron job, and Spark –name. Default: $title
- spark_opts
-
Extra Spark CLI opts to be passed to spark-submit
- job_opts
-
CLI opts to append to the spark-submit command; these will be passed to your main function as args.
- log_file
-
Default: /var/log/refinery/$job_name.log
- interval
-
Systemd time interval. Default: '--* *:00:00' (hourly)
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
# File 'modules/profile/manifests/analytics/refinery/job/spark_job.pp', line 36
define profile::analytics::refinery::job::spark_job(
$jar,
$main_class,
$spark_submit = '/usr/bin/spark3-submit',
$job_name = $title,
$spark_opts = undef,
$job_opts = undef,
$log_file = "/var/log/refinery/${job_name}.log",
$user = 'analytics',
$interval = '*-*-* *:00:00',
$environment = undef,
$ensure = 'present',
$send_mail = true,
$use_keytab = false,
)
{
require ::profile::analytics::refinery
$refinery_path = $profile::analytics::refinery::path
if $use_keytab {
$spark_keytab_extra_opts = "--principal ${user}/${facts['fqdn']}@WIKIMEDIA --keytab /etc/security/keytabs/${user}/${user}.keytab"
} else {
$spark_keytab_extra_opts = undef
}
$script = "/usr/local/bin/${job_name}"
file { $script:
ensure => $ensure,
content => template('profile/analytics/refinery/job/spark_job.sh.erb'),
owner => 'root',
group => 'root',
mode => '0555',
}
kerberos::systemd_timer { $title:
ensure => $ensure,
description => "Spark job for ${title}",
command => $script,
interval => $interval,
user => $user,
environment => $environment,
send_mail => $send_mail,
logfile_basedir => '/var/log/refinery',
logfile_name => "${title}.log",
logfile_owner => $user,
logfile_group => $user,
logfile_perms => 'all',
syslog_force_stop => true,
# Only need to match equality here, not startswith.
syslog_match_startswith => false,
syslog_identifier => $title,
require => File[$script],
}
}
|