Defined Type: profile::analytics::refinery::job::gobblin_job

Defined in:
modules/profile/manifests/analytics/refinery/job/gobblin_job.pp

Overview

Define profile::analytics::refinery::job::gobblin_job

Wrapper define for declaring a gobblin job systemd timer. This is planned to be replaced by an Airflow job in the near future.

Parameters

sysconfig_properties_file

Path to gobblin sysconfig properties file.

jobconfig_properties_file

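Path to gobblin jobconfig pull properties file. Default: ${refinery_path}/gobblin/jobs/${title}.pull, where refinery_path is taken from profile::analytics::refinery::path (e.g. /srv/deployment/analytics/refinery/gobblin/jobs/$title.pull).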

Parameters:

  • sysconfig_properties_file (Any)
  • jobconfig_properties_file (Any) (defaults to: undef)
  • user (Any) (defaults to: 'analytics')
  • gobblin_jar_file (Any) (defaults to: undef)
  • gobblin_script (Any) (defaults to: undef)
  • log_directory (Any) (defaults to: '/var/log/refinery/gobblin')
  • interval (Any) (defaults to: undef)
  • environment (Any) (defaults to: {})
  • monitoring_enabled (Any) (defaults to: true)
  • monitoring_contact_groups (Any) (defaults to: 'analytics')
  • ensure (Any) (defaults to: 'present')


13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'modules/profile/manifests/analytics/refinery/job/gobblin_job.pp', line 13

# Declares a kerberos::systemd_timer named "gobblin-${title}" whose command
# runs the gobblin launcher script with a sysconfig file, a jar and a job
# .pull file. Paths left undef are derived from profile::analytics::refinery::path.
#
# @param sysconfig_properties_file
#   Value passed to the launcher as --sysconfig=. Required.
# @param jobconfig_properties_file
#   Job .pull file given as the launcher's last argument.
#   When undef: ${refinery_path}/gobblin/jobs/${title}.pull
# @param user
#   Passed to the timer as user, logfile_owner and logfile_group.
# @param gobblin_jar_file
#   Value passed to the launcher as --jar=.
#   When undef: ${refinery_path}/artifacts/gobblin-wmf.jar
# @param gobblin_script
#   Launcher executable. When undef: ${refinery_path}/bin/gobblin
# @param log_directory
#   Declared as a directory here unless a File resource with this title is
#   already defined; also passed to the timer as logfile_basedir.
# @param interval
#   Passed unchanged to kerberos::systemd_timer.
# @param environment
#   Hash merged over the default { PYTHONPATH => ${refinery_path}/python };
#   keys given here win.
# @param monitoring_enabled
#   Passed unchanged to kerberos::systemd_timer.
# @param monitoring_contact_groups
#   Passed unchanged to kerberos::systemd_timer.
# @param ensure
#   Passed unchanged to kerberos::systemd_timer.
define profile::analytics::refinery::job::gobblin_job (
    $sysconfig_properties_file,
    $jobconfig_properties_file  = undef,
    $user                       = 'analytics',
    $gobblin_jar_file           = undef,
    $gobblin_script             = undef,
    $log_directory              = '/var/log/refinery/gobblin',
    $interval                   = undef,
    $environment                = {},
    $monitoring_enabled         = true,
    $monitoring_contact_groups  = 'analytics',
    $ensure                     = 'present',
) {
    require ::profile::analytics::refinery
    $refinery_path = $::profile::analytics::refinery::path

    # Resolve each optional path: an explicit value wins, otherwise fall back
    # to the conventional location under the refinery checkout.
    if $gobblin_script == undef {
        $launcher = "${refinery_path}/bin/gobblin"
    } else {
        $launcher = $gobblin_script
    }

    if $gobblin_jar_file == undef {
        $jar_file = "${refinery_path}/artifacts/gobblin-wmf.jar"
    } else {
        $jar_file = $gobblin_jar_file
    }

    if $jobconfig_properties_file == undef {
        $pull_file = "${refinery_path}/gobblin/jobs/${title}.pull"
    } else {
        $pull_file = $jobconfig_properties_file
    }

    # Caller-supplied variables are merged last so they override PYTHONPATH.
    $job_environment = merge(
        { 'PYTHONPATH' => "${refinery_path}/python" },
        $environment
    )

    # Several jobs may share one log directory; only the first instance
    # evaluated declares it.
    unless defined(File[$log_directory]) {
        file { $log_directory:
            ensure => 'directory',
        }
    }

    $sysconfig_opt = "--sysconfig=${sysconfig_properties_file}"
    $jar_opt       = "--jar=${jar_file}"
    $timer_name    = "gobblin-${title}"

    kerberos::systemd_timer { $timer_name:
        ensure                    => $ensure,
        description               => "Hadoop Gobblin job ${title}",
        command                   => "${launcher} ${sysconfig_opt} ${jar_opt} ${pull_file}",
        interval                  => $interval,
        user                      => $user,
        environment               => $job_environment,
        monitoring_enabled        => $monitoring_enabled,
        monitoring_contact_groups => $monitoring_contact_groups,
        logfile_basedir           => $log_directory,
        logfile_name              => "${title}.log",
        logfile_owner             => $user,
        logfile_group             => $user,
        logfile_perms             => 'all',
        syslog_force_stop         => true,
        syslog_identifier         => $timer_name,
    }
}