Defined Type: profile::analytics::refinery::job::gobblin_job

Defined in:
modules/profile/manifests/analytics/refinery/job/gobblin_job.pp

Overview

SPDX-License-Identifier: Apache-2.0

Define profile::analytics::refinery::job::gobblin_job

Wrapper define for declaring a gobblin job systemd timer. This is planned to be replaced by an Airflow job in the near future.

Parameters

sysconfig_properties_file

Path to gobblin sysconfig properties file.

jobconfig_properties_file

Path to gobblin jobconfig pull properties file. Default: /srv/deployment/analytics/refinery/gobblin/jobs/$title.pull

Parameters:

  • sysconfig_properties_file (Any)
  • jobconfig_properties_file (Any) (defaults to: undef)
  • user (Any) (defaults to: 'analytics')
  • group (Any) (defaults to: 'analytics')
  • gobblin_jar_file (Any) (defaults to: undef)
  • gobblin_script (Any) (defaults to: undef)
  • log_directory (Any) (defaults to: '/var/log/refinery/gobblin')
  • interval (Any) (defaults to: undef)
  • environment (Any) (defaults to: {})
  • send_mail (Any) (defaults to: true)
  • send_mail_to (Any) (defaults to: 'data-engineering-alerts@lists.wikimedia.org')
  • ensure (Any) (defaults to: 'present')


14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'modules/profile/manifests/analytics/refinery/job/gobblin_job.pp', line 14

define profile::analytics::refinery::job::gobblin_job (
    # Path to the gobblin sysconfig properties file (required); passed to the
    # gobblin script as --sysconfig.
    $sysconfig_properties_file,
    # Path to the job's pull properties file. When undef, defaults to
    # <refinery path>/gobblin/jobs/<title>.pull.
    $jobconfig_properties_file  = undef,
    # User the timer runs as; also the owner of the job's log file.
    $user                       = 'analytics',
    # Group of the log directory and of the job's log file.
    $group                      = 'analytics',
    # Jar passed to the gobblin script as --jar. When undef, defaults to
    # <refinery path>/artifacts/gobblin-wmf.jar.
    $gobblin_jar_file           = undef,
    # Launcher script. When undef, defaults to <refinery path>/bin/gobblin.
    $gobblin_script             = undef,
    # Base directory for job logs; declared here unless already declared elsewhere.
    $log_directory              = '/var/log/refinery/gobblin',
    # Timer schedule, handed unchanged to kerberos::systemd_timer.
    # NOTE(review): expected format is defined by kerberos::systemd_timer - confirm there.
    $interval                   = undef,
    # Extra environment variables for the job; merged over the default PYTHONPATH.
    $environment                = {},
    # Mail settings, handed unchanged to kerberos::systemd_timer.
    $send_mail                  = true,
    $send_mail_to               = 'data-engineering-alerts@lists.wikimedia.org',
    # Ensure value for the timer. The log directory is not affected by it.
    $ensure                     = 'present',
) {
    require ::profile::analytics::refinery
    $refinery_path = $::profile::analytics::refinery::path

    # Each optional path falls back to a location under the refinery checkout
    # only when the caller left it undef.
    $_jobconfig_properties_file = $jobconfig_properties_file ? {
        undef   => "${refinery_path}/gobblin/jobs/${title}.pull",
        default => $jobconfig_properties_file,
    }

    $_gobblin_jar_file = $gobblin_jar_file ? {
        undef   => "${refinery_path}/artifacts/gobblin-wmf.jar",
        default => $gobblin_jar_file,
    }

    $_gobblin_script = $gobblin_script ? {
        undef   => "${refinery_path}/bin/gobblin",
        default => $gobblin_script,
    }

    # Caller-supplied keys are merged last, so they override the default PYTHONPATH.
    $default_environment = {
        'PYTHONPATH' => "${refinery_path}/python"
    }
    $_environment = merge($default_environment, $environment)

    # Several gobblin_job instances usually share one log directory; the guard
    # avoids a duplicate File declaration when more than one is declared on a node.
    if !defined(File[$log_directory]) {
        file { $log_directory:
            ensure => 'directory',
            group  => $group,
        }
    }

    $command = "${_gobblin_script} --sysconfig=${sysconfig_properties_file} --jar=${_gobblin_jar_file} ${_jobconfig_properties_file}"

    kerberos::systemd_timer { "gobblin-${title}":
        ensure            => $ensure,
        description       => "Hadoop Gobblin job ${title}",
        command           => $command,
        interval          => $interval,
        user              => $user,
        environment       => $_environment,
        send_mail         => $send_mail,
        send_mail_to      => $send_mail_to,
        logfile_basedir   => $log_directory,
        logfile_name      => "${title}.log",
        logfile_owner     => $user,
        # Use the declared $group (previously $user) so the log file's group
        # matches the log directory's. Identical under the defaults.
        logfile_group     => $group,
        logfile_perms     => 'all',
        syslog_force_stop => true,
        syslog_identifier => "gobblin-${title}",
    }
}