Defined Type: reportupdater::job

Defined in:
modules/reportupdater/manifests/job.pp

Overview

Define reportupdater::job

Sets up a systemd timer job (hourly by default) to run reportupdater, which generates and updates TSV reports for a set of given queries.

This requires that a repository with config and queries for the script exists at https://gerrit.wikimedia.org/r/analytics/$repository.git.

Parameters

 title        - string. Name of the query dir inside of $repository.
                A $title directory with reportupdater query config
                must exist inside of $repository, unless $query_dir is set.

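 repository   - string. [optional] Name of the query repository in gerrit,
                without the analytics/ namespace prefix (it is added
                automatically).  All reportupdater job repositories must
                be in analytics/.
                E.g. 'reportupdater-queries' for analytics/reportupdater-queries
                Default: 'reportupdater-queries'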

 config_file  - string. [optional] Absolute path of the config file.
                If not specified, reportupdater will use the 'config.yaml'
                file inside the query folder.

 output_dir   - string. [optional] Relative path where to write the reports.
                This will be relative to $::reportupdater::base_path/output
                Default: $title

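 interval     - string. [optional] Systemd OnCalendar expression that says
                when to run the report updater job.
                Default: '*-*-* *:00:00' (hourly)

 query_dir    - string. [optional] Directory inside of $repository that
                holds the reportupdater query config.
                Default: $title

 send_mail    - boolean. [optional] Passed through to the send_mail
                parameter of the timer resource.
                Default: true

 ensure       - string. [optional] Ensure value passed to the timer resource.
                Default: present

 use_kerberos - boolean. [optional] If true, the job is declared as a
                kerberos::systemd_timer; otherwise as a systemd::timer::job.
                Default: true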

Usage

reportupdater::job { 'browser': }

reportupdater::job { 'mobile':
    repository  => 'limn-mobile-data',
    output_dir  => "mobile/datafiles",
}

Parameters:

  • repository (Any) (defaults to: 'reportupdater-queries')
  • config_file (Any) (defaults to: undef)
  • output_dir (Any) (defaults to: $title)
  • query_dir (Any) (defaults to: undef)
  • interval (Any) (defaults to: '*-*-* *:00:00')
  • send_mail (Any) (defaults to: true)
  • ensure (Any) (defaults to: present)
  • use_kerberos (Any) (defaults to: true)


38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# File 'modules/reportupdater/manifests/job.pp', line 38

# Declares a systemd timer that runs reportupdater's update_reports.py for one
# query directory. The query repository is cloned from gerrit (and kept at the
# latest version) unless another job has already declared the same clone.
#
# Parameters:
#   repository   - Repository name below the analytics/ gerrit namespace.
#   config_file  - Optional config file path; when set it is passed to the
#                  script as '--config-path <config_file>'.
#   output_dir   - Report directory, relative to $::reportupdater::output_path.
#   query_dir    - Optional directory inside the clone to use as the query
#                  path; when undef, $title is used instead.
#   interval     - Calendar expression handed to the timer resource.
#   send_mail    - Passed through to the timer resource.
#   ensure       - Passed through to the timer resource (not to the clone).
#   use_kerberos - true: declare a kerberos::systemd_timer,
#                  false: declare a systemd::timer::job.
define reportupdater::job(
    $repository = 'reportupdater-queries',
    $config_file = undef,
    $output_dir = $title,
    $query_dir = undef,
    $interval = '*-*-* *:00:00',
    $send_mail = true,
    $ensure = present,
    $use_kerberos = true,
)
{
    # The reportupdater class must be applied before any of its jobs.
    Class['::reportupdater'] -> Reportupdater::Job[$title]

    # Name of the repository in gerrit.
    # All reportupdater job repositories are in the analytics/ namespace.
    $repository_name = "analytics/${repository}"

    # Path at which this reportupdater job repository will be cloned.
    $path = "${::reportupdater::jobs_path}/${repository}"

    # Path of the query configuration directory inside of the clone.
    # $query_dir, when given, takes precedence over $title.
    $query_path = $query_dir ? {
        undef   => "${path}/${title}",
        default => "${path}/${query_dir}",
    }

    # Path at which the job will store its report output.
    $output_path = "${::reportupdater::output_path}/${output_dir}"

    # Ensure the query repository is cloned and at the latest version.
    # Several jobs may share one repository, so only declare the clone
    # if no other job has declared it already.
    if !defined(Git::Clone[$repository_name]) {
        git::clone { $repository_name:
            ensure    => 'latest',
            directory => $path,
            origin    => "https://gerrit.wikimedia.org/r/${repository_name}.git",
            owner     => $::reportupdater::user,
        }
    }

    # Optional '--config-path' argument; empty when no config file is given.
    $config_path = $config_file ? {
        undef   => '',
        default => "--config-path ${config_file}",
    }

    # Values shared by both timer flavours. They are defined once here so the
    # kerberos and non-kerberos branches cannot drift apart.
    # Note: with an empty $config_path the command contains two consecutive
    # spaces; the string is intentionally kept identical to the previous one.
    $timer_title = "reportupdater-${title}"
    $description = "Report Updater job for ${title}"
    $command     = "/usr/bin/python3 ${::reportupdater::source_path}/update_reports.py ${config_path} -l info ${query_path} ${output_path}"

    if $use_kerberos {
        # kerberos::systemd_timer receives the calendar expression directly.
        kerberos::systemd_timer { $timer_title:
            ensure            => $ensure,
            description       => $description,
            command           => $command,
            interval          => $interval,
            user              => $::reportupdater::user,
            send_mail         => $send_mail,
            logfile_basedir   => $::reportupdater::log_path,
            logfile_name      => 'syslog.log',
            logfile_owner     => $::reportupdater::user,
            logfile_group     => $::reportupdater::user,
            logfile_perms     => 'all',
            syslog_force_stop => true,
            syslog_identifier => $timer_title,
        }
    } else {
        # systemd::timer::job receives the interval as a hash with an
        # OnCalendar start, and an explicit mail recipient.
        systemd::timer::job { $timer_title:
            ensure            => $ensure,
            description       => $description,
            command           => $command,
            interval          => {
                'start'    => 'OnCalendar',
                'interval' => $interval
            },
            user              => $::reportupdater::user,
            send_mail         => $send_mail,
            send_mail_to      => 'data-engineering-alerts@lists.wikimedia.org',
            logfile_basedir   => $::reportupdater::log_path,
            logfile_name      => 'syslog.log',
            logfile_owner     => $::reportupdater::user,
            logfile_group     => $::reportupdater::user,
            logfile_perms     => 'all',
            syslog_identifier => $timer_title,
            syslog_force_stop => true,
        }
    }
}