Puppet Class: profile::analytics::refinery::job::data_check

Defined in:
modules/profile/manifests/analytics/refinery/job/data_check.pp

Overview

Class profile::analytics::refinery::job::data_check

Configures systemd timer jobs that:

  • alert and send email about the faultiness of webrequest data.

  • alert if REFINE_FAILED flags are found in various datasources.

Parameters:

  • use_kerberos (Any) (defaults to: lookup('profile::analytics::refinery::job::data_check::use_kerberos', { 'default_value' => false }))
  • ensure_timers (Any) (defaults to: lookup('profile::analytics::refinery::job::data_check::ensure_timers', { 'default_value' => 'present' }))


7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'modules/profile/manifests/analytics/refinery/job/data_check.pp', line 7

class profile::analytics::refinery::job::data_check (
    $use_kerberos  = lookup('profile::analytics::refinery::job::data_check::use_kerberos', { 'default_value' => false }),
    $ensure_timers = lookup('profile::analytics::refinery::job::data_check::ensure_timers', { 'default_value' => 'present' }),
) {
    # The refinery profile must be evaluated first: the command lines below
    # are built from $::profile::analytics::refinery::path.
    require ::profile::analytics::refinery

    # FIXME: the mount point is hardcoded.  It ought to be read from
    # $::cdh::hadoop::mount::mount_point, i.e. the user supplied parameter
    # of the cdh::hadoop::mount class, but referencing that variable here
    # has not worked so far (reason unknown).
    $hdfs_mount_point = '/mnt/hdfs'

    # Both timers invoke the same refinery script against the same HDFS
    # mount; only the dataset list and the trailing flags differ.
    $status_script  = "${::profile::analytics::refinery::path}/bin/refinery-dump-status-webrequest-partitions"
    $status_command = "${status_script} --hdfs-mount ${hdfs_mount_point}"

    # Historical note (https://gerrit.wikimedia.org/r/#/c/186254): the
    # 'stats' user is not in ldap, which made it unnecessarily hard to grant
    # it access to the private data in hdfs, so the original cron was run as
    # hdfs instead.
    # NOTE(review): the timers below are declared with user 'analytics', not
    # hdfs -- confirm whether the note above is still accurate.
    kerberos::systemd_timer {
        # Attributes shared by every timer declared in this expression.
        default:
            ensure       => $ensure_timers,
            user         => 'analytics',
            use_kerberos => $use_kerberos,
        ;

        # Refined and raw webrequest partitions.
        'check_webrequest_partitions':
            description => 'Check HDFS Webrequest partitions',
            command     => "${status_command} --datasets webrequest,raw_webrequest --quiet --percent-lost",
            interval    => '*-*-* 10:00:00',
        ;

        # Pageview and projectview partitions, scheduled ten minutes after
        # the webrequest check.
        'check_pageviews_partitions':
            description => 'Check HDFS Pageviews partitions',
            command     => "${status_command} --datasets pageview,projectview --quiet",
            interval    => '*-*-* 10:10:00',
        ;
    }
}