Puppet Class: statistics::discovery

Defined in:
modules/statistics/manifests/discovery.pp

Overview

Class: statistics::discovery

Maintainer: Mikhail Popov (bearloga)

Parameters:

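  • use_kerberos (Any) (defaults to: false) — passed through unchanged to the use_kerberos parameter of the kerberos::systemd_timer that runs the daily golden job.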


3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'modules/statistics/manifests/discovery.pp', line 3

# Declares the Search Platform (formerly Discovery) working directories, a
# MySQL client config, a checkout of the 'golden' repository and the timer
# that runs golden's main.sh once a day.
#
# @param use_kerberos
#   Forwarded unchanged to the `use_kerberos` parameter of the
#   kerberos::systemd_timer declared at the end of this class.
#   Defaults to false.
class statistics::discovery (
    $use_kerberos = false,
) {
    # Order this class after the base statistics class.
    Class['::statistics'] -> Class['::statistics::discovery']

    # Source of the research MySQL user/password used by the client config.
    include ::passwords::mysql::research

    # Owner of the directories and the checkout; also the timer's user.
    $user = 'analytics-search'
    # The group is 'analytics-privatedata-users' so that Discovery's Analysts
    # (who are members of that group) get some privileges, and so that the
    # user can reach private data in Hive. See also T174110#4265908.
    $group = 'analytics-privatedata-users'

    # Directory layout, all rooted in the statistics working path:
    #   $dir      - home for everything Search Platform (formerly Discovery)
    #   $log_dir  - where the daily runs write their logs
    #   $rlib_dir - where the R library lives
    $working_path = $::statistics::working_path
    $dir = "${working_path}/discovery"
    $log_dir = "${dir}/log"
    $rlib_dir = "${dir}/r-library"

    $directories = [$dir, $log_dir, $rlib_dir]

    file { $directories:
        ensure => 'directory',
        mode   => '0775',
        owner  => $user,
        group  => $group,
    }

    # Per the original note, this renders at
    # /etc/mysql/conf.d/discovery-stats-client.cnf.
    ::mariadb::config::client { 'discovery-stats':
        mode  => '0440',
        group => $group,
        user  => $::passwords::mysql::research::user,
        pass  => $::passwords::mysql::research::pass,
    }

    # Checkout of golden's master branch (submodules included), created only
    # once the Discovery home directory exists.
    git::clone { 'wikimedia/discovery/golden':
        ensure             => 'latest',
        directory          => "${dir}/golden",
        branch             => 'master',
        recurse_submodules => true,
        owner              => $user,
        group              => $group,
        require            => File[$dir],
    }

    # NOTE(review): declared with ensure => absent, presumably to clean up a
    # logrotate config that was deployed earlier - confirm against
    # logrotate::conf.
    logrotate::conf { 'wikimedia-discovery-stats':
        ensure  => absent,
        content => template('statistics/discovery-stats.logrotate.erb'),
        require => File[$log_dir],
    }

    # Why the daily run is scheduled for 05:00 (UTC, per the original note):
    # - Whatever was left of the previous day's data has most likely been
    #   processed by then.
    # - It is roughly 9/10pm Pacific time, so the run is unlikely to get in
    #   the way of people working on the analytics cluster. The original note
    #   adds that `nice` & `ionice` are used as a courtesy; that is not
    #   visible in this class.
    kerberos::systemd_timer { 'wikimedia-discovery-golden':
        description       => 'Discovery golden daily run',
        user              => $user,
        command           => "${dir}/golden/main.sh",
        interval          => '*-*-* 05:00:00',
        slice             => 'user.slice',
        use_kerberos      => $use_kerberos,
        syslog_force_stop => true,
        logfile_basedir   => $log_dir,
        logfile_name      => 'golden-daily.log',
        logfile_owner     => $user,
        logfile_group     => $group,
        require           => [
            Git::Clone['wikimedia/discovery/golden'],
            Mariadb::Config::Client['discovery-stats'],
            Class['::statistics::compute']
        ],
    }
}