Puppet Class: profile::analytics::search::airflow

Defined in:
modules/profile/manifests/analytics/search/airflow.pp

Overview

Class profile::analytics::search::airflow

Set up an apache-airflow instance to coordinate tasks in the analytics cluster.

Parameters:

  • service_user (String) (defaults to: lookup('profile::analytics::search::airflow::service_user'))
  • service_group (String) (defaults to: lookup('profile::analytics::search::airflow::service_group'))
  • webserver_port (Stdlib::Port) (defaults to: lookup('profile::analytics::search::airflow::webserver_port'))
  • mysql_host (Stdlib::Fqdn) (defaults to: lookup('profile::analytics::search::airflow::mysql_host'))
  • db_name (String) (defaults to: lookup('profile::analytics::search::airflow::db_name'))
  • deploy_target (String) (defaults to: lookup('profile::analytics::search::airflow::deploy_target'))
  • deploy_target_plugins (String) (defaults to: lookup('profile::analytics::search::airflow::deploy_target_plugins'))
  • deploy_user (String) (defaults to: lookup('profile::analytics::search::airflow::deploy_user'))
  • deploy_dir (Stdlib::Unixpath) (defaults to: lookup('profile::analytics::search::airflow::deploy_dir'))
  • airflow_dir (Stdlib::Unixpath) (defaults to: lookup('profile::analytics::search::airflow::airflow_dir'))
  • log_dir (Stdlib::Unixpath) (defaults to: lookup('profile::analytics::search::airflow::log_dir'))
  • run_dir (Stdlib::Unixpath) (defaults to: lookup('profile::analytics::search::airflow::run_dir'))
  • conf_dir (Stdlib::Unixpath) (defaults to: lookup('profile::analytics::search::airflow::conf_dir'))
  • conf_file (String) (defaults to: lookup('profile::analytics::search::airflow::conf_file'))


6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# File 'modules/profile/manifests/analytics/search/airflow.pp', line 6

class profile::analytics::search::airflow(
    # User that owns the log and run directories.
    String $service_user          = lookup('profile::analytics::search::airflow::service_user'),
    # Group on the config file and the log/run directories; also handed to
    # statistics::mysql_credentials below.
    String $service_group         = lookup('profile::analytics::search::airflow::service_group'),
    # Not referenced in this manifest; presumably consumed by the ERB
    # templates -- TODO confirm.
    Stdlib::Port $webserver_port  = lookup('profile::analytics::search::airflow::webserver_port'),
    # Host and database name used to build $sql_alchemy_conn.
    Stdlib::Fqdn $mysql_host      = lookup('profile::analytics::search::airflow::mysql_host'),
    String $db_name               = lookup('profile::analytics::search::airflow::db_name'),
    # Titles of the two scap::target resources (airflow itself, dags + plugins).
    String $deploy_target         = lookup('profile::analytics::search::airflow::deploy_target'),
    String $deploy_target_plugins = lookup('profile::analytics::search::airflow::deploy_target_plugins'),
    # deploy_user passed to both scap targets.
    String $deploy_user           = lookup('profile::analytics::search::airflow::deploy_user'),
    # Not referenced in this manifest; presumably consumed by the ERB
    # templates -- TODO confirm.
    Stdlib::Unixpath $deploy_dir  = lookup('profile::analytics::search::airflow::deploy_dir'),
    Stdlib::Unixpath $airflow_dir = lookup('profile::analytics::search::airflow::airflow_dir'),
    # Directories created below, owned by $service_user:$service_group.
    Stdlib::Unixpath $log_dir     = lookup('profile::analytics::search::airflow::log_dir'),
    Stdlib::Unixpath $run_dir     = lookup('profile::analytics::search::airflow::run_dir'),
    # Directory and file name of the rendered airflow configuration.
    Stdlib::Unixpath $conf_dir    = lookup('profile::analytics::search::airflow::conf_dir'),
    String $conf_file             = lookup('profile::analytics::search::airflow::conf_file'),
) {
    # Provides the database user/password read further down.
    include ::passwords::mysql::airflow::search

    require_package([
        'python3',
        'python3-virtualenv',
        'virtualenv',
        'python3-pip',
        'python3-mysqldb',
    ])

    # wrapper script to run the airflow command in the right context
    $airflow_wrapper = '/usr/local/bin/airflow'
    file { $airflow_wrapper:
        ensure  => present,
        owner   => 'root',
        group   => 'root',
        mode    => '0555',
        content => template('profile/analytics/search/airflow/airflow.sh.erb'),
    }

    # Deploy upstream airflow code + dependencies
    scap::target { $deploy_target:
        deploy_user => $deploy_user,
    }

    # Deploy dags + plugins
    scap::target { $deploy_target_plugins:
        deploy_user => $deploy_user,
    }

    file { $conf_dir:
        ensure => 'directory',
        owner  => 'root',
        group  => 'root',
        mode   => '0755',
    }

    # Database connection string for the config template.
    # NOTE(review): user and password are interpolated verbatim; if either can
    # contain URL-reserved characters they would need percent-encoding -- confirm.
    $sql_user = $::passwords::mysql::airflow::search::user
    $sql_pass = $::passwords::mysql::airflow::search::password
    $sql_alchemy_conn = "mysql://${sql_user}:${sql_pass}@${mysql_host}/${db_name}"

    file { "${conf_dir}/${conf_file}":
        ensure    => present,
        # Since this stores passwords limit read access
        owner     => 'root',
        group     => $service_group,
        mode      => '0440',
        content   => template('profile/analytics/search/airflow/airflow.cfg.erb'),
        # The rendered content embeds the database password; never print
        # content diffs to the agent log or reports.
        show_diff => false,
        # The group is not declared in this class; it must exist in the catalog.
        require   => Group[$service_group],
    }

    # Ensure places the daemons will write to are available.
    file { [$log_dir, $run_dir]:
        ensure => 'directory',
        owner  => $service_user,
        group  => $service_group,
        mode   => '0755',
    }

    # Cleanup script run daily by the timer below; executable by root only.
    file { '/usr/local/bin/airflow-clean-log-dirs':
        ensure  => present,
        content => template('profile/analytics/search/airflow/airflow-clean-log-dirs.erb'),
        mode    => '0550',
        owner   => 'root',
        group   => 'root',
    }

    systemd::timer::job { 'airflow_clean_log_dirs':
        user        => 'root',
        description => 'Delete Airflow log dirs/files after 30 days',
        command     => '/usr/local/bin/airflow-clean-log-dirs',
        interval    => {
            'start'    => 'OnCalendar',
            'interval' => '*-*-* 03:00:00',  # Every day at 3:00
        },
        require     => File['/usr/local/bin/airflow-clean-log-dirs'],
    }

    # The three daemons below share the same prerequisites: writable log/run
    # directories, the rendered config file and the wrapper script.
    systemd::service { 'airflow-webserver':
        content => template('profile/analytics/search/airflow/webserver.service.erb'),
        require => File[$log_dir, $run_dir, "${conf_dir}/${conf_file}", $airflow_wrapper],
    }

    base::service_auto_restart { 'airflow-webserver': }

    systemd::service { 'airflow-scheduler':
        content => template('profile/analytics/search/airflow/scheduler.service.erb'),
        require => File[$log_dir, $run_dir, "${conf_dir}/${conf_file}", $airflow_wrapper],
    }

    base::service_auto_restart { 'airflow-scheduler': }

    systemd::service { 'airflow-kerberos':
        content => template('profile/analytics/search/airflow/kerberos.service.erb'),
        require => File[$log_dir, $run_dir, "${conf_dir}/${conf_file}", $airflow_wrapper],
    }

    base::service_auto_restart { 'airflow-kerberos': }

    # Include analytics mediawiki sql replica credentials at
    # /etc/mysql/conf.d/analytics-research-client.cnf. This is only readable to
    # users in analytics-privatedata-users group, $service_user must be externally
    # configured as a member of this group.
    statistics::mysql_credentials { $service_group:
        group => $service_group,
    }
}