Puppet Class: mediabackup::worker

Defined in:
modules/mediabackup/manifests/worker.pp

Overview

Media backups worker: Installs the required packages and configures them.

  • mw_db_host: fqdn of the mediawiki core database used to read the image metadata

  • mw_db_port: port on which the previous db is publicly listening

  • mw_db_user: user used to authenticate to the mediawiki database (needs only

    SELECT grants on the image, oldimage and filearchive tables)
    
  • mw_db_password: password used to authenticate to the mediawiki database

  • wiki: name of the mediawiki database that will be read

  • dblist: name of the list from mediawiki-config/dblists (including the

    extension) that will be used instead of a single wiki. If both wiki
    and this are provided, dblist is ignored.
    
  • batchsize: maximum number of rows (files) of metadata to be read into memory and

    processed for both mw metadata and backups metadata
    
  • db_host: fqdn of the database used to write the media backups metadata backend

  • db_port: port of such database

  • db_user: user used to authenticate to the database (needs read/write grants)

  • db_password: password used to authenticate to the database

  • db_schema: name of the database inside the server where the data is read

    from and written to
    
  • storage_hosts: list of hosts where the file backend runs, where the files

    will be finally stored
    
  • storage_port: Port where all storage nodes will be listening to (it may

    change in the future to a per-host configuration (socket)
    to allow for multiplexing with multiple services per host)
    
  • encryption_key: String used for encryption and decryption of private files

    Can be an age secret key or an ssh file
    
  • access_key: identifier to authenticate on the s3-compatible api to store

    the backup files (it has both read and write permissions)
    
  • secret_key: password to authenticate on the s3-compatible api for backing

    up files
    
  • recovery_access_key: identifier to authenticate on the s3-compatible api to

    restore the backup files (only has read and list
    permissions)
    
  • recovery_secret_key: password to authenticate on the s3-compatible api

    for recovering files
    

Parameters:

  • mw_db_host (Stdlib::Fqdn)
  • mw_db_port (Stdlib::Port)
  • mw_db_user (String)
  • mw_db_password (String)
  • wiki (Optional[String])
  • dblist (Optional[String])
  • batchsize (Integer[1])
  • db_host (Stdlib::Fqdn)
  • db_port (Stdlib::Port)
  • db_user (String)
  • db_password (String)
  • storage_hosts (Array[Stdlib::Fqdn])
  • storage_port (Stdlib::Port)
  • encryption_key (String)
  • access_key (String)
  • secret_key (String)
  • recovery_access_key (String)
  • recovery_secret_key (String)
  • db_schema (String) (defaults to: 'mediabackups')


36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# File 'modules/mediabackup/manifests/worker.pp', line 36

class mediabackup::worker (
    Stdlib::Fqdn        $mw_db_host,
    Stdlib::Port        $mw_db_port,
    String              $mw_db_user,
    String              $mw_db_password,
    Optional[String]    $wiki,
    Optional[String]    $dblist,
    Integer[1]          $batchsize,
    Stdlib::Fqdn        $db_host,
    Stdlib::Port        $db_port,
    String              $db_user,
    String              $db_password,
    Array[Stdlib::Fqdn] $storage_hosts,
    Stdlib::Port        $storage_port,
    String              $encryption_key,
    String              $access_key,
    String              $secret_key,
    String              $recovery_access_key,
    String              $recovery_secret_key,
    String              $db_schema = 'mediabackups',
) {
    # Runtime dependencies of the media backup tooling plus the command line
    # helpers used alongside it.
    ensure_packages([
        'python3',  # most of this will go into package deps.
        'python3-boto3',
        'python3-numpy',
        'python3-pymysql',
        'python3-swiftclient',
        'python3-yaml',
        's3cmd',  # useful s3 command line util
        'age',  # used for encryption and decryption
    ])

    # user and group so we don't run anything as a privileged user
    group { 'mediabackup':
        ensure => present,
        system => true,
    }
    user { 'mediabackup':
        ensure     => present,
        gid        => 'mediabackup',
        shell      => '/bin/false',
        home       => '/srv/mediabackup',
        system     => true,
        # the home directory is created by the file resource below, with
        # explicit ownership and mode, rather than by the user provider
        managehome => false,
        require    => Group['mediabackup'],
    }

    # location of temporary storage to download and hash files before
    # sending it to its final location
    file { '/srv/mediabackup':
        ensure  => directory,
        mode    => '0750',
        owner   => 'mediabackup',
        group   => 'mediabackup',
        require => [ User['mediabackup'], Group['mediabackup'] ],
    }

    # backup execution configuration dir (including secrets).
    # Puppet adds the search (x) bit on directories wherever the read bit is
    # set, so the owner can still traverse it despite the '0400' mode.
    file { '/etc/mediabackup':
        ensure  => directory,
        mode    => '0400',
        owner   => 'mediabackup',
        group   => 'mediabackup',
        require => [ User['mediabackup'], Group['mediabackup'] ],
    }

    # All regular files below use `ensure => file` (not `present`) so that a
    # pre-existing symlink or directory at the path is not silently accepted
    # as a valid credentials/config file.

    # access to a backup source db to read mediawiki image metadata
    file { '/etc/mediabackup/mw_db.conf':
        ensure    => file,
        mode      => '0400',
        owner     => 'mediabackup',
        group     => 'mediabackup',
        content   => template('mediabackup/mw_db.conf.erb'),
        show_diff => false,  # contains credentials, keep them out of reports
        require   => File['/etc/mediabackup'],
    }

    # access to a rw db to write and coordinate backup metadata
    file { '/etc/mediabackup/mediabackups_db.conf':
        ensure    => file,
        mode      => '0400',
        owner     => 'mediabackup',
        group     => 'mediabackup',
        content   => template('mediabackup/mediabackups_db.conf.erb'),
        show_diff => false,  # contains credentials, keep them out of reports
        require   => File['/etc/mediabackup'],
    }

    # NOTE(review): not referenced again in this manifest; presumably read by
    # the storage/recovery ERB templates below -- confirm before removing.
    $tmpdir = '/srv/mediabackup'
    # configuration and credentials to access final storage (S3-compatible
    # cluster on the same dc) for writing (backup generation)
    file { '/etc/mediabackup/mediabackups_storage.conf':
        ensure    => file,
        mode      => '0400',
        owner     => 'mediabackup',
        group     => 'mediabackup',
        content   => template('mediabackup/mediabackups_storage.conf.erb'),
        show_diff => false,  # contains credentials, keep them out of reports
        require   => [ File['/etc/mediabackup'], File['/srv/mediabackup'], ],
    }

    # identity file used for encryption with age
    file { '/etc/mediabackup/encryption.key':
        ensure    => file,
        mode      => '0400',
        owner     => 'mediabackup',
        group     => 'mediabackup',
        content   => $encryption_key,
        show_diff => false,  # secret key material, keep it out of reports
        # explicit for consistency with the other files in this directory
        require   => File['/etc/mediabackup'],
    }

    # extra read-only policy for the recovery account
    file { '/etc/mediabackup/readandlist.json':
        ensure  => file,
        owner   => 'mediabackup',
        group   => 'mediabackup',
        mode    => '0444',
        source  => 'puppet:///modules/mediabackup/readandlist.json',
        # explicit for consistency with the other files in this directory
        require => File['/etc/mediabackup'],
    }
    # configuration and credentials to access final storage (S3-compatible
    # cluster on the same dc) for reading and listing (backup recovery)
    file { '/etc/mediabackup/mediabackups_recovery.conf':
        ensure    => file,
        mode      => '0400',
        owner     => 'mediabackup',
        group     => 'mediabackup',
        content   => template('mediabackup/mediabackups_recovery.conf.erb'),
        show_diff => false,  # contains credentials, keep them out of reports
        require   => [
            File['/etc/mediabackup'],
            File['/srv/mediabackup'],
            File['/etc/mediabackup/readandlist.json'],
        ],
    }
}