Defined Type: dumps::web::fetches::analytics::job
- Defined in:
- modules/dumps/manifests/web/fetches/analytics/job.pp
Overview
Define dumps::web::fetches::analytics::job
Regularly copies files from $hdfs_source to $local_destination. Uses hdfs-rsync, systemd timers and Kerberos.
Parameters
- hdfs_source
-
HDFS Source directory to pull data from.
- local_destination
-
Destination directory on local filesystem to put data into.
- interval
-
Systemd interval that the timer will use.
- user
-
User running the Systemd timer.
- delete
-
Add the --delete option to the hdfs-rsync command if true.
- exclude
-
Add --exclude $value to the hdfs-rsync command if not undef.
- ensure
-
Ensure status of systemd timer.
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
# File 'modules/dumps/manifests/web/fetches/analytics/job.pp', line 29
# Installs /usr/local/bin/rsync-analytics-${title}, a wrapper script around
# hdfs-rsync that copies hdfs://${hdfs_source} to file://${local_destination},
# and declares a kerberos::systemd_timer that runs the script every $interval.
#
# [*ignore_missing_source*]
#   If true, the script first checks that the HDFS source exists and, when it
#   does not, prints a message instead of running hdfs-rsync. Default: false.
define dumps::web::fetches::analytics::job(
    String $hdfs_source,
    String $local_destination,
    String $interval,
    String $user,
    Boolean $delete = true,
    Boolean $ignore_missing_source = false,
    Wmflib::Ensure $ensure = present,
    Optional[String] $exclude = undef,
) {
    # Several jobs may share a destination directory, so only declare it
    # if no other resource has done so already. The directory is managed
    # independently of $ensure.
    if !defined(File[$local_destination]) {
        file { $local_destination:
            ensure => 'directory',
            owner  => $user,
            group  => 'root',
        }
    }

    $delete_option = $delete ? {
        true    => '--delete',
        default => ''
    }

    # Quotes around the exclude value are on purpose, to force the shell
    # to parse it as a single value.
    $exclude_option = $exclude ? {
        undef   => '',
        default => " --exclude \"${exclude}\""
    }

    $rsync_command = "/usr/local/bin/hdfs-rsync -r -t ${delete_option}${exclude_option} --perms --chmod D755,F644 hdfs://${hdfs_source} file://${local_destination}"
    $ignore_msg = "Ignoring missing hdfs source hdfs://${hdfs_source}"
    $head = "#!/bin/bash\n"

    # If $ignore_missing_source is enabled, add a check that prevents
    # the job from failing when the source directory is missing.
    #
    # An explicit if/else is used rather than `check && rsync || echo`:
    # with the latter form a failing hdfs-rsync would also trigger the echo,
    # so a real copy failure would be reported as a missing source and the
    # script would exit 0. Here the script exits with hdfs-rsync's status
    # whenever the source exists.
    $script_content = $ignore_missing_source ? {
        true    => "${head}if hdfs dfs -ls -d hdfs://${hdfs_source} > /dev/null 2>&1; then\n    ${rsync_command}\nelse\n    echo \"${ignore_msg}\"\nfi\n",
        default => "${head}${rsync_command}"
    }

    $script_path = "/usr/local/bin/rsync-analytics-${title}"

    file { $script_path:
        ensure  => $ensure,
        content => $script_content,
        mode    => '0550',
        owner   => $user,
        group   => 'root',
    }

    # Forward $ensure so that absenting the job removes the timer together
    # with the script, instead of leaving a timer that points at a missing
    # file.
    # NOTE(review): assumes kerberos::systemd_timer accepts an `ensure`
    # parameter -- confirm against its definition.
    kerberos::systemd_timer { "analytics-dumps-fetch-${title}":
        ensure      => $ensure,
        description => "Copy ${title} files from Hadoop HDFS.",
        command     => $script_path,
        interval    => $interval,
        user        => $user,
        require     => File[$script_path],
    }
}
|