Defined Type: hdfs_tools::hdfs_rsync_job
- Defined in:
- modules/hdfs_tools/manifests/hdfs_rsync_job.pp
Overview
Define hdfs_tools::hdfs_rsync_job
Regularly copies files from $hdfs_source to $local_destination. Uses hdfs-rsync, systemd timers and Kerberos.
Note: Usage of this job installs ::hdfs_tools which requires java
and a hadoop configuration on the running host.
Parameters
- hdfs_source
-
HDFS Source directory to pull data from.
- local_destination
-
Destination directory on local filesystem to put data into.
- interval
-
Systemd interval that the timer will use.
- user
-
User running the Systemd timer.
- delete
-
Add the –delete if true.
- exclude
-
Add –exclude $value if not undef.
- ensure
-
Ensure status of systemd timer.
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
# File 'modules/hdfs_tools/manifests/hdfs_rsync_job.pp', line 32
define hdfs_tools::hdfs_rsync_job(
String $hdfs_source,
String $local_destination,
String $interval,
String $user,
Boolean $delete = true,
Boolean $ignore_missing_source = false,
Wmflib::Ensure $ensure = present,
Optional[String] $exclude = undef,
) {
require ::hdfs_tools
if !defined(File[$local_destination]) {
file { $local_destination:
ensure => 'directory',
owner => $user,
group => 'root',
}
}
$delete_option = $delete ? {
true => '--delete',
default => ''
}
# Quotes around the exclude value are on purpose to force
# to parse it as a single value
$exclude_option = $exclude ? {
undef => '',
default => " --exclude \"${exclude}\""
}
# If $ignore_missing_source is enabled, add a check that prevents
# hdfs-rsync to fail when the source directory is missing.
$rsync_command = "/usr/local/bin/hdfs-rsync -r -t ${delete_option}${exclude_option} --perms --chmod D755,F644 hdfs://${hdfs_source} file://${local_destination}"
$ignore_msg = "Ignoring missing hdfs source hdfs://${hdfs_source}"
$head = "#!/bin/bash\n"
$script_content = $ignore_missing_source ? {
true => "${head}hdfs dfs -ls -d hdfs://${hdfs_source} > /dev/null 2>&1 && ${rsync_command} || echo ${ignore_msg}",
default => "${head}${rsync_command}"
}
$script_file = "/usr/local/bin/hdfs_rsync_${title}"
file { $script_file:
ensure => $ensure,
content => $script_content,
mode => '0550',
owner => $user,
group => 'root',
}
kerberos::systemd_timer { "hdfs_rsync_${title}":
description => "Copy ${title} files from Hadoop HDFS.",
command => $script_file,
interval => $interval,
user => $user,
require => File[$script_file],
}
}
|