Puppet Class: profile::analytics::refinery::job::import_wikidata_entities_dumps

Defined in:
modules/profile/manifests/analytics/refinery/job/import_wikidata_entities_dumps.pp

Overview

Parameters:

  • use_kerberos (Any) (defaults to: lookup('profile::analytics::refinery::job::import_wikidata_entities_dumps::use_kerberos', { 'default_value' => false }))
  • ensure_timers (Any) (defaults to: lookup('profile::analytics::refinery::job::import_wikidata_entities_dumps::ensure_timers', { 'default_value' => 'present' }))


8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'modules/profile/manifests/analytics/refinery/job/import_wikidata_entities_dumps.pp', line 8

# Declares one profile::analytics::refinery::job::import_wikidata_dumps_config
# resource per kind of wikidata entities dump (all-json, all-ttl, lexemes-ttl).
# All of them read from the same local source directory; each has its own
# include pattern, HDFS destination and timer schedule.
#
# @param use_kerberos
#   Forwarded unchanged as `use_kerberos` to every import resource. Resolved
#   through lookup() with a default of false.
# @param ensure_timers
#   Forwarded unchanged as `ensure` to every import resource. Resolved through
#   lookup() with a default of 'present'.
#
class profile::analytics::refinery::job::import_wikidata_entities_dumps (
    $use_kerberos = lookup('profile::analytics::refinery::job::import_wikidata_entities_dumps::use_kerberos', { 'default_value' => false }),
    $ensure_timers = lookup('profile::analytics::refinery::job::import_wikidata_entities_dumps::ensure_timers', { 'default_value' => 'present' }),
) {

    # Single local directory that every import below reads from.
    $wikidata_local_source = '/mnt/data/xmldatadumps/public/wikidatawiki/entities/'

    # The timers are staggered by 30 minutes: 01:00, 01:30 and 02:00.
    profile::analytics::refinery::job::import_wikidata_dumps_config {
        # Attributes shared by every import declared in this expression.
        default:
            ensure       => $ensure_timers,
            local_source => $wikidata_local_source,
            use_kerberos => $use_kerberos,
        ;

        # all-json dumps
        'refinery-import-wikidata-all-json-dumps':
            include_pattern   => '/*/*.json.bz2',
            hdfs_destination  => '/wmf/data/raw/wikidata/dumps/all_json',
            timer_description => 'Schedules daily an hdfs-rsync of the wikidata all-json dumps into HDFS',
            timer_interval    => '*-*-* 01:00:00',
        ;

        # all-ttl dumps
        'refinery-import-wikidata-all-ttl-dumps':
            include_pattern   => '/*/*-all-BETA.ttl.bz2',
            hdfs_destination  => '/wmf/data/raw/wikidata/dumps/all_ttl',
            timer_description => 'Schedules daily an hdfs-rsync of the wikidata all-ttl dumps into HDFS',
            timer_interval    => '*-*-* 01:30:00',
        ;

        # lexemes-ttl dumps
        'refinery-import-wikidata-lexemes-ttl-dumps':
            include_pattern   => '/*/*-lexemes-BETA.ttl.bz2',
            hdfs_destination  => '/wmf/data/raw/wikidata/dumps/lexemes_ttl',
            timer_description => 'Schedules daily an hdfs-rsync of the wikidata lexemes-ttl dumps into HDFS',
            timer_interval    => '*-*-* 02:00:00',
        ;
    }

}