Defined Type: query_service::blazegraph

Defined in:
modules/query_service/manifests/blazegraph.pp

Overview

SPDX-License-Identifier: Apache-2.0

define: query_service::blazegraph

Note: This resource installs and starts the Blazegraph service

Parameters:

  • $port: Blazegraph port to run on

  • $config_file_name: The name of the config file for this instance

  • $package_dir: Directory where the service should be installed.

  • $data_dir: Directory where the database should be stored

  • $logstash_logback_port: port which rsyslog server is listening on

  • $log_dir: Directory where the logs go

  • $heap_size: Heap size for Blazegraph

  • $username: Username owning the service

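  • $deploy_user: username of deploy user (stale entry: the current signature has no $deploy_user parameter; it declares $deploy_name, which is not described in this list)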

  • $use_deployed_config: Whether we should use config in deployed repo or our own

  • $extra_jvm_opts: Extra JVM configs for blazegraph

  • $use_geospatial: Turn on blazegraph geospatial features

  • $journal: Name to assign instance journal. Must be unique per data_dir.

  • $use_oauth: Set to true to protect the service with OAuth

  • $internal_federated_endpoints: Map of internal federated endpoints with a list of their service aliases

  • $only_throttle_cdn: Set to true to only throttle requests coming from the CDN (external requests)

Parameters:

  • port (Stdlib::Port)
  • config_file_name (String)
  • package_dir (Stdlib::Unixpath)
  • data_dir (Stdlib::Unixpath)
  • deploy_name (String)
  • logstash_logback_port (Stdlib::Port)
  • log_dir (Stdlib::Unixpath)
  • heap_size (String)
  • username (String)
  • use_deployed_config (Boolean)
  • extra_jvm_opts (Array[String])
  • use_geospatial (Boolean)
  • journal (String)
  • blazegraph_main_ns (String)
  • federation_user_agent (String)
  • prefixes_file (String)
  • use_oauth (Boolean)
  • internal_federated_endpoints (Optional[Hash[Stdlib::HTTPSUrl, Array[Stdlib::HTTPSUrl]]])
  • only_throttle_cdn (Boolean)


22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# File 'modules/query_service/manifests/blazegraph.pp', line 22

# Configures and runs one Blazegraph instance named after the resource title:
# renders its configuration files, declares its systemd unit and service, and
# manages ownership of its journal file without ever creating that file.
define query_service::blazegraph(
    Stdlib::Port $port,
    String $config_file_name,
    Stdlib::Unixpath $package_dir,
    Stdlib::Unixpath $data_dir,
    String $deploy_name, # TODO: we should use $title for this.
    Stdlib::Port $logstash_logback_port,
    Stdlib::Unixpath $log_dir,
    String $heap_size,
    String $username,
    Boolean $use_deployed_config,
    Array[String] $extra_jvm_opts,
    Boolean $use_geospatial,
    String $journal,
    String $blazegraph_main_ns,
    String $federation_user_agent,
    String $prefixes_file,
    Boolean $use_oauth,
    Optional[Hash[Stdlib::HTTPSUrl, Array[Stdlib::HTTPSUrl]]] $internal_federated_endpoints,
    Boolean $only_throttle_cdn,
) {
    # Journal file of this instance; only its ownership and mode are managed
    # (see the exec/file pair at the end of this define).
    $data_file = "${data_dir}/${journal}.jnl"

    # $config_file is not referenced again in this manifest; presumably the
    # systemd unit template reads it from scope -- TODO confirm.
    if ($use_deployed_config) {
        # Use the config file name as given; nothing is rendered by puppet.
        $config_file = $config_file_name
    } else {
        # Assemble the config from a common part, an instance specific part
        # and a geospatial part (or an explicit geospatial opt-out).
        $common_config = template('query_service/RWStore.common.properties.erb')
        $specific_config = template("query_service/${config_file_name}.erb")
        if ($use_geospatial) {
            $geo_config = template('query_service/RWStore.geo.properties.erb')
        } else {
            $geo_config = 'com.bigdata.rdf.store.AbstractTripleStore.geoSpatial=false'
        }
        $config_file = "/etc/${deploy_name}/${config_file_name}"
        file { $config_file:
            ensure  => file,
            content => "${common_config}\n${specific_config}\n${geo_config}",
            owner   => 'root',
            group   => 'root',
            mode    => '0644',
            before  => Systemd::Unit[$title],
        }
    }

    # Allowlist rendered from the internal federated endpoints map.
    # 'ensure => file' (rather than 'present') matches the other file
    # resources of this define and guarantees a regular file at this path.
    file { "/etc/${deploy_name}/allowlist-${title}.txt":
        ensure  => file,
        content => epp('query_service/allowlist.txt.epp', { 'endpoints' => $internal_federated_endpoints }),
        owner   => 'root',
        group   => 'root',
        mode    => '0644',
        before  => Service[$title],
    }

    # Extra JVM options derived from the parameters; each selector yields an
    # array so the results can be concatenated with $extra_jvm_opts below.
    $internal_federated_hosts = query_service::get_federated_endpoint_hostnames($internal_federated_endpoints)
    $proxy_bypass_hosts_jvm_opts = $internal_federated_hosts ? {
        undef   => [],
        default => ["-Dhttp.proxyExcludedHosts=${internal_federated_hosts}"],
    }
    $throttling_header_jvm_opts = $only_throttle_cdn ? {
        true  => ['-Dwdqs.enable-throttling-if-header=X-BIGDATA-READ-ONLY&&!X-Disable-Throttling'],
        false => [],
    }

    # Defaults file for the instance, in place before the unit is declared.
    file { "/etc/default/${title}":
        ensure  => file,
        content => epp('query_service/blazegraph-default.epp',
            {
                'deploy_name'           => $deploy_name,
                'title'                 => $title,
                'heap_size'             => $heap_size,
                'blazegraph_main_ns'    => $blazegraph_main_ns,
                'log_dir'               => $log_dir,
                'port'                  => $port,
                'extra_jvm_opts'        => $extra_jvm_opts + $proxy_bypass_hosts_jvm_opts + $throttling_header_jvm_opts,
                'prefixes_file'         => $prefixes_file,
                'use_oauth'             => $use_oauth,
                'federation_user_agent' => $federation_user_agent,
            }
        ),
        owner   => 'root',
        group   => 'root',
        mode    => '0644',
        before  => Systemd::Unit[$title],
    }

    query_service::logback_config { $title:
        logstash_logback_port => $logstash_logback_port,
        deploy_name           => $deploy_name,
        log_dir               => $log_dir,
        pattern               => '%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg %mdc%n%rEx{1,QUERY_TIMEOUT,SYNTAX_ERROR}',
        evaluators            => true,
        throttle              => true,
    }

    # Blazegraph service
    systemd::unit { $title:
        content => template('query_service/initscripts/blazegraph.systemd.erb'),
    }

    service { $title:
        ensure => 'running',
    }

    # This is a rather ugly hack to ensure that permissions of $data_file are
    # managed, but that the file is not created by puppet. If that file does
    # not exist, puppet will raise an error and skip the File[$data_file]
    # resource (and only that resource). It means that puppet will be in error
    # until data import is started, but that's a reasonable behaviour.
    # This works as follows:
    # if $data_file does not exist then:
    #    * the exec resource state is not clean, so the command is run
    #    * the command returns false, so the resource fails
    #    * the file{$data_file} resource is skipped because its dependency failed
    # else
    #    * the file exists, so the exec resource state is clean and the command
    #      does not need to run; the exec resource succeeds without running it,
    #      and so the file resource can manage permissions
    exec { "${data_file} exists":
        command => '/bin/false',
        creates => $data_file,
    }

    file { $data_file:
        ensure  => file,
        owner   => $username,
        group   => $username,
        mode    => '0664',
        require => Exec["${data_file} exists"],
        tag     => 'in-wdqs-data-dir',
    }
}