Puppet Class: profile::analytics::cluster::hadoop::yarn_capacity_scheduler
- Defined in:
- modules/profile/manifests/analytics/cluster/hadoop/yarn_capacity_scheduler.pp
Overview
SPDX-License-Identifier: Apache-2.0
Class: profile::analytics::cluster::hadoop::yarn_capacity_scheduler
Capacity scheduler config tailored for the Hadoop Analytics Cluster. This class renders the capacity-scheduler.xml file, but it requires some options to be set in yarn-site.xml (via hadoop's common config) to be enabled:
yarn.resourcemanager.scheduler.monitor.enable: true
yarn.acl.enable: true
This profile needs to be included on the Hadoop master nodes only.
Parameters
[*base_settings*]
Settings that are common/shared to all clusters that use this scheduler.
Note: these are defined inside the class body; they are not a class parameter.
[*extra_settings*]
Settings that can be selectively enabled/disabled on top of the base ones.
This is useful for trying out new properties on a single cluster (such as the
test cluster) before considering adding them to base_settings.
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 |
# File 'modules/profile/manifests/analytics/cluster/hadoop/yarn_capacity_scheduler.pp', line 23
# Assembles the YARN capacity scheduler configuration: a fixed hash of base
# settings is merged with the optional $extra_settings and the result is
# passed to bigtop::hadoop::yarn::capacity_scheduler as scheduler_settings.
#
# @param extra_settings
#   Additional scheduler properties, looked up under the key
#   profile::analytics::cluster::hadoop::yarn_capacity_scheduler::extra_settings.
#   Falls back to an empty hash when the key is not found. Because it is the
#   right-hand operand of the merge below, its values take precedence over
#   base settings that use the same key.
class profile::analytics::cluster::hadoop::yarn_capacity_scheduler (
$extra_settings = lookup('profile::analytics::cluster::hadoop::yarn_capacity_scheduler::extra_settings', { 'default_value' => {} }),
) {
# Settings shared by every cluster that includes this profile. This is a
# variable local to the class body, not a class parameter.
$base_settings = {
# Global config
# Maximum number of applications that can be pending and running (same as hadoop default).
'yarn.scheduler.capacity.maximum-applications' => 10000,
# Maximum percent of resources in the cluster which can be used to run
# application masters, i.e. controls the number of concurrently running applications
# (same as hadoop default).
'yarn.scheduler.capacity.maximum-am-resource-percent' => 0.1,
# The ResourceCalculator implementation to be used to compare Resources in the scheduler.
# The default DefaultResourceCalculator only uses Memory while DominantResourceCalculator
# uses dominant-resource to compare multi-dimensional resources such as Memory, CPU etc.
'yarn.scheduler.capacity.resource-calculator' => 'org.apache.hadoop.yarn.util.resource.DominantResourceCalculator',
# Number of missed scheduling opportunities after which the CapacityScheduler
# attempts to schedule rack-local containers.
# Typically this should be set to number of nodes in the cluster.
# NOTE(review): 78 is presumably the worker node count at the time of writing - confirm.
'yarn.scheduler.capacity.node-locality-delay' => 78,
# If a queue mapping is present, will it override the value specified by the user?
'yarn.scheduler.capacity.queue-mappings-override.enable' => false,
# Useful to enable/disable any new job in the cluster (for example to let it drain before maintenance).
# Individual queues are not re-enabled by setting the yarn.scheduler.capacity.root.state to RUNNING,
# so all 5 queues have a setting here. Specific leaf queues can also be managed this way.
'yarn.scheduler.capacity.root.gpus.state' => 'RUNNING',
'yarn.scheduler.capacity.root.launchers.state' => 'RUNNING',
'yarn.scheduler.capacity.root.default.state' => 'RUNNING',
'yarn.scheduler.capacity.root.production.state' => 'RUNNING',
'yarn.scheduler.capacity.root.essential.state' => 'RUNNING',
# Queue definitions
# Sum of capacity (not max) needs to be 100 at any level/branch of the tree.
# The -1 value for maximum-capacity means no maximum. We set this to maximize
# usage elasticity.
# First layer (capacities below: 2 + 3 + 35 + 50 + 10 = 100)
'yarn.scheduler.capacity.root.queues' => 'gpus,launchers,default,production,essential',
'yarn.scheduler.capacity.root.gpus.capacity' => 2,
'yarn.scheduler.capacity.root.gpus.maximum-capacity' => -1,
'yarn.scheduler.capacity.root.launchers.capacity' => 3,
'yarn.scheduler.capacity.root.launchers.maximum-capacity' => -1,
'yarn.scheduler.capacity.root.default.capacity' => 35,
'yarn.scheduler.capacity.root.default.maximum-capacity' => -1,
'yarn.scheduler.capacity.root.production.capacity' => 50,
'yarn.scheduler.capacity.root.production.maximum-capacity' => -1,
'yarn.scheduler.capacity.root.essential.capacity' => 10,
'yarn.scheduler.capacity.root.essential.maximum-capacity' => -1,
# Default mappings
# Entries prefixed with 'u:' map a user to a queue, entries prefixed with 'g:' map a group.
# PLEASE NOTE: use only the leaf queue names, not full path.
# Example: root.production BAD, production GOOD
'yarn.scheduler.capacity.queue-mappings' => 'u:druid:production,u:analytics:production,u:analytics-platform-eng:production,u:analytics-research:production,u:analytics-search:production,u:analytics-product:production,u:analytics-wmde:production,g:analytics-privatedata-users:default',
# Limits
# https://docs.cloudera.com/HDPDocuments/HDP2/HDP-2.6.4/bk_yarn-resource-management/content/setting_user_limits.html
# https://hadoop.apache.org/docs/r2.10.1/hadoop-yarn/hadoop-yarn-site/CapacityScheduler.html
# The user limit factor is a multiplier used to allow users of a specific queue to take up to X
# times the resource allocated (as min value) for the queue. It is needed to allow/control elasticity,
# so users can overcome Yarn default limits in case there are free resources.
# Since launchers queue size is small, use a large limit-factor.
# Don't allow more resources than GPU ones when running in GPU queue (hence a factor of 1 for gpus).
'yarn.scheduler.capacity.root.gpus.user-limit-factor' => 1,
'yarn.scheduler.capacity.root.launchers.user-limit-factor' => 5,
'yarn.scheduler.capacity.root.default.user-limit-factor' => 2,
'yarn.scheduler.capacity.root.production.user-limit-factor' => 2,
'yarn.scheduler.capacity.root.essential.user-limit-factor' => 10,
# The user limit percent is different from the factor, since it is about how many users can run jobs on a queue
# at any given time. For example, if we set:
# 'yarn.scheduler.capacity.root.production.analytics.minimum-user-limit-percent' => 50,
# we want to allow up to two users concurrently in the queue (druid and analytics), leaving the others waiting.
# If we use '25', we'll allow a max of 4 different users, etc.
'yarn.scheduler.capacity.root.gpus.minimum-user-limit-percent' => 100,
'yarn.scheduler.capacity.root.launchers.minimum-user-limit-percent' => 50,
'yarn.scheduler.capacity.root.default.minimum-user-limit-percent' => 10,
'yarn.scheduler.capacity.root.production.minimum-user-limit-percent' => 20,
'yarn.scheduler.capacity.root.essential.minimum-user-limit-percent' => 50,
# Max lifetime for a Yarn application.
# Only the default and gpus queues get a lifetime cap in this hash.
'yarn.scheduler.capacity.root.default.maximum-application-lifetime' => 604800, # 1 week in seconds (7 * 24 * 3600)
'yarn.scheduler.capacity.root.gpus.maximum-application-lifetime' => 604800, # 1 week in seconds (7 * 24 * 3600)
# Ordering policy
# Every queue uses 'fair' except gpus, which uses 'fifo'.
'yarn.scheduler.capacity.root.gpus.ordering-policy' => 'fifo',
'yarn.scheduler.capacity.root.launchers.ordering-policy' => 'fair',
'yarn.scheduler.capacity.root.default.ordering-policy' => 'fair',
'yarn.scheduler.capacity.root.production.ordering-policy' => 'fair',
'yarn.scheduler.capacity.root.essential.ordering-policy' => 'fair',
# Labels
# https://hadoop.apache.org/docs/r2.10.0/hadoop-yarn/hadoop-yarn-site/NodeLabel.html
# Only one label can be assigned to every node, by default ending up in the DEFAULT_PARTITION.
# When a label is assigned, it creates a partition between the nodes, and the Capacity scheduler
# settings get "duplicated" (so all the queues, etc.). In this case we want just one queue to
# use the GPU label, so we concentrate all the capacity to it.
'yarn.scheduler.capacity.root.accessible-node-labels' => 'GPU',
'yarn.scheduler.capacity.root.accessible-node-labels.GPU.capacity' => '100',
'yarn.scheduler.capacity.root.gpus.accessible-node-labels' => 'GPU',
'yarn.scheduler.capacity.root.gpus.accessible-node-labels.GPU.capacity' => '100',
# ACLs
# Permissions cannot be reduced on the lower layer of the tree once set for a specific
# queue, they can only be incremented.
# Note: permissions values are in the form 'users groups'. If no user is specified but a
# group is, the value should start with a space.
# The root ACLs are a single space, i.e. neither a user nor a group is listed at the root
# level; access is then granted per queue below.
'yarn.scheduler.capacity.root.acl_submit_applications' => ' ',
'yarn.scheduler.capacity.root.acl_administer_queue' => ' ',
# Allow anyone from the analytics-privatedata-users group to use GPUs
'yarn.scheduler.capacity.root.gpus.acl_submit_applications' => ' analytics-privatedata-users',
'yarn.scheduler.capacity.root.gpus.acl_administer_queue' => ' analytics-privatedata-users',
# same settings as the production queue
'yarn.scheduler.capacity.root.launchers.acl_submit_applications' => 'analytics,analytics-platform-eng,analytics-research,druid,analytics-search,analytics-product,analytics-wmde',
'yarn.scheduler.capacity.root.launchers.acl_administer_queue' => '%user analytics-admins,analytics-platform-eng-admins,analytics-research-admins,analytics-search-users,analytics-product-users,airflow-wmde-admins',
'yarn.scheduler.capacity.root.default.acl_submit_applications' => ' analytics-privatedata-users',
'yarn.scheduler.capacity.root.default.acl_administer_queue' => ' analytics-privatedata-users',
'yarn.scheduler.capacity.root.production.acl_submit_applications' => 'analytics,analytics-platform-eng,analytics-research,druid,analytics-search,analytics-product,analytics-wmde',
# '%user' below refers to the submitter of the application/job. Thus, the submitter can manage/kill their own jobs in production.
# Additionally, any member from the group list can manage/kill any job in production.
'yarn.scheduler.capacity.root.production.acl_administer_queue' => '%user analytics-admins,analytics-platform-eng-admins,analytics-research-admins,analytics-search-users,analytics-product-users,airflow-wmde-admins',
'yarn.scheduler.capacity.root.essential.acl_submit_applications' => 'analytics,druid',
'yarn.scheduler.capacity.root.essential.acl_administer_queue' => ' analytics-admins',
# Preemption
# NOTE(review): presumably this protects containers in the essential queue from being
# preempted in favour of other queues - confirm against the CapacityScheduler docs.
'yarn.scheduler.capacity.root.essential.disable_preemption' => true,
# Application-master ratio override
# The launchers queue will be used to run very small jobs (application-master only).
# We don't limit the ratio application-master/container to allow many jobs
# to be run at the same time in this queue. This overrides, for the launchers queue only,
# the global maximum-am-resource-percent of 0.1 set at the top of this hash.
'yarn.scheduler.capacity.root.launchers.maximum-am-resource-percent' => 1,
}
# Merge the two hashes; on duplicate keys the value from $extra_settings (right operand) wins.
$scheduler_settings = $base_settings + $extra_settings
# Hand the merged settings to the bigtop class declared here.
class { 'bigtop::hadoop::yarn::capacity_scheduler':
scheduler_settings => $scheduler_settings,
}
}
|