Configuration
`stream_manager` is configurable via the following YAML settings.
All options can be overridden on the command line. Run `python your_app.py -h`
for more details.
# A context manager that wraps the setup of a Flink stream pipeline.
# Every setting below is nested under this key. Settings marked "required"
# have no default and are shown as null here; they must be provided.
stream_manager:
  # The name of the streaming job. Will be used in UI and metric names.
  # (required, type: str)
  job_name: null

  # SourceDescriptor. Will be used to instantiate
  # and then read from sources.
  source:
    # (required, type: str)
    connector: null
    # (required, type: str)
    stream: null
    # (type: Optional[str], default: null)
    name: null
    # (type: Dict[Any, Any], default: {})
    options: {}

  # SinkDescriptor configuration. Will be used to instantiate
  # and write the final output. The shape of the final output
  # must conform to sink.stream's event schema.
  sink:
    # (required, type: str)
    connector: null
    # (required, type: str)
    stream: null
    # (type: Optional[str], default: null)
    name: null
    # (type: Dict[Any, Any], default: {})
    options: {}

  # Error SinkDescriptor config. If set, any errors encountered
  # during processing will be written to this sink.
  # You must ensure that the error stream is declared in Event Stream Config.
  # The default is a dummy "null" SinkDescriptor connector,
  # which will cause no error sink to be used.
  # In this case, errors will be raised to the caller.
  error_sink:
    # (type: str, default: 'null')
    # This is the string naming the dummy connector, not a YAML null;
    # it is quoted so the parser keeps it as a string.
    connector: 'null'
    # (type: str, default: stream.error:2.1.0)
    stream: stream.error:2.1.0
    # (type: Optional[str], default: null__stream.error)
    name: null__stream.error
    # (type: Dict[Any, Any], default: {})
    options: {}

  # Event schema repository URIs. Event schemas will be loaded from these.
  # Either a list or a csv.
  # (type: Union[Sequence[~UriAbsolute], ~UriAbsolute],
  #  default: ('https://schema.wikimedia.org/repositories/primary/jsonschema',
  #            'https://schema.wikimedia.org/repositories/secondary/jsonschema'))
  schema_uris:
    - https://schema.wikimedia.org/repositories/primary/jsonschema
    - https://schema.wikimedia.org/repositories/secondary/jsonschema

  # URI from which to get stream config.
  # See org.wikimedia.eventutilities.core.event.EventStreamConfig
  # (type: ~UriAbsolute, default: https://meta.wikimedia.org/w/api.php)
  stream_config_uri: https://meta.wikimedia.org/w/api.php

  # A mapping of source hostnames to dest hostnames. If an HTTP request is to
  # be made to a source hostname, the request will be sent to dest hostname
  # but with the Host header set to source hostname. Used in WMF production
  # environments where we need to use discovery MediaWiki API endpoints,
  # e.g. api-ro.discovery.wmnet
  # (type: Optional[Dict[str, str]], default: null)
  http_client_routes: null

  # Max number of thread workers to use when using async
  # stream.process with a python function. (type: int, default: 12)
  process_max_workers_default: 12

  # Batch size to use when using async stream.process with a python function.
  # (type: int, default: 12)
  process_batch_size_default: 12

  # Batch duration in ms when using async stream.process with a python
  # function. (type: int, default: 120000)
  process_batch_duration_ms_default: 120000

  # Log configuration in dictionary format.
  # (type: Optional[dict], default: null)
  log_config: null