Configuration

stream_manager is configurable via the following YAML settings. All options can be overridden on the command line. See python your_app.py -h for more details.

# Settings for stream_manager, a context manager that wraps the setup of a
# Flink stream pipeline.
stream_manager:

  # Name of the streaming job. Used in the UI and in metric names.
  # The null below is only a placeholder: a value must be supplied.
  # (required, type: str)
  job_name: null

  # SourceDescriptor configuration. Used to instantiate the source(s)
  # and then read from them.
  source:

    # Connector for this source. (required, type: str)
    connector: null

    # Stream for this source. (required, type: str)
    # NOTE(review): the error_sink default below suggests the form
    # '<stream name>:<schema version>' - confirm.
    stream: null

    # Name for this source. (type: Optional[str], default: null)
    name: null

    # Options for this source. (type: Dict[Any, Any], default: {})
    options: {}

  # SinkDescriptor configuration. Used to instantiate the sink and write
  # the final output. The shape of the final output must conform to
  # sink.stream's event schema.
  sink:

    # Connector for this sink. (required, type: str)
    connector: null

    # Stream for this sink; its event schema is the one the final output
    # must conform to. (required, type: str)
    stream: null

    # Name for this sink. (type: Optional[str], default: null)
    name: null

    # Options for this sink. (type: Dict[Any, Any], default: {})
    options: {}

  # Error SinkDescriptor configuration. If set, any errors encountered
  # during processing will be written to this sink. You must ensure that
  # the error stream is declared in Event Stream Config.
  #
  # The default is a dummy "null" SinkDescriptor connector, which causes
  # no error sink to be used. In that case, errors are raised to the caller.
  error_sink:

    # The default is the literal string 'null' (the dummy connector),
    # not a YAML null, so the quotes are required.
    # (type: str, default: null)
    connector: 'null'

    # (type: str, default: stream.error:2.1.0)
    stream: 'stream.error:2.1.0'

    # (type: Optional[str], default: null__stream.error)
    name: 'null__stream.error'

    # (type: Dict[Any, Any], default: {})
    options: {}

  # Event schema repository URIs. Event schemas will be loaded from these.
  # Either a list or a csv.
  # (type: Union[Sequence[~UriAbsolute], ~UriAbsolute],
  #  default: ('https://schema.wikimedia.org/repositories/primary/jsonschema',
  #            'https://schema.wikimedia.org/repositories/secondary/jsonschema'))
  schema_uris:
    - 'https://schema.wikimedia.org/repositories/primary/jsonschema'
    - 'https://schema.wikimedia.org/repositories/secondary/jsonschema'

  # URI from which to get stream config.
  # See org.wikimedia.eventutilities.core.event.EventStreamConfig
  # (type: ~UriAbsolute, default: https://meta.wikimedia.org/w/api.php)
  stream_config_uri: 'https://meta.wikimedia.org/w/api.php'

  # A mapping of source hostnames to dest hostnames. If an HTTP request is
  # to be made to a source hostname, the request will be sent to the dest
  # hostname but with the Host header set to the source hostname. Used in
  # WMF production environments where we need to use discovery MediaWiki
  # API endpoints, e.g. api-ro.discovery.wmnet
  # (type: Optional[Dict[str, str]], default: null)
  http_client_routes: null

  # Max number of thread workers to use when using async stream.process
  # with a python function. (type: int, default: 12)
  process_max_workers_default: 12

  # Batch size to use when using async stream.process with a python
  # function. (type: int, default: 12)
  process_batch_size_default: 12

  # Batch duration in ms when using async stream.process with a python
  # function. (type: int, default: 120000)
  process_batch_duration_ms_default: 120000

# Log configuration in dictionary format.
# (type: Optional[dict], default: null)
log_config: null