public abstract class KafkaSource<S,D>
extends org.apache.gobblin.source.extractor.extract.EventBasedSource<S,D>
Source
implementation for Kafka source.
This is an updated copy of KafkaSource
in the gobblin-kafka-common module. This file should be deleted in favor of the upstream version when possible.
Updates are:
getFilteredTopics(org.apache.gobblin.configuration.SourceState)
method made protected instead of private -- https://github.com/apache/gobblin/pull/3408
Constructor and Description |
---|
KafkaSource() |
Modifier and Type | Method and Description |
---|---|
protected List<org.apache.gobblin.source.extractor.extract.kafka.KafkaTopic> |
getFilteredTopics(org.apache.gobblin.configuration.SourceState state)
Return topics to be processed filtered by job-level whitelist and blacklist.
|
List<org.apache.gobblin.source.workunit.WorkUnit> |
getWorkunits(org.apache.gobblin.configuration.SourceState state) |
protected boolean |
isTopicQualified(org.apache.gobblin.source.extractor.extract.kafka.KafkaTopic topic)
Whether a
KafkaTopic is qualified to be pulled. |
protected void |
populateClientPool(int count,
org.apache.gobblin.kafka.client.GobblinKafkaConsumerClient.GobblinKafkaConsumerClientFactory kafkaConsumerClientFactory,
com.typesafe.config.Config config) |
void |
shutdown(org.apache.gobblin.configuration.SourceState state) |
createExtract, getPreviousWorkUnitsForRetry, getPreviousWorkUnitStatesForRetry, isEarlyStopped
public static final String TOPIC_BLACKLIST
public static final String TOPIC_WHITELIST
public static final String LATEST_OFFSET
public static final String EARLIEST_OFFSET
public static final String NEAREST_OFFSET
public static final String OFFSET_LOOKBACK
public static final String BOOTSTRAP_WITH_OFFSET
public static final String KAFKA_OFFSET_LOOKBACK
public static final String DEFAULT_BOOTSTRAP_WITH_OFFSET
public static final String TOPICS_MOVE_TO_LATEST_OFFSET
public static final String RESET_ON_OFFSET_OUT_OF_RANGE
public static final String DEFAULT_RESET_ON_OFFSET_OUT_OF_RANGE
public static final String TOPIC_NAME
public static final String PARTITION_ID
public static final String LEADER_ID
public static final String LEADER_HOSTANDPORT
public static final org.apache.gobblin.source.workunit.Extract.TableType DEFAULT_TABLE_TYPE
public static final String DEFAULT_NAMESPACE_NAME
public static final String ALL_TOPICS
public static final String NUM_TOPIC_PARTITIONS
public static final String AVG_RECORD_MILLIS
public static final String START_FETCH_EPOCH_TIME
public static final String STOP_FETCH_EPOCH_TIME
public static final String PREVIOUS_START_FETCH_EPOCH_TIME
public static final String PREVIOUS_STOP_FETCH_EPOCH_TIME
public static final String PREVIOUS_LOW_WATERMARK
public static final String PREVIOUS_HIGH_WATERMARK
public static final String PREVIOUS_LATEST_OFFSET
public static final String OFFSET_FETCH_EPOCH_TIME
public static final String PREVIOUS_OFFSET_FETCH_EPOCH_TIME
public static final String GOBBLIN_KAFKA_CONSUMER_CLIENT_FACTORY_CLASS
public static final String GOBBLIN_KAFKA_EXTRACT_ALLOW_TABLE_TYPE_NAMESPACE_CUSTOMIZATION
public static final String DEFAULT_GOBBLIN_KAFKA_CONSUMER_CLIENT_FACTORY_CLASS
public static final String GOBBLIN_KAFKA_SHOULD_ENABLE_DATASET_STATESTORE
public static final boolean DEFAULT_GOBBLIN_KAFKA_SHOULD_ENABLE_DATASET_STATESTORE
public static final String OFFSET_FETCH_TIMER
public static final String RECORD_LEVEL_SLA_MINUTES_KEY
public static final String MAX_POSSIBLE_OBSERVED_LATENCY_IN_HOURS
public static final Integer DEFAULT_MAX_POSSIBLE_OBSERVED_LATENCY_IN_HOURS
public static final String OBSERVED_LATENCY_PRECISION
public static final Integer DEFAULT_OBSERVED_LATENCY_PRECISION
public static final String OBSERVED_LATENCY_MEASUREMENT_ENABLED
public static final Boolean DEFAULT_OBSERVED_LATENCY_MEASUREMENT_ENABLED
public static final String RECORD_CREATION_TIMESTAMP_FIELD
public static final String RECORD_CREATION_TIMESTAMP_UNIT
protected final ConcurrentLinkedQueue<org.apache.gobblin.kafka.client.GobblinKafkaConsumerClient> kafkaConsumerClientPool
protected static final ThreadLocal<org.apache.gobblin.kafka.client.GobblinKafkaConsumerClient> kafkaConsumerClient
protected com.google.common.base.Optional<org.apache.gobblin.metrics.event.lineage.LineageInfo> lineageInfo
public List<org.apache.gobblin.source.workunit.WorkUnit> getWorkunits(org.apache.gobblin.configuration.SourceState state)
protected void populateClientPool(int count, org.apache.gobblin.kafka.client.GobblinKafkaConsumerClient.GobblinKafkaConsumerClientFactory kafkaConsumerClientFactory, com.typesafe.config.Config config)
protected boolean isTopicQualified(org.apache.gobblin.source.extractor.extract.kafka.KafkaTopic topic)
Whether a KafkaTopic is qualified to be pulled.
This method can be overridden by subclasses for verifying topic eligibility, e.g., one may want to
skip a topic if its schema cannot be found in the schema registry.
protected List<org.apache.gobblin.source.extractor.extract.kafka.KafkaTopic> getFilteredTopics(org.apache.gobblin.configuration.SourceState state)
Copyright © 2021. All rights reserved.