Skip to content

Commit

Permalink
refactor: ditch consumer groups for direct consumers (#35)
Browse files Browse the repository at this point in the history
* refactor: replace consumer groups with direct consumers

* feat: make target channel buffer configurable

* fix: Fix stop at end flow

* fix: Fix filters config name while parsing

---------

Co-authored-by: Joe Paul <joeirimpan@gmail.com>
  • Loading branch information
kalbhor and joeirimpan authored Nov 26, 2024
1 parent dba537d commit 4273fc4
Show file tree
Hide file tree
Showing 7 changed files with 92 additions and 216 deletions.
5 changes: 1 addition & 4 deletions config.sample.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,6 @@ source_topic2 = "target_topic2"
[source_pool]
# Kafka client config common to all upstream sources ([[sources]]).
initial_offset = "start"
# Static membership to pin the member of the consumer group across respawns / reconnects, and to fence other members from connecting using the same id.
instance_id = "client_instance_id"
# Consumer group id.
group_id = "consumer_group"

# Frequency at which source servers are polled for health/lag.
healthcheck_interval = "3s"
Expand Down Expand Up @@ -109,6 +105,7 @@ ca_cert_path = ""
max_retries = -1
flush_batch_size = 1000
batch_size = 1000
buffer_size = 100000 # channel buffer length
max_message_bytes = 10000000

# Kafka exponential retry-backoff config for reconnection attempts.
Expand Down
12 changes: 5 additions & 7 deletions init.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,14 +92,12 @@ func initSourcePoolConfig(ko *koanf.Koanf) relay.SourcePoolCfg {
EnableBackoff: ko.Bool("source_pool.backoff_enable"),
BackoffMin: ko.MustDuration("source_pool.backoff_min"),
BackoffMax: ko.MustDuration("source_pool.backoff_max"),
GroupID: ko.MustString("source_pool.group_id"),
InstanceID: ko.MustString("source_pool.instance_id"),
}
}

func initRelayConfig(ko *koanf.Koanf) relay.RelayCfg {
return relay.RelayCfg{
StopAtEnd: ko.Bool("stop_at_end"),
StopAtEnd: ko.Bool("stop-at-end"),
}
}

Expand Down Expand Up @@ -175,10 +173,10 @@ func initTopicsMap(ko *koanf.Koanf) relay.Topics {
}

// initKafkaConfig reads the source(s)/target Kafka configuration.
func initKafkaConfig(ko *koanf.Koanf) ([]relay.ConsumerGroupCfg, relay.ProducerCfg) {
func initKafkaConfig(ko *koanf.Koanf) ([]relay.ConsumerCfg, relay.ProducerCfg) {
// Read source Kafka config.
src := struct {
Sources []relay.ConsumerGroupCfg `koanf:"sources"`
Sources []relay.ConsumerCfg `koanf:"sources"`
}{}

if err := ko.Unmarshal("", &src); err != nil {
Expand Down Expand Up @@ -237,11 +235,11 @@ func initFilters(ko *koanf.Koanf, lo *slog.Logger) (map[string]filter.Provider,
}

var cfg filter.Config
if err := ko.Unmarshal("filter."+id, &cfg); err != nil {
if err := ko.Unmarshal("filters."+id, &cfg); err != nil {
log.Fatalf("error unmarshalling filter config: %s: %v", id, err)
}
if cfg.Config == "" {
lo.Info(fmt.Sprintf("WARNING: No config 'filter.%s' for '%s' in config", id, id))
lo.Info(fmt.Sprintf("WARNING: No config 'filters.%s' for '%s' in config", id, id))
}

// Initialize the plugin.
Expand Down
5 changes: 3 additions & 2 deletions internal/relay/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ type KafkaCfg struct {
EnableLog bool `koanf:"enable_log"`
}

// ConsumerGroupCfg is the consumer group specific config.
type ConsumerGroupCfg struct {
// ConsumerCfg is the direct consumer config.
type ConsumerCfg struct {
KafkaCfg `koanf:",squash"`
}

Expand All @@ -57,6 +57,7 @@ type ProducerCfg struct {
FlushFrequency time.Duration `koanf:"flush_frequency"`
MaxMessageBytes int `koanf:"max_message_bytes"`
BatchSize int `koanf:"batch_size"`
BufferSize int `koanf:"buffer_size"`
FlushBatchSize int `koanf:"flush_batch_size"`
Compression string `koanf:"compression"` // gzip|snappy|lz4|zstd|none
}
11 changes: 6 additions & 5 deletions internal/relay/relay.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import (
)

type RelayCfg struct {
StopAtEnd bool `koanf:"stop_at_end"`
StopAtEnd bool
}

// Relay represents the input, output kafka and the remapping necessary to forward messages from one topic to another.
Expand All @@ -30,7 +30,7 @@ type Relay struct {
// If stop-at-end is enabled, the "end" offsets of the source
// read at the time of boot are cached here to compare against
// live offsets and stop consumption.
targetOffsets map[string]map[int32]kgo.Offset
targetOffsets TopicOffsets

// Live topic offsets from source.
srcOffsets map[string]map[int32]int64
Expand All @@ -42,7 +42,7 @@ type Relay struct {
func NewRelay(cfg RelayCfg, src *SourcePool, target *Target, topics Topics, filters map[string]filter.Provider, log *slog.Logger) (*Relay, error) {
// If stop-at-end is set, fetch and cache the offsets to determine
// when end is reached.
var offsets map[string]map[int32]kgo.Offset
var offsets TopicOffsets
if cfg.StopAtEnd {
if o, err := target.GetHighWatermark(); err != nil {
return nil, err
Expand Down Expand Up @@ -115,7 +115,7 @@ func (re *Relay) Start(globalCtx context.Context) error {
go func() {
defer wg.Done()
// Wait till main ctx is cancelled.
<-globalCtx.Done()
<-ctx.Done()

// Stop consumer group.
re.source.Close()
Expand All @@ -133,6 +133,7 @@ func (re *Relay) Start(globalCtx context.Context) error {
// Close producer.
re.target.Close()

cancel()
wg.Wait()

return nil
Expand Down Expand Up @@ -223,7 +224,7 @@ loop:
rec := iter.Next()
// Always record the latest offsets before the messages are processed for new connections and
// retries to consume from where it was left off.
// NOTE: What if the next step fails? The messages won't be read again?
// TODO: What if the next step fails? The messages won't be read again?
re.source.RecordOffsets(rec)

if err := re.processMessage(ctx, rec); err != nil {
Expand Down
Loading

0 comments on commit 4273fc4

Please sign in to comment.