This repository has been archived by the owner on Oct 7, 2022. It is now read-only.

feat(consumer): add batch send deadline
ggml1 committed Apr 23, 2021
1 parent 0194b1f commit 7a872fd
Showing 5 changed files with 30 additions and 6 deletions.
1 change: 1 addition & 0 deletions README.md
@@ -29,6 +29,7 @@ To create new injectors for your topics, you should create a new kubernetes deployment
 - `ELASTICSEARCH_DISABLE_SNIFFING` if set to "true", the client will not sniff Elasticsearch nodes during the node discovery process. Defaults to false. **OPTIONAL**
 - `KAFKA_CONSUMER_CONCURRENCY` Number of parallel goroutines working as a consumer. Default value is 1 **OPTIONAL**
 - `KAFKA_CONSUMER_BATCH_SIZE` Number of records to accumulate before sending them to Elasticsearch (for each goroutine). Default value is 100 **OPTIONAL**
+- `KAFKA_CONSUMER_BATCH_DEADLINE` If no new record is added to the batch within this duration, the accumulated batch is sent to Elasticsearch even if it is smaller than the batch size. Accepts Go duration strings, e.g. "30s" (see the example below). Default value is 1m **OPTIONAL**
 - `ES_INDEX_COLUMN` Record field to append to index name. Ex: to create one ES index per campaign, use "campaign_id" here **OPTIONAL**
 - `ES_BLACKLISTED_COLUMNS` Comma separated list of record fields to filter before sending to Elasticsearch. Defaults to empty string. **OPTIONAL**
 - `ES_DOC_ID_COLUMN` Record field to be the document ID of Elasticsearch. Defaults to "kafkaRecordPartition:kafkaRecordOffset". **OPTIONAL**
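Note on the new setting: the value is parsed with Go's time.ParseDuration (see src/injector/injector.go below), so any Go duration string works, and a malformed or unset value falls back to the 1m default. A minimal sketch of what is and is not accepted; the sample values here are illustrative, not from the commit:

	package main

	import (
		"fmt"
		"time"
	)

	func main() {
		// Candidate values for KAFKA_CONSUMER_BATCH_DEADLINE.
		for _, v := range []string{"30s", "1m", "1m30s", "500ms", "60"} {
			d, err := time.ParseDuration(v)
			if err != nil {
				// "60" fails: ParseDuration requires a unit, so the injector
				// would log a warning and fall back to time.Minute.
				fmt.Printf("%q -> invalid: %v\n", v, err)
				continue
			}
			fmt.Printf("%q -> %v\n", v, d)
		}
	}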
1 change: 1 addition & 0 deletions cmd/injector.go
@@ -42,6 +42,7 @@ func main() {
 		ConsumerGroup: os.Getenv("KAFKA_CONSUMER_GROUP"),
 		Concurrency: os.Getenv("KAFKA_CONSUMER_CONCURRENCY"),
 		BatchSize: os.Getenv("KAFKA_CONSUMER_BATCH_SIZE"),
+		BatchDeadline: os.Getenv("KAFKA_CONSUMER_BATCH_DEADLINE"),
 		BufferSize: os.Getenv("KAFKA_CONSUMER_BUFFER_SIZE"),
 		MetricsUpdateInterval: os.Getenv("KAFKA_CONSUMER_METRICS_UPDATE_INTERVAL"),
 		RecordType: os.Getenv("KAFKA_CONSUMER_RECORD_TYPE"),
6 changes: 6 additions & 0 deletions src/injector/injector.go
@@ -22,6 +22,11 @@ func MakeKafkaConsumer(endpoints Endpoints, logger log.Logger, schemaRegistry *s
 		level.Warn(logger).Log("err", err, "message", "failed to get consumer batch size")
 		batchSize = 100
 	}
+	batchDeadline, err := time.ParseDuration(kafkaConfig.BatchDeadline)
+	if err != nil {
+		level.Warn(logger).Log("err", err, "message", "failed to get consumer batch deadline")
+		batchDeadline = time.Minute
+	}
 	metricsUpdateInterval, err := time.ParseDuration(kafkaConfig.MetricsUpdateInterval)
 	if err != nil {
 		level.Warn(logger).Log("err", err, "message", "failed to get consumer metrics update interval")
@@ -54,6 +59,7 @@ func MakeKafkaConsumer(endpoints Endpoints, logger log.Logger, schemaRegistry *s
 		Logger: logger,
 		Concurrency: concurrency,
 		BatchSize: batchSize,
+		BatchDeadline: batchDeadline,
 		MetricsUpdateInterval: metricsUpdateInterval,
 		BufferSize: bufferSize,
 		IncludeKey: includeKey,
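The added block repeats the parse-or-default pattern already used for the batch size and the metrics update interval. A possible refactor, sketched under the assumption of the same go-kit logger; durationOrDefault is a hypothetical helper, not part of this commit:

	package injector

	import (
		"time"

		"github.com/go-kit/kit/log"
		"github.com/go-kit/kit/log/level"
	)

	// durationOrDefault parses s as a Go duration and returns def
	// (after logging a warning) when s is empty or malformed.
	func durationOrDefault(logger log.Logger, s, name string, def time.Duration) time.Duration {
		d, err := time.ParseDuration(s)
		if err != nil {
			level.Warn(logger).Log("err", err, "message", "failed to get "+name)
			return def
		}
		return d
	}

With it, the added lines would collapse to batchDeadline := durationOrDefault(logger, kafkaConfig.BatchDeadline, "consumer batch deadline", time.Minute).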
1 change: 1 addition & 0 deletions src/kafka/config.go
@@ -10,6 +10,7 @@ type Config struct {
 	ConsumerGroup string
 	Concurrency string
 	BatchSize string
+	BatchDeadline string
 	MetricsUpdateInterval string
 	BufferSize string
 	RecordType string
27 changes: 21 additions & 6 deletions src/kafka/consumer.go
@@ -2,8 +2,8 @@ package kafka

 import (
 	"context"
-	"os"
 	"errors"
+	"os"
 
 	"time"
 
@@ -12,9 +12,9 @@ import (
"github.com/go-kit/kit/endpoint"
"github.com/go-kit/kit/log"
"github.com/go-kit/kit/log/level"
e "github.com/inloco/kafka-elasticsearch-injector/src/errors"
"github.com/inloco/kafka-elasticsearch-injector/src/metrics"
"github.com/inloco/kafka-elasticsearch-injector/src/models"
e "github.com/inloco/kafka-elasticsearch-injector/src/errors"
)

type Notification int32
@@ -41,6 +41,7 @@ type Consumer struct {
 	Logger log.Logger
 	Concurrency int
 	BatchSize int
+	BatchDeadline time.Duration
 	MetricsUpdateInterval time.Duration
 	BufferSize int
 	IncludeKey bool
@@ -80,8 +81,9 @@ func (k *kafka) Start(signals chan os.Signal, notifications chan<- Notification)
 	defer consumer.Close()
 
 	buffSize := k.consumer.BatchSize
+	batchDeadline := k.consumer.BatchDeadline
 	for i := 0; i < concurrency; i++ {
-		go k.worker(consumer, buffSize, notifications)
+		go k.worker(consumer, buffSize, batchDeadline, notifications)
 	}
 	go func() {
 		for {
@@ -134,15 +136,24 @@ func (k *kafka) Start(signals chan os.Signal, notifications chan<- Notification)
 	}
 }
 
-func (k *kafka) worker(consumer *cluster.Consumer, buffSize int, notifications chan<- Notification) {
+func batchDeadlineExceeded(lastReceivedMsg *time.Time, batchDeadline time.Duration) bool {
+	if lastReceivedMsg == nil {
+		return false
+	}
+
+	return time.Now().Sub(*lastReceivedMsg) > batchDeadline
+}
+
+func (k *kafka) worker(consumer *cluster.Consumer, buffSize int, batchDeadline time.Duration, notifications chan<- Notification) {
 	buf := make([]*sarama.ConsumerMessage, buffSize)
 	var decoded []*models.Record
+	var lastReceivedMsg *time.Time
 	idx := 0
 	for {
 		kafkaMsg := <-k.consumerCh
 		buf[idx] = kafkaMsg
 		idx++
-		for idx == buffSize {
+		if idx == buffSize || batchDeadlineExceeded(lastReceivedMsg, batchDeadline) {
 			if decoded == nil {
-				for _, msg := range buf {
+				for _, msg := range buf[:idx] { // only the first idx entries belong to the current batch on a deadline flush
 					req, err := k.consumer.Decoder(nil, msg, k.consumer.IncludeKey)
@@ -172,7 +183,11 @@ func (k *kafka) worker(consumer *cluster.Consumer, buffSize int, notifications chan<- Notification)
 				consumer.MarkOffset(msg, "") // mark message as processed
 			}
 			decoded = nil
+			lastReceivedMsg = nil
 			idx = 0
+		} else {
+			now := time.Now() // copy first: assigning through a nil *time.Time would panic on the first message
+			lastReceivedMsg = &now
 		}
 	}
 }
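A caveat on the approach above: the deadline is only evaluated when <-k.consumerCh delivers the next message, so an idle topic can hold a partial batch past the deadline indefinitely. Below is a self-contained sketch of a timer-driven alternative that flushes on size or deadline even when no messages arrive; batchWorker, messages, and flush are illustrative names, not the repository's API, and string items stand in for *sarama.ConsumerMessage values:

	package main

	import (
		"fmt"
		"time"
	)

	// batchWorker accumulates items and flushes when the batch is full or
	// when batchDeadline elapses, whichever comes first. In the real consumer,
	// flush would decode the records, send them to Elasticsearch, and mark offsets.
	func batchWorker(messages <-chan string, batchSize int, batchDeadline time.Duration, flush func([]string)) {
		batch := make([]string, 0, batchSize)
		timer := time.NewTimer(batchDeadline)
		defer timer.Stop()
		for {
			select {
			case msg, ok := <-messages:
				if !ok {
					if len(batch) > 0 {
						flush(batch) // drain what is left on shutdown
					}
					return
				}
				batch = append(batch, msg)
				if len(batch) == batchSize {
					flush(batch)
					batch = batch[:0]
					if !timer.Stop() {
						<-timer.C // drain a pending fire before reusing the timer
					}
					timer.Reset(batchDeadline)
				}
			case <-timer.C:
				if len(batch) > 0 {
					flush(batch) // deadline flush: the batch may be smaller than batchSize
					batch = batch[:0]
				}
				timer.Reset(batchDeadline)
			}
		}
	}

	func main() {
		msgs := make(chan string)
		go func() {
			for _, m := range []string{"a", "b", "c"} {
				msgs <- m
			}
			close(msgs)
		}()
		batchWorker(msgs, 2, 100*time.Millisecond, func(b []string) {
			fmt.Println("flush:", b)
		})
	}

The select on the timer channel decouples flushing from message arrival, which is the main design difference from the commit's check-on-receive approach.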
