feat: add elastic search ingester module
Signed-off-by: Martin Chodur <[email protected]>
Showing 11 changed files with 529 additions and 6 deletions.
@@ -0,0 +1,33 @@
# Elasticsearch ingester

|                |                         |
|----------------|-------------------------|
| `moduleName`   | `elasticSearchIngester` |
| Module type    | `producer`              |
| Output event   | `raw`                   |

This module allows you to read events as documents from Elasticsearch (assuming the ELK stack).

### Elasticsearch versions and support
Currently, only v7 is supported.

### moduleConfig
```yaml
addresses:
  - "https://foo.bar:4433"
index: "*:sklik-production-search"
clientCertFile: "./client.pem"
clientKeyFile: "./client-key.pem"
clientCaCertFile: "./ca.cert"
debug: true
insecureSkipVerify: false
maxBatchSize: 100
interval: 5s
timeout: 5s
timestampField: "@timestamp"
timestampFormat: "2006-01-02T15:04:05Z07:00" # Go reference-time layout; see https://www.geeksforgeeks.org/time-formatting-in-golang/ for common examples
query: "app_name: nginx AND namespace: test"
rawLogField: "log"
rawLogParseRegexp: '^(?P<ip>[A-Fa-f0-9.:]{4,50}) \S+ \S+ \[(?P<time>.*?)\] "(?P<httpMethod>[^\s]+)?\s+(?P<httpPath>[^\?\s]+)(?P<httpQuery>[^\s]+)?\s+(?P<protocolVersion>[^\s]+)\s*" (?P<statusCode>\d+) \d+ "(?P<referer>.*?)" uag="(?P<userAgent>[^"]+)" "[^"]+" ua="[^"]+" rt="(?P<requestDuration>\d+(\.\d+)??)".*? cc="(?P<contentClass>[^"]*)".*? ignore-slo="(?P<ignoreSloHeader>[^"]*)" slo-domain="(?P<sloDomain>[^"]*)" slo-app="(?P<sloApp>[^"]*)" slo-class="(?P<sloClass>[^"]*)" slo-endpoint="(?P<sloEndpoint>[^"]*)" slo-result="(?P<sloResult>[^"]*)"'
rawLogEmptyGroupRegexp: '^-$'
```
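As an aside, `timestampFormat` uses Go's reference-time layout, where the layout string is itself an example timestamp; the value above is exactly `time.RFC3339`. A standalone sketch of how such a layout parses a document timestamp (the sample value is illustrative):

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	// The layout from moduleConfig above; identical to time.RFC3339.
	const layout = "2006-01-02T15:04:05Z07:00"

	// Parse a document timestamp in that format.
	ts, err := time.Parse(layout, "2021-03-05T12:34:56+01:00")
	if err != nil {
		panic(err)
	}
	fmt.Println(ts.UTC()) // 2021-03-05 11:34:56 +0000 UTC
}
```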
@@ -0,0 +1,25 @@
```go
package elasticsearch_ingester

import (
	"context"
	"encoding/json"
	"time"

	"github.com/prometheus/client_golang/prometheus"
)

var (
	elasticApiCall = prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Name:    "elasticsearch_request_seconds",
		Help:    "Duration histogram of elasticsearch api calls",
		Buckets: prometheus.ExponentialBuckets(0.1, 2, 5),
	}, []string{"api_version", "endpoint", "error"})
)

// document is a single Elasticsearch hit flattened into string fields.
type document struct {
	timestamp time.Time
	fields    map[string]string
}

// elasticClient abstracts the Elasticsearch search API used by the tailer.
type elasticClient interface {
	RangeSearch(ctx context.Context, index, timestampField string, since time.Time, size int, query string, timeout time.Duration) ([]json.RawMessage, error)
}
```
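Because the tailer only depends on this single-method interface, a test can swap in a canned implementation. A minimal in-package sketch; the `fakeClient` type is hypothetical and not part of this commit:

```go
// fakeClient is a hypothetical in-memory elasticClient for tests.
type fakeClient struct {
	docs []json.RawMessage
}

// RangeSearch ignores the search parameters and returns the canned
// documents, truncated to the requested batch size.
func (f *fakeClient) RangeSearch(ctx context.Context, index, timestampField string, since time.Time, size int, query string, timeout time.Duration) ([]json.RawMessage, error) {
	if len(f.docs) > size {
		return f.docs[:size], nil
	}
	return f.docs, nil
}
```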
@@ -0,0 +1,171 @@
```go
package elasticsearch_ingester

import (
	"context"
	"encoding/json"
	"fmt"
	"regexp"
	"sync"
	"time"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/sirupsen/logrus"
	"go.uber.org/atomic"

	tailer_module "github.com/seznam/slo-exporter/pkg/tailer"
)

var (
	searchedDocuments = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "searched_documents_total",
		Help: "How many documents were retrieved from Elasticsearch",
	})
	lastSearchTimestamp = prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "last_document_timestamp_seconds",
		Help: "Timestamp of the last processed document; the next fetch will read since this timestamp",
	})
	missingRawLogField = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "missing_raw_log_field_total",
		Help: "How many times the configured raw log field wasn't found in the document",
	})
	invalidRawLogFormat = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "raw_log_invalid_format_total",
		Help: "How many times the raw log had an invalid format",
	})
	missingTimestampField = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "missing_timestamp_field_total",
		Help: "How many times the timestamp field was missing",
	})
	invalidTimestampFormat = prometheus.NewCounter(prometheus.CounterOpts{
		Name: "invalid_timestamp_format_total",
		Help: "How many times the timestamp field had an invalid format",
	})
)
```
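These collectors still need to be registered before they show up on the metrics endpoint; registration is not part of this hunk and presumably happens in one of the other changed files. A minimal in-package sketch, assuming the default registry is acceptable:

```go
func init() {
	// Hypothetical registration; the module may instead accept a dedicated
	// *prometheus.Registry from the slo-exporter pipeline.
	prometheus.MustRegister(
		searchedDocuments,
		lastSearchTimestamp,
		missingRawLogField,
		invalidRawLogFormat,
		missingTimestampField,
		invalidTimestampFormat,
	)
}
```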
```go
func newTailer(logger logrus.FieldLogger, client elasticClient, index, timestampField, timestampFormat, rawLogField string, rawLogFormatRegexp, rawLogEmptyGroupRegexp *regexp.Regexp, query string, timeout time.Duration, maxBatchSize int) tailer {
	return tailer{
		client:                 client,
		index:                  index,
		timestampField:         timestampField,
		timestampFormat:        timestampFormat,
		rawLogField:            rawLogField,
		rawLogFormatRegexp:     rawLogFormatRegexp,
		rawLogEmptyGroupRegexp: rawLogEmptyGroupRegexp,
		lastTimestamp:          time.Now(),
		lastTimestampMtx:       sync.RWMutex{},
		maxBatchSize:           maxBatchSize,
		timeout:                timeout,
		query:                  query,
		logger:                 logger,
	}
}

type tailer struct {
	client                 elasticClient
	index                  string
	timestampField         string
	timestampFormat        string
	rawLogField            string
	rawLogFormatRegexp     *regexp.Regexp
	rawLogEmptyGroupRegexp *regexp.Regexp
	query                  string
	lastTimestamp          time.Time
	lastTimestampMtx       sync.RWMutex
	maxBatchSize           int
	timeout                time.Duration
	logger                 logrus.FieldLogger

	processing atomic.Bool
}

// newDocumentFromJson flattens a raw Elasticsearch hit into a document,
// optionally exploding the raw log field using the configured regexp and
// parsing the timestamp field (falling back to time.Now on any problem).
func (t *tailer) newDocumentFromJson(data json.RawMessage) (document, error) {
	newDoc := document{
		timestamp: time.Now(),
		fields:    map[string]string{},
	}

	var fields map[string]interface{}
	err := json.Unmarshal(data, &fields)
	if err != nil {
		return newDoc, fmt.Errorf("unable to unmarshal document body: %w", err)
	}
	for k, v := range fields {
		newDoc.fields[k] = fmt.Sprint(v)
	}

	if t.rawLogField != "" {
		rawLog, ok := newDoc.fields[t.rawLogField]
		if !ok {
			missingRawLogField.Inc()
			t.logger.WithField("document", newDoc.fields).Warnf("document missing the raw log field %s", t.rawLogField)
		} else {
			rawLogFields, err := tailer_module.ParseLine(t.rawLogFormatRegexp, t.rawLogEmptyGroupRegexp, rawLog)
			if err != nil {
				invalidRawLogFormat.Inc()
				t.logger.WithField("document", newDoc.fields).Warnf("document has invalid format of the raw log field %s", t.rawLogField)
			}
			for k, v := range rawLogFields {
				newDoc.fields[k] = v
			}
		}
	}

	timeField, ok := newDoc.fields[t.timestampField]
	if !ok {
		missingTimestampField.Inc()
		t.logger.WithField("document", newDoc.fields).Warnf("document missing the timestamp field %s, using now instead", t.timestampField)
		return newDoc, nil
	}
	ts, err := time.Parse(t.timestampFormat, timeField)
	if err != nil {
		invalidTimestampFormat.Inc()
		t.logger.WithField("document", newDoc.fields).WithField("timestamp", timeField).Warnf("document has invalid timestamp field %s, using now instead", t.timestampField)
		return newDoc, nil
	}
	newDoc.timestamp = ts
	t.lastTimestamp = ts
	lastSearchTimestamp.Set(float64(t.lastTimestamp.Unix()))
	return newDoc, nil
}
```
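Note that every top-level value of the document is flattened to a string via `fmt.Sprint`, so nested objects arrive downstream in Go's default map formatting. A standalone sketch of that flattening step (the sample document is illustrative):

```go
package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	raw := []byte(`{"app_name":"nginx","status":200,"kubernetes":{"namespace":"test"}}`)

	// Unmarshal into a generic map, as newDocumentFromJson does.
	var fields map[string]interface{}
	if err := json.Unmarshal(raw, &fields); err != nil {
		panic(err)
	}

	// Flatten every value to its default string representation.
	flat := map[string]string{}
	for k, v := range fields {
		flat[k] = fmt.Sprint(v)
	}
	fmt.Println(flat["status"])     // "200" (JSON numbers decode as float64)
	fmt.Println(flat["kubernetes"]) // "map[namespace:test]"
}
```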
```go
// run starts a goroutine that periodically queries Elasticsearch for new
// documents since the last seen timestamp and streams them to the returned
// channel. The channel is closed when ctx is cancelled.
func (t *tailer) run(ctx context.Context, interval time.Duration) chan document {
	ticker := time.NewTicker(interval)
	outChan := make(chan document, t.maxBatchSize)
	go func() {
		defer ticker.Stop()
		defer close(outChan)
		for {
			select {
			case <-ctx.Done():
				return
			case <-ticker.C:
				if t.processing.Load() {
					t.logger.Warn("skipping scheduled query, previous one still running")
					continue
				}
				t.processing.Store(true)

				jsonDocs, err := t.client.RangeSearch(ctx, t.index, t.timestampField, t.lastTimestamp, t.maxBatchSize, t.query, t.timeout)
				if err != nil {
					t.logger.WithFields(logrus.Fields{"error": err, "since": t.lastTimestamp}).Error("failed to search data from Elasticsearch")
					// Reset the flag, otherwise a single failed search would
					// block all future scheduled queries.
					t.processing.Store(false)
					continue
				}
				for _, jd := range jsonDocs {
					select {
					case <-ctx.Done():
						return
					default:
						newDoc, err := t.newDocumentFromJson(jd)
						if err != nil {
							t.logger.WithFields(logrus.Fields{"error": err, "document": jd}).Error("failed to read document")
						}
						searchedDocuments.Inc()
						outChan <- newDoc
					}
				}
				t.processing.Store(false)
			}
		}
	}()
	return outChan
}
```
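Putting the pieces together, a rough in-package sketch of constructing and draining the tailer; the regular expression, query, and client value are placeholders rather than the module's real wiring:

```go
// exampleRun is a hypothetical caller; the real module presumably builds
// the tailer from moduleConfig in one of the other changed files.
func exampleRun(ctx context.Context, logger logrus.FieldLogger, client elasticClient) {
	lineRegexp := regexp.MustCompile(`^(?P<ip>\S+) .*$`) // placeholder parse regexp
	emptyGroupRegexp := regexp.MustCompile(`^-$`)        // matches rawLogEmptyGroupRegexp above

	t := newTailer(logger, client, "*:sklik-production-search", "@timestamp", time.RFC3339,
		"log", lineRegexp, emptyGroupRegexp, "app_name: nginx", 5*time.Second, 100)

	// Drain documents until ctx is cancelled and the channel closes.
	for doc := range t.run(ctx, 5*time.Second) {
		logger.WithField("fields", doc.fields).Info("got document")
	}
}
```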