diff --git a/Makefile b/Makefile index 14a921e..1c173b7 100644 --- a/Makefile +++ b/Makefile @@ -1,22 +1,16 @@ -OS := $(shell uname | awk '{print tolower($$0)}') - .PHONY: generate generate: - pushd ./bench && \ - rm ./bench-data/*.nljson 2> /dev/null || true && \ - go mod download && go run ./generate.go ./logger.go -file-count 256 -line-count 4096 && \ - popd + rm -f ./bench-data/*.nljson || true + go run ./bench/generate -file-count 256 -line-count 4096 .PHONY: bench bench: - go mod download && go run ./bench/bench.go ./bench/logger.go + go run ./bench/server .PHONY: bench-file-d bench-file-d: - pushd ./file.d && \ - rm offsets || true && \ - ./file.d_$(OS) --config config.yaml && \ - popd + rm ./file.d/offsets || true + ./file.d/file.d --config ./file.d/config.yaml .PHONY: bench-fluent-bit bench-fluent-bit: @@ -24,14 +18,10 @@ bench-fluent-bit: .PHONY: bench-filebeat bench-filebeat: - pushd ./filebeat && \ - rm -r data || true && \ - ./filebeat_$(OS) -c config.yaml && \ - popd + rm -rf ./filebeat/data || true + ./filebeat/filebeat -c ./filebeat/config.yaml .PHONY: bench-vector bench-vector: - pushd ./vector && \ - rm -r ./logs || true && \ - ./vector_$(OS) --config config.toml && \ - popd + rm ./logs/checkpoints.json || true + ./vector/vector --config ./vector/config.toml diff --git a/README.md b/README.md index bd1cf8c..929724c 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,25 @@ # Benchmarks -Pipeline: File > JSON > Elastic Search +Pipeline: File > JSON > Elastic Search Supported tools for benchmarking: -* File.d -* Vector -* Filebeat -Works only on MacOS. +* File.d v0.25.2 +* Vector v0.38.0 +* Filebeat 7.17.13 + +Works only on x86-64 MacOS/Linux. How to run: + +* Download a log collector binary to its folder * Run `make generate` to generate json bench data (only once) * Run `make bench` to run elasticsearch mock service * Switch to other terminal tab and run `make bench-file-d|bench-vector|bench-filebeat` (one of tool) * Look at output of elasticsearch mock service -2.6 GHz 6‑Core Intel Core i7 results: -* File.d — 251.48Mb/s -* Vector — 21.50Mb/s -* Filebeat — 64.75Mb/s +AMD Ryzen 9 5950x (4.9 GHz 16‑Core) and SSD M2 Samsung 980 PRO (read 7000 MB/s, write 5000 MB/s) results: + +* File.d — 500.48Mb/s +* Vector — 323.14Mb/s +* Filebeat — 75.42Mb/s diff --git a/bench/generate.go b/bench/generate/main.go similarity index 91% rename from bench/generate.go rename to bench/generate/main.go index 77a28c7..7a201b5 100644 --- a/bench/generate.go +++ b/bench/generate/main.go @@ -12,6 +12,7 @@ import ( "time" insaneJSON "github.com/vitkovskii/insane-json" + "go.uber.org/zap" ) var ( @@ -29,6 +30,14 @@ var ( workersCount = runtime.GOMAXPROCS(0) ) +var logger = func() *zap.SugaredLogger { + lg, err := zap.NewDevelopment() + if err != nil { + panic(err) + } + return lg.Sugar() +}() + func main() { flag.Parse() rand.Seed(time.Now().UnixNano()) // constant number is intended to repeat output @@ -43,7 +52,7 @@ func main() { jobs := make(chan int) - err = os.MkdirAll("./../bench-data", os.ModePerm) + err = os.MkdirAll("./bench-data", os.ModePerm) if err != nil { logger.Info(err.Error()) os.Exit(1) @@ -69,25 +78,19 @@ func main() { func getJSONFixtures() ([]*insaneJSON.Root, error) { jsonFixtures := make([]*insaneJSON.Root, 0, 0) - //root, err := insaneJSON.DecodeFile("./fixtures/canada.json") - //if err != nil { - // return nil, err - //} - //jsonFixtures = append(jsonFixtures, root) - // //root, err = insaneJSON.DecodeFile("./fixtures/citm.json") //if err != nil { // return nil, err //} //jsonFixtures = append(jsonFixtures, root) - root, err := insaneJSON.DecodeFile("./fixtures/twitter.json") + root, err := insaneJSON.DecodeFile("./bench/fixtures/twitter.json") if err != nil { return nil, err } jsonFixtures = append(jsonFixtures, root) - root, err = insaneJSON.DecodeFile("./fixtures/unknown.json") + root, err = insaneJSON.DecodeFile("./bench/fixtures/unknown.json") if err != nil { return nil, err } @@ -154,7 +157,7 @@ func worker(fileName string, lineCount int, fields []string, values []string, jo queue := make([]*insaneJSON.Node, 0) jsonOut := make([]byte, 0) for j := range jobs { - fullFileName := fmt.Sprintf("../bench-data/%s-%04d.nljson", fileName, j) + fullFileName := fmt.Sprintf("./bench-data/%s-%04d.nljson", fileName, j) curAvg := avgSizeTarget buf = buf[:0] @@ -221,12 +224,12 @@ func genFillNodes(root *insaneJSON.Root, node *insaneJSON.Node, fields []string, val float64 t string }{ - {0.2,"obj"}, - {0.2,"arr"}, - {0.3,"str"}, - {0.1,"int"}, - {0.1,"flt"}, - {0.1,"bool"}, + {0.2, "obj"}, + {0.2, "arr"}, + {0.3, "str"}, + {0.1, "int"}, + {0.1, "flt"}, + {0.1, "bool"}, } r := rand.Float64() @@ -234,7 +237,7 @@ func genFillNodes(root *insaneJSON.Root, node *insaneJSON.Node, fields []string, if r < x.val { return x.t } - r-=x.val + r -= x.val } return "null" diff --git a/bench/logger.go b/bench/logger.go deleted file mode 100644 index 2bdf959..0000000 --- a/bench/logger.go +++ /dev/null @@ -1,32 +0,0 @@ -package main - -import ( - "os" - - "go.uber.org/zap" - "go.uber.org/zap/zapcore" -) - -var logger = getZapLogger() - -func getZapLogger() *zap.SugaredLogger { - return zap.New( - zapcore.NewCore( - zapcore.NewConsoleEncoder(zapcore.EncoderConfig{ - // TimeKey: "ts", - LevelKey: "level", - NameKey: "Instance", - CallerKey: "caller", - MessageKey: "message", - StacktraceKey: "stacktrace", - LineEnding: zapcore.DefaultLineEnding, - EncodeLevel: zapcore.LowercaseLevelEncoder, - EncodeTime: zapcore.ISO8601TimeEncoder, - EncodeDuration: zapcore.SecondsDurationEncoder, - EncodeCaller: zapcore.ShortCallerEncoder, - }), - zapcore.AddSync(os.Stdout), - zapcore.DebugLevel, - ), - ).Sugar() -} diff --git a/bench/bench.go b/bench/server/main.go similarity index 92% rename from bench/bench.go rename to bench/server/main.go index a2969b5..10618b3 100644 --- a/bench/bench.go +++ b/bench/server/main.go @@ -10,12 +10,13 @@ import ( "time" "github.com/valyala/fasthttp" + "go.uber.org/zap" ) var ( addr = flag.String("addr", "127.0.0.1:9200", "TCP address to listen to") debug = flag.Bool("debug", false, "Verbose logging") - filebeatBulkSize = flag.Int("filebeat", 50, "Filebeat bulk_max_size") + filebeatBulkSize = flag.Int("filebeat", 1000, "Filebeat bulk_max_size") duration = flag.Duration("duration", time.Second*10, "Benchmark duration") stats = &Stats{} firstReq = true @@ -29,11 +30,19 @@ var ( PathEmpty = []byte("/") PathLicense = []byte("/_license") PathXpack = []byte("/_xpack") - PathFilebeatTemplate = []byte("/_template/filebeat-7.15.2") - PathCatFilebeatTemplate = []byte("/_cat/templates/filebeat-7.15.2") + PathFilebeatTemplate = []byte("/_template/filebeat-") + PathCatFilebeatTemplate = []byte("/_cat/templates/filebeat-") PathBulk = []byte("/_bulk") ) +var logger = func() *zap.SugaredLogger { + lg, err := zap.NewDevelopment() + if err != nil { + panic(err) + } + return lg.Sugar() +}() + func main() { flag.Parse() termChan := make(chan os.Signal, 2) @@ -42,7 +51,7 @@ func main() { srv := fasthttp.Server{ Handler: requestHandler, Name: "default", - MaxRequestBodySize: fasthttp.DefaultMaxRequestBodySize, + MaxRequestBodySize: 1024 * 1024 * 100, // 100 MiB } logger.Infof("started, waiting for first request") @@ -176,14 +185,14 @@ func requestHandler(ctx *fasthttp.RequestCtx) { "security":{"available":true,"enabled":false},"slm":{"available":true,"enabled":true},"spatial":{"available":true,"enabled":true}, "sql":{"available":true,"enabled":true},"transform":{"available":true,"enabled":true},"voting_only":{"available":true,"enabled":true}, "watcher":{"available":false,"enabled":true}},"tagline":"You know, for X"}`) - case bytes.Equal(path, PathCatFilebeatTemplate): + case bytes.HasPrefix(path, PathCatFilebeatTemplate): ctx.SetBody(filebeatTemplate) } return } // Filebeat puts template - if bytes.Equal(method, MethodPut) && bytes.Equal(path, PathFilebeatTemplate) { + if bytes.Equal(method, MethodPut) && bytes.HasPrefix(path, PathFilebeatTemplate) { filebeatTemplate = ctx.PostBody() ctx.SetBodyString("{\"took\":0,\"errors\":false}\n") return @@ -248,7 +257,7 @@ func requestHandler(ctx *fasthttp.RequestCtx) { return } - ctx.Error("Bad Request", fasthttp.StatusBadRequest) + ctx.Write([]byte("{}")) } func dumpReport(stats *Stats) { diff --git a/file.d/config.yaml b/file.d/config.yaml index a36bea1..0e5ad24 100644 --- a/file.d/config.yaml +++ b/file.d/config.yaml @@ -2,13 +2,13 @@ pipelines: benchmark: settings: decoder: json - capacity: 1024 + capacity: 2048 # more capacity = more memory usage, but more throughput input: type: file persistence_mode: async - watching_dir: ./../bench-data/ + watching_dir: ./bench-data/ filename_pattern: "*.nljson" - offsets_file: ./offsets + offsets_file: ./file.d/offsets offsets_op: reset output: type: elasticsearch diff --git a/file.d/file.d_darwin b/file.d/file.d_darwin deleted file mode 100755 index 2203f8c..0000000 Binary files a/file.d/file.d_darwin and /dev/null differ diff --git a/filebeat/config.yaml b/filebeat/config.yaml index 0988f4d..37c583d 100644 --- a/filebeat/config.yaml +++ b/filebeat/config.yaml @@ -4,11 +4,11 @@ filebeat.inputs: json.add_error_key: true json.message_key: message paths: - - ../bench-data/*.nljson + - ./bench-data/*.nljson output.elasticsearch: hosts: ["http://127.0.0.1:9200"] - bulk_max_size: 50 + bulk_max_size: 1000 logging.level: info logging.to_files: false diff --git a/filebeat/filebeat_darwin b/filebeat/filebeat_darwin deleted file mode 100755 index 1dc18c8..0000000 Binary files a/filebeat/filebeat_darwin and /dev/null differ diff --git a/vector/config.toml b/vector/config.toml index 0b03681..fb09e7d 100644 --- a/vector/config.toml +++ b/vector/config.toml @@ -2,13 +2,17 @@ data_dir = "./" [sources.logs] type = "file" - include = ["../bench-data/*.nljson"] + include = ["./bench-data/*.nljson"] data_dir = './' fingerprinting.strategy = "device_and_inode" -[transforms.logs_parse_json] - inputs = [ "logs" ] - type = "json_parser" +[transforms.json_parser] + type = "remap" + inputs = ["logs"] + drop_on_error = false + source = ''' + . |= object!(parse_json!(.message)) + ''' [sinks.elasticsearch] type = "elasticsearch" diff --git a/vector/vector_darwin b/vector/vector_darwin deleted file mode 100755 index d1ff67b..0000000 Binary files a/vector/vector_darwin and /dev/null differ