From d446b2836cf9baff94f8c914002fea5f5373998d Mon Sep 17 00:00:00 2001 From: Steve Willoughby Date: Fri, 6 Sep 2024 15:59:24 -0700 Subject: [PATCH 1/2] Fix zerologwriter json parser bug --- .../zerologWriter/zerolog-writer.go | 150 ++++++++++++++---- .../zerologWriter/zerolog-writer_test.go | 22 ++- 2 files changed, 140 insertions(+), 32 deletions(-) diff --git a/v3/integrations/logcontext-v2/zerologWriter/zerolog-writer.go b/v3/integrations/logcontext-v2/zerologWriter/zerolog-writer.go index 7d94ed141..11605a949 100644 --- a/v3/integrations/logcontext-v2/zerologWriter/zerolog-writer.go +++ b/v3/integrations/logcontext-v2/zerologWriter/zerolog-writer.go @@ -1,6 +1,7 @@ package zerologWriter import ( + "bytes" "context" "io" "strings" @@ -58,12 +59,15 @@ func parseJSONLogData(log []byte) newrelic.LogData { for i := 0; i < len(log)-1; { // get key; always a string field key, valStart := getKey(log, i) + if valStart < 0 { + return data + } var next int // NOTE: depending on the key, the type of field the value is can differ switch key { case zerolog.LevelFieldName: - data.Severity, next = getStringValue(log, valStart+1) + data.Severity, next = getStringValue(log, valStart) case zerolog.ErrorStackFieldName: _, next = getStackTrace(log, valStart) default: @@ -72,7 +76,7 @@ func parseJSONLogData(log []byte) newrelic.LogData { } // TODO: once we update the logging spec to support custom attributes, capture these if isStringValue(log, valStart) { - _, next = getStringValue(log, valStart+1) + _, next = getStringValue(log, valStart) } else if isNumberValue(log, valStart) { _, next = getNumberValue(log, valStart) } else { @@ -98,6 +102,9 @@ func isNumberValue(p []byte, indx int) bool { } // zerolog keys are always JSON strings +// sasdhsd"foo" +// +// foo func getKey(p []byte, indx int) (string, int) { value := strings.Builder{} i := indx @@ -111,33 +118,108 @@ func getKey(p []byte, indx int) (string, int) { } // parse value of string field + literalNext := false for ; i < len(p); i++ { - if p[i] == '"' && i+1 < len(p) && p[i+1] == ':' { - return value.String(), i + 2 + if literalNext { + value.WriteByte(p[i]) + literalNext = false + continue + } + + if p[i] == '\\' { + value.WriteByte(p[i]) + literalNext = true + continue + } + + if p[i] == '"' { + // found end of key. Now look for the colon separator + for i++; i < len(p); i++ { + if p[i] == ':' && i+1 < len(p) { + return value.String(), i + 1 + } + if p[i] != ' ' && p[i] != '\t' { + break + } + } + // Oh oh. if we got here, there wasn't a colon, or there wasn't a value after it, or + // something showed up between the end of the key and the colon that wasn't a space. + // In any of those cases, we didn't find the key of a key/value pair. + return "", -1 } else { value.WriteByte(p[i]) } } - return "", -1 } +/* func isEOL(p []byte, i int) bool { - return p[i] == '}' && i+2 == len(p) + for ; i < len(p); i++ { + if p[i] == ' ' || p[i] == '\t' { + continue + } + if p[i] == '}' { + // nothing but space to the end from here? + for i++; i < len(p); i++ { + if p[i] != ' ' && p[i] != '\t' && p[i] != '\r' && p[i] != '\n' { + return false // nope, that wasn't the end of the string + } + } + return true + } + } + return false +} +*/ + +func skipPastSpaces(p []byte, i int) int { + for ; i < len(p); i++ { + if p[i] != ' ' && p[i] != '\t' && p[i] != '\r' && p[i] != '\n' { + return i + } + } + return -1 } func getStringValue(p []byte, indx int) (string, int) { value := strings.Builder{} + // skip to start of string + i := skipPastSpaces(p, indx) + if i < 0 || i >= len(p) || p[i] != '"' { + return "", -1 + } + // parse value of string field - for i := indx; i < len(p); i++ { - if p[i] == '"' && i+1 < len(p) { - if p[i+1] == ',' && i+2 < len(p) && p[i+2] == '"' { - return value.String(), i + 2 - } else if isEOL(p, i+1) { + literalNext := false + for i++; i < len(p); i++ { + if literalNext { + value.WriteByte(p[i]) + literalNext = false + continue + } + + if p[i] == '\\' { + value.WriteByte(p[i]) + literalNext = true + continue + } + + if p[i] == '"' { + // end of string. search past the comma so we can find the following key (if any) later. + if i = skipPastSpaces(p, i+1); i < 0 { + return value.String(), -1 + } + if p[i] == ',' { + if i+1 < len(p) { + return value.String(), i + 1 + } return value.String(), -1 } + return value.String(), -1 } + value.WriteByte(p[i]) } @@ -148,35 +230,43 @@ func getNumberValue(p []byte, indx int) (string, int) { value := strings.Builder{} // parse value of string field - for i := indx; i < len(p); i++ { - if p[i] == ',' && i+1 < len(p) && p[i+1] == '"' { - return value.String(), i + 1 - } else if isEOL(p, i) { - return value.String(), -1 - } else { - value.WriteByte(p[i]) - } + i := skipPastSpaces(p, indx) + if i < 0 { + return "", -1 + } + // JSON numeric values contain digits, '.', '-', 'e' + for ; i < len(p) && bytes.IndexByte([]byte("0123456789-+eE."), p[i]) >= 0; i++ { + value.WriteByte(p[i]) } - return "", -1 + i = skipPastSpaces(p, i) + if i > 0 && i+1 < len(p) && p[i] == ',' { + return value.String(), i + 1 + } + return value.String(), -1 } func getStackTrace(p []byte, indx int) (string, int) { value := strings.Builder{} - // parse value of string field - for i := indx; i < len(p); i++ { + i := skipPastSpaces(p, indx) + if i < 0 || i >= len(p) || p[i] != '[' { + return "", -1 + } + // the stack trace is everything from '[' to the next ']'. + // TODO: this is a little naïve and we may need to consider parsing + // the data inbetween more carefully. To date, we haven't seen a case + // where that is necessary, and prefer not to impact performance of the + // system by doing the extra processing, but we can revisit that later + // if necessary. + for ; i < len(p); i++ { if p[i] == ']' { value.WriteByte(p[i]) - - if i+1 < len(p) { - if isEOL(p, i+1) { - return value.String(), -1 - } - if p[i+1] == ',' && i+2 < len(p) && p[i+2] == '"' { - return value.String(), i + 2 - } + i = skipPastSpaces(p, i) + if i > 0 && i+1 < len(p) && p[i] == ',' { + return value.String(), i + 1 } + return value.String(), -1 } else { value.WriteByte(p[i]) } diff --git a/v3/integrations/logcontext-v2/zerologWriter/zerolog-writer_test.go b/v3/integrations/logcontext-v2/zerologWriter/zerolog-writer_test.go index ab0a2f974..cc831185c 100644 --- a/v3/integrations/logcontext-v2/zerologWriter/zerolog-writer_test.go +++ b/v3/integrations/logcontext-v2/zerologWriter/zerolog-writer_test.go @@ -116,13 +116,31 @@ func TestParseLogData(t *testing.T) { }, }, { - `{"Scale":"833 cents","Interval":833.09,"time":1562212768,"message":"Fibonacci is everywhere","level":"debug"}` + "\n", + `{"Scale":"833 cents" , "Interval":833.09,"time":1562212768,"message":"Fibonacci is everywhere","level":"debug"}` + "\n", "level", newrelic.LogData{ - Message: `{"Scale":"833 cents","Interval":833.09,"time":1562212768,"message":"Fibonacci is everywhere","level":"debug"}` + "\n", + Message: `{"Scale":"833 cents" , "Interval":833.09,"time":1562212768,"message":"Fibonacci is everywhere","level":"debug"}` + "\n", Severity: "debug", }, }, + /* regression test case from issue 955 by MarioCarrion */ + { + `{"level":"info","message":"\"value\","}` + "\n", + "level", + newrelic.LogData{ + Message: `{"level":"info","message":"\"value\","}` + "\n", + Severity: "info", + }, + }, + { + `{"level":"info","message":","}` + "\n", + "level", + newrelic.LogData{ + Message: `{"level":"info","message":","}` + "\n", + Severity: "info", + }, + }, + /* end of issue 955 test case */ } for _, test := range tests { if test.levelKey != "" { From 373260ad4e0ae5e2571382463dfa084c2dbc6d57 Mon Sep 17 00:00:00 2001 From: Steve Willoughby Date: Fri, 6 Sep 2024 16:22:17 -0700 Subject: [PATCH 2/2] correction to start of object parsing --- .../zerologWriter/zerolog-writer.go | 34 +++++++++++-------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/v3/integrations/logcontext-v2/zerologWriter/zerolog-writer.go b/v3/integrations/logcontext-v2/zerologWriter/zerolog-writer.go index 11605a949..beae3e551 100644 --- a/v3/integrations/logcontext-v2/zerologWriter/zerolog-writer.go +++ b/v3/integrations/logcontext-v2/zerologWriter/zerolog-writer.go @@ -56,7 +56,12 @@ func parseJSONLogData(log []byte) newrelic.LogData { data.Message = string(log) data.Timestamp = time.Now().UnixMilli() - for i := 0; i < len(log)-1; { + i := skipPastSpaces(log, 0) + if i < 0 || i >= len(log) || log[i] != '{' { + return data + } + i++ + for i < len(log)-1 { // get key; always a string field key, valStart := getKey(log, i) if valStart < 0 { @@ -94,32 +99,31 @@ func parseJSONLogData(log []byte) newrelic.LogData { } func isStringValue(p []byte, indx int) bool { + if indx = skipPastSpaces(p, indx); indx < 0 { + return false + } return p[indx] == '"' } func isNumberValue(p []byte, indx int) bool { - return unicode.IsDigit(rune(p[indx])) + if indx = skipPastSpaces(p, indx); indx < 0 { + return false + } + // unicode.IsDigit isn't sufficient here because JSON numbers can start with a sign too + return unicode.IsDigit(rune(p[indx])) || p[indx] == '-' } // zerolog keys are always JSON strings -// sasdhsd"foo" -// -// foo func getKey(p []byte, indx int) (string, int) { value := strings.Builder{} - i := indx - - // find start of string field - for ; i < len(p); i++ { - if p[i] == '"' { - i += 1 - break - } + i := skipPastSpaces(p, indx) + if i < 0 || i >= len(p) || p[i] != '"' { + return "", -1 } // parse value of string field literalNext := false - for ; i < len(p); i++ { + for i++; i < len(p); i++ { if literalNext { value.WriteByte(p[i]) literalNext = false @@ -208,7 +212,7 @@ func getStringValue(p []byte, indx int) (string, int) { if p[i] == '"' { // end of string. search past the comma so we can find the following key (if any) later. - if i = skipPastSpaces(p, i+1); i < 0 { + if i = skipPastSpaces(p, i+1); i < 0 || i >= len(p) { return value.String(), -1 } if p[i] == ',' {