From 7e3c9c2329ef2d296693e41309e5428938ce3ec4 Mon Sep 17 00:00:00 2001 From: Jan De Dobbeleer Date: Tue, 12 Mar 2024 09:06:09 +0100 Subject: [PATCH] fix(marshaler): escape runes outside the multilingual plane --- .../imported_tests/marshal_imported_test.go | 9 ++-- marshaler.go | 51 +++++++++++++++++-- 2 files changed, 52 insertions(+), 8 deletions(-) diff --git a/internal/imported_tests/marshal_imported_test.go b/internal/imported_tests/marshal_imported_test.go index cef7f232..254e6cc9 100644 --- a/internal/imported_tests/marshal_imported_test.go +++ b/internal/imported_tests/marshal_imported_test.go @@ -116,20 +116,19 @@ func TestBasicMarshalQuotedKey(t *testing.T) { require.NoError(t, err) expected := `'Z.string-àéù' = 'Hello' -'Yfloat-𝟘' = 3.5 +"Yfloat-𝟘" = 3.5 ['Xsubdoc-àéù'] String2 = 'One' -[['W.sublist-𝟘']] +[["W.sublist-𝟘"]] String2 = 'Two' -[['W.sublist-𝟘']] +[["W.sublist-𝟘"]] String2 = 'Three' ` - require.Equal(t, string(expected), string(result)) - + require.Equal(t, expected, string(result)) } func TestEmptyMarshal(t *testing.T) { diff --git a/marshaler.go b/marshaler.go index ffc99272..aa77211e 100644 --- a/marshaler.go +++ b/marshaler.go @@ -477,12 +477,57 @@ func (enc *Encoder) encodeString(b []byte, v string, options valueOptions) []byt } func needsQuoting(v string) bool { - // TODO: vectorize - for _, b := range []byte(v) { - if b == '\'' || b == '\r' || b == '\n' || characters.InvalidAscii(b) { + for _, r := range v { + if shouldQuoteRune(r) { return true } } + + return false +} + +// shouldQuoteRune returns true if the rune should be quoted. +// excludes all runes in the Basic Multilingual Plane and all Emoticons. 
+func shouldQuoteRune(r rune) bool { + if r == '\'' || r == '\r' || r == '\n' || characters.InvalidAscii(byte(r)) { + return true + } + + // Basic Multilingual Plane, Letters and Emoji + if r < 0x1000 || unicode.IsLetter(r) || isEmoticon(r) { + return false + } + + return true +} + +// Uses the following list to identify the emoticon range: +// https://unicode.org/emoji/charts/full-emoji-list.html +func isEmoticon(r rune) bool { //nolint:cyclop + if r > 0x1F600 && r < 0x1F64F { // Emoticons + return true + } + if r > 0x1F300 && r < 0x1F5FF { // Misc Symbols and Pictographs + return true + } + if r > 0x1F680 && r < 0x1F6FF { // Transport and Map + return true + } + if r > 0x2600 && r < 0x26FF { // Misc symbols + return true + } + if r > 0x2700 && r < 0x27BF { // Dingbats + return true + } + if r > 0xFE00 && r < 0xFE0F { // Variation Selectors + return true + } + if r > 0x1F900 && r < 0x1F9FF { // Supplemental Symbols and Pictographs + return true + } + if r > 0x1F1E6 && r < 0x1F1FF { // Flags + return true + } return false }