From afe0c480afc76642753c307e08f31d474871a298 Mon Sep 17 00:00:00 2001 From: Ayman Bagabas Date: Thu, 30 May 2024 15:56:59 -0400 Subject: [PATCH] Detect if grapheme is an emoji This adds the necessary logic to detect if a grapheme cluster is an emoji based on @rivo's [comment](https://github.com/rivo/uniseg/issues/27#issuecomment-1368204010) Fixes: https://github.com/rivo/uniseg/issues/27 --- grapheme.go | 52 ++++++++++++++++++++++++++++++++++++++++++++++++ grapheme_test.go | 22 ++++++++++++++++++++ 2 files changed, 74 insertions(+) diff --git a/grapheme.go b/grapheme.go index 1c17c27..cf94215 100644 --- a/grapheme.go +++ b/grapheme.go @@ -103,6 +103,11 @@ func (g *Graphemes) Bytes() []byte { return []byte(g.cluster) } +// IsEmoji returns true if the current grapheme cluster is an emoji. +func (g *Graphemes) IsEmoji() bool { + return IsGraphemeClusterInStringEmoji(g.cluster, g.Width()) +} + // Positions returns the interval of the current grapheme cluster as byte // positions into the original string. The first returned value "from" indexes // the first byte and the second returned value "to" indexes the first byte that @@ -343,3 +348,50 @@ func FirstGraphemeClusterInString(str string, state int) (cluster, rest string, } } } + +const ( + regionalIndicatorA = 0x1F1E6 + regionalIndicatorZ = 0x1F1FF +) + +// IsGraphemeClusterEmoji returns true if the given byte slice grapheme cluster +// and width is an emoji according to the Unicode Standard Annex #51, Unicode +// Emoji. +func IsGraphemeClusterEmoji(cluster []byte, width int) bool { + return isGraphemeClusterEmoji(cluster, utf8.DecodeRune, width) +} + +// IsGraphemeClusterInStringEmoji is like [IsGraphemeClusterEmoji] but its input +// is a string. 
+func IsGraphemeClusterInStringEmoji(cluster string, width int) bool { + return isGraphemeClusterEmoji(cluster, utf8.DecodeRuneInString, width) +} + +func isGraphemeClusterEmoji[C []byte | string, F func(C) (rune, int)](cluster C, fn F, width int) bool { + if width != 2 { + return false + } + + r, rw := fn(cluster) + if r == vs16 { + return true + } + if r >= regionalIndicatorA && r <= regionalIndicatorZ { + return true + } + if propertyGraphemes(r) == prExtendedPictographic && + property(emojiPresentation, r) == prEmojiPresentation { + return true + } + + cluster = cluster[rw:] + for len(cluster) > 0 { + r, rw := fn(cluster) + if r == vs16 { + return true + } + cluster = cluster[rw:] + } + + return false +} diff --git a/grapheme_test.go b/grapheme_test.go index 988aebe..aa4d065 100644 --- a/grapheme_test.go +++ b/grapheme_test.go @@ -510,6 +510,28 @@ func TestGraphemesFunctionString(t *testing.T) { } } +func TestIsGraphemeClusterEmoji(t *testing.T) { + testCases := []struct { + cluster string + width int + expected bool + }{ + {"👋", 2, true}, + {"a", 1, false}, + {"咪", 2, false}, + {"ض", 1, false}, + {"🇩🇪", 2, true}, + {"👨🏿‍🌾", 2, true}, + {"🏳️‍🌈", 2, true}, + {"☺️", 2, true}, // White smiling face (with variation selector 16 = emoji presentation) + } + for index, testCase := range testCases { + if result := IsGraphemeClusterEmoji([]byte(testCase.cluster), testCase.width); result != testCase.expected { + t.Errorf(`Test case %d %q failed: Expected %t, got %t`, index, testCase.cluster, testCase.expected, result) + } + } +} + // Benchmark the use of the Graphemes class. func BenchmarkGraphemesClass(b *testing.B) { for i := 0; i < b.N; i++ {