Skip to content

Commit

Permalink
Make searches 10% faster
Browse files Browse the repository at this point in the history
Demo:

  go test -benchmem -run='^$' -bench=BenchmarkHighlightedSearch . ./...

We got here by avoiding allocations while splitting ANSI escape
sequences into numbers.
  • Loading branch information
walles committed Dec 3, 2023
1 parent d8055e7 commit 09ff042
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 38 deletions.
45 changes: 24 additions & 21 deletions m/ansiTokenizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -423,14 +423,17 @@ type _StyledString struct {
Style twin.Style
}

func splitIntoNumbers(s string) ([]uint, error) {
// "5" gives us the best numbers from BenchmarkHighlightedSearch. Higher
// gives us larger memory allocations for no extra performance, lower gives
// us more memory allocations and lower performance.
//
// To repro the tuning:
// go test -benchmem -run='^$' -bench=BenchmarkHighlightedSearch . ./...
numbers := make([]uint, 0, 5)
// To avoid allocations, our caller is expected to provide us with a
// pre-allocated numbersBuffer for storing the result.
//
// This function is part of the hot code path while searching, so we want it to
// be fast.
//
// # Benchmarking instructions
//
// go test -benchmem -run='^$' -bench=BenchmarkHighlightedSearch . ./...
func splitIntoNumbers(s string, numbersBuffer []uint) ([]uint, error) {
numbers := numbersBuffer[:0]

afterLastSeparator := 0
for i, char := range s {
Expand Down Expand Up @@ -474,22 +477,22 @@ func splitIntoNumbers(s string) ([]uint, error) {

// rawUpdateStyle parses a string of the form "33m" into changes to style. This
// is what comes after ESC[ in an ANSI SGR sequence.
func rawUpdateStyle(style twin.Style, escapeSequenceWithoutHeader string) (twin.Style, error) {
func rawUpdateStyle(style twin.Style, escapeSequenceWithoutHeader string, numbersBuffer []uint) (twin.Style, []uint, error) {
if len(escapeSequenceWithoutHeader) == 0 {
return style, fmt.Errorf("empty escape sequence, expected at least an ending letter")
return style, numbersBuffer, fmt.Errorf("empty escape sequence, expected at least an ending letter")
}
if escapeSequenceWithoutHeader[len(escapeSequenceWithoutHeader)-1] != 'm' {
return style, fmt.Errorf("escape sequence does not end with 'm': %s", escapeSequenceWithoutHeader)
return style, numbersBuffer, fmt.Errorf("escape sequence does not end with 'm': %s", escapeSequenceWithoutHeader)
}

numbers, err := splitIntoNumbers(escapeSequenceWithoutHeader[:len(escapeSequenceWithoutHeader)-1])
numbersBuffer, err := splitIntoNumbers(escapeSequenceWithoutHeader[:len(escapeSequenceWithoutHeader)-1], numbersBuffer)
if err != nil {
return style, fmt.Errorf("splitIntoNumbers: %w", err)
return style, numbersBuffer, fmt.Errorf("splitIntoNumbers: %w", err)
}

index := 0
for index < len(numbers) {
number := numbers[index]
for index < len(numbersBuffer) {
number := numbersBuffer[index]
index++
switch number {
case 0:
Expand Down Expand Up @@ -542,9 +545,9 @@ func rawUpdateStyle(style twin.Style, escapeSequenceWithoutHeader string) (twin.
case 38:
var err error
var color *twin.Color
index, color, err = consumeCompositeColor(numbers, index-1)
index, color, err = consumeCompositeColor(numbersBuffer, index-1)
if err != nil {
return style, fmt.Errorf("Foreground: %w", err)
return style, numbersBuffer, fmt.Errorf("Foreground: %w", err)
}
style = style.Foreground(*color)
case 39:
Expand All @@ -570,9 +573,9 @@ func rawUpdateStyle(style twin.Style, escapeSequenceWithoutHeader string) (twin.
case 48:
var err error
var color *twin.Color
index, color, err = consumeCompositeColor(numbers, index-1)
index, color, err = consumeCompositeColor(numbersBuffer, index-1)
if err != nil {
return style, fmt.Errorf("Background: %w", err)
return style, numbersBuffer, fmt.Errorf("Background: %w", err)
}
style = style.Background(*color)
case 49:
Expand Down Expand Up @@ -619,11 +622,11 @@ func rawUpdateStyle(style twin.Style, escapeSequenceWithoutHeader string) (twin.
style = style.Background(twin.NewColor16(15))

default:
return style, fmt.Errorf("Unrecognized ANSI SGR code <%d>", number)
return style, numbersBuffer, fmt.Errorf("Unrecognized ANSI SGR code <%d>", number)
}
}

return style, nil
return style, numbersBuffer, nil
}

func joinUints(ints []uint) string {
Expand Down
2 changes: 1 addition & 1 deletion m/ansiTokenizer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ func TestConsumeCompositeColorIncomplete24Bit(t *testing.T) {
}

func TestRawUpdateStyle(t *testing.T) {
numberColored, err := rawUpdateStyle(twin.StyleDefault, "33m")
numberColored, _, err := rawUpdateStyle(twin.StyleDefault, "33m", make([]uint, 0))
assert.NilError(t, err)
assert.Equal(t, numberColored, twin.StyleDefault.Foreground(twin.NewColor16(3)))
}
Expand Down
36 changes: 20 additions & 16 deletions m/styledStringSplitter.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ func (s *styledStringSplitter) lastChar() rune {
}

func (s *styledStringSplitter) run() {
var numbersBuffer []uint
char := s.nextChar()
for {
if char == -1 {
Expand All @@ -81,7 +82,8 @@ func (s *styledStringSplitter) run() {

if char == esc {
escIndex := s.previousByteIndex
err := s.handleEscape()
var err error
numbersBuffer, err = s.handleEscape(numbersBuffer)
if err != nil {
header := ""
if s.lineNumberOneBased != nil {
Expand Down Expand Up @@ -114,25 +116,25 @@ func (s *styledStringSplitter) handleRune(char rune) {
s.inProgressString.WriteRune(char)
}

func (s *styledStringSplitter) handleEscape() error {
func (s *styledStringSplitter) handleEscape(numbersBuffer []uint) ([]uint, error) {
char := s.nextChar()
if char == '[' || char == ']' {
// Got the start of a CSI or an OSC sequence
return s.consumeControlSequence(char)
return s.consumeControlSequence(char, numbersBuffer)
}

return fmt.Errorf("Unhandled Fe sequence ESC%c", char)
return numbersBuffer, fmt.Errorf("Unhandled Fe sequence ESC%c", char)
}

func (s *styledStringSplitter) consumeControlSequence(charAfterEsc rune) error {
func (s *styledStringSplitter) consumeControlSequence(charAfterEsc rune, numbersBuffer []uint) ([]uint, error) {
// Points to right after "ESC["
startIndex := s.nextByteIndex

// We're looking for a letter to end the CSI sequence
for {
char := s.nextChar()
if char == -1 {
return fmt.Errorf("Line ended in the middle of a control sequence")
return numbersBuffer, fmt.Errorf("Line ended in the middle of a control sequence")
}

// Range from here:
Expand All @@ -142,47 +144,49 @@ func (s *styledStringSplitter) consumeControlSequence(charAfterEsc rune) error {

if charAfterEsc == ']' && s.input[startIndex:s.nextByteIndex] == "8;;" {
// Special case, here comes the URL
return s.handleUrl()
return numbersBuffer, s.handleUrl()
}

continue
}

// The end, handle what we got
endIndexExclusive := s.nextByteIndex
return s.handleCompleteControlSequence(charAfterEsc, s.input[startIndex:endIndexExclusive])
return s.handleCompleteControlSequence(charAfterEsc, s.input[startIndex:endIndexExclusive], numbersBuffer)
}
}

// If the whole CSI sequence is ESC[33m, you should call this function with just
// "33m".
func (s *styledStringSplitter) handleCompleteControlSequence(charAfterEsc rune, sequence string) error {
func (s *styledStringSplitter) handleCompleteControlSequence(charAfterEsc rune, sequence string, numbersBuffer []uint) ([]uint, error) {
if charAfterEsc == ']' {
return s.handleOsc(sequence)
return numbersBuffer, s.handleOsc(sequence)
}

if charAfterEsc != '[' {
return fmt.Errorf("Unexpected charAfterEsc: %c", charAfterEsc)
return numbersBuffer, fmt.Errorf("Unexpected charAfterEsc: %c", charAfterEsc)
}

if sequence == "K" || sequence == "0K" {
// Clear to end of line
s.trailer = s.inProgressStyle
return nil
return numbersBuffer, nil
}

lastChar := sequence[len(sequence)-1]
if lastChar == 'm' {
newStyle, err := rawUpdateStyle(s.inProgressStyle, sequence)
var newStyle twin.Style
var err error
newStyle, numbersBuffer, err = rawUpdateStyle(s.inProgressStyle, sequence, numbersBuffer)
if err != nil {
return err
return numbersBuffer, err
}

s.startNewPart(newStyle)
return nil
return numbersBuffer, nil
}

return fmt.Errorf("Unhandled CSI type %q", lastChar)
return numbersBuffer, fmt.Errorf("Unhandled CSI type %q", lastChar)
}

func (s *styledStringSplitter) handleOsc(sequence string) error {
Expand Down

0 comments on commit 09ff042

Please sign in to comment.