From 5a874ae241af64368e65b667592918b3fdb17177 Mon Sep 17 00:00:00 2001 From: Charlie Vieth Date: Thu, 11 Mar 2021 05:34:50 -0500 Subject: Speed up ANSI code processing (#2368) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit speeds up the parsing/processing of ANSI escape codes by roughly 7.5x. The speedup is mostly accomplished by replacing the regex with dedicated parsing logic (nextAnsiEscapeSequence()) and reducing the number of allocations in extractColor(). #### Benchmarks ``` name old time/op new time/op delta ExtractColor-16 4.89µs ± 5% 0.64µs ± 2% -86.87% (p=0.000 n=9+9) name old speed new speed delta ExtractColor-16 25.6MB/s ± 5% 194.6MB/s ± 2% +661.43% (p=0.000 n=9+9) name old alloc/op new alloc/op delta ExtractColor-16 1.37kB ± 0% 0.31kB ± 0% -77.31% (p=0.000 n=10+10) name old allocs/op new allocs/op delta ExtractColor-16 48.0 ± 0% 4.0 ± 0% -91.67% (p=0.000 n=10+10) ``` --- src/ansi_test.go | 239 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 239 insertions(+) (limited to 'src/ansi_test.go') diff --git a/src/ansi_test.go b/src/ansi_test.go index 497eca44..36ed0c7d 100644 --- a/src/ansi_test.go +++ b/src/ansi_test.go @@ -2,12 +2,190 @@ package fzf import ( "fmt" + "math/rand" + "regexp" "strings" "testing" + "unicode/utf8" "github.com/junegunn/fzf/src/tui" ) +// The following regular expression will include not all but most of the +// frequently used ANSI sequences. This regex is used as a reference for +// testing nextAnsiEscapeSequence(). +// +// References: +// - https://github.com/gnachman/iTerm2 +// - http://ascii-table.com/ansi-escape-sequences.php +// - http://ascii-table.com/ansi-escape-sequences-vt-100.php +// - http://tldp.org/HOWTO/Bash-Prompt-HOWTO/x405.html +// - https://invisible-island.net/xterm/ctlseqs/ctlseqs.html +var ansiRegexRefence = regexp.MustCompile("(?:\x1b[\\[()][0-9;]*[a-zA-Z@]|\x1b][0-9];[[:print:]]+(?:\x1b\\\\|\x07)|\x1b.|[\x0e\x0f]|.\x08)") + +func testParserReference(t testing.TB, str string) { + t.Helper() + + toSlice := func(start, end int) []int { + if start == -1 { + return nil + } + return []int{start, end} + } + + s := str + for i := 0; ; i++ { + got := toSlice(nextAnsiEscapeSequence(s)) + exp := ansiRegexRefence.FindStringIndex(s) + + equal := len(got) == len(exp) + if equal { + for i := 0; i < len(got); i++ { + if got[i] != exp[i] { + equal = false + break + } + } + } + if !equal { + var exps, gots []rune + if len(got) == 2 { + gots = []rune(s[got[0]:got[1]]) + } + if len(exp) == 2 { + exps = []rune(s[exp[0]:exp[1]]) + } + t.Errorf("%d: %q: got: %v (%q) want: %v (%q)", i, s, got, gots, exp, exps) + return + } + if len(exp) == 0 { + return + } + s = s[exp[1]:] + } +} + +func TestNextAnsiEscapeSequence(t *testing.T) { + testStrs := []string{ + "\x1b[0mhello world", + "\x1b[1mhello world", + "椙\x1b[1m椙", + "椙\x1b[1椙m椙", + "\x1b[1mhello \x1b[mw\x1b7o\x1b8r\x1b(Bl\x1b[2@d", + "\x1b[1mhello \x1b[Kworld", + "hello \x1b[34;45;1mworld", + "hello \x1b[34;45;1mwor\x1b[34;45;1mld", + "hello \x1b[34;45;1mwor\x1b[0mld", + "hello \x1b[34;48;5;233;1mwo\x1b[38;5;161mr\x1b[0ml\x1b[38;5;161md", + "hello \x1b[38;5;38;48;5;48;1mwor\x1b[38;5;48;48;5;38ml\x1b[0md", + "hello \x1b[32;1mworld", + "hello world", + "hello \x1b[0;38;5;200;48;5;100mworld", + "\x1b椙", + "椙\x08", + "\n\x08", + "X\x08", + "", + "\x1b]4;3;rgb:aa/bb/cc\x07 ", + "\x1b]4;3;rgb:aa/bb/cc\x1b\\ ", + ansiBenchmarkString, + } + + for _, s := range testStrs { + testParserReference(t, s) + } +} + +func TestNextAnsiEscapeSequence_Fuzz_Modified(t *testing.T) { + t.Parallel() + if testing.Short() { + t.Skip("short test") + } + + testStrs := []string{ + "\x1b[0mhello world", + "\x1b[1mhello world", + "椙\x1b[1m椙", + "椙\x1b[1椙m椙", + "\x1b[1mhello \x1b[mw\x1b7o\x1b8r\x1b(Bl\x1b[2@d", + "\x1b[1mhello \x1b[Kworld", + "hello \x1b[34;45;1mworld", + "hello \x1b[34;45;1mwor\x1b[34;45;1mld", + "hello \x1b[34;45;1mwor\x1b[0mld", + "hello \x1b[34;48;5;233;1mwo\x1b[38;5;161mr\x1b[0ml\x1b[38;5;161md", + "hello \x1b[38;5;38;48;5;48;1mwor\x1b[38;5;48;48;5;38ml\x1b[0md", + "hello \x1b[32;1mworld", + "hello world", + "hello \x1b[0;38;5;200;48;5;100mworld", + ansiBenchmarkString, + } + + replacementBytes := [...]rune{'\x0e', '\x0f', '\x1b', '\x08'} + + modifyString := func(s string, rr *rand.Rand) string { + n := rr.Intn(len(s)) + b := []rune(s) + for ; n >= 0 && len(b) != 0; n-- { + i := rr.Intn(len(b)) + switch x := rr.Intn(4); x { + case 0: + b = append(b[:i], b[i+1:]...) + case 1: + j := rr.Intn(len(replacementBytes) - 1) + b[i] = replacementBytes[j] + case 2: + x := rune(rr.Intn(utf8.MaxRune)) + for !utf8.ValidRune(x) { + x = rune(rr.Intn(utf8.MaxRune)) + } + b[i] = x + case 3: + b[i] = rune(rr.Intn(utf8.MaxRune)) // potentially invalid + default: + t.Fatalf("unsupported value: %d", x) + } + } + return string(b) + } + + rr := rand.New(rand.NewSource(1)) + for _, s := range testStrs { + for i := 1_000; i >= 0; i-- { + testParserReference(t, modifyString(s, rr)) + } + } +} + +func TestNextAnsiEscapeSequence_Fuzz_Random(t *testing.T) { + t.Parallel() + + if testing.Short() { + t.Skip("short test") + } + + randomString := func(rr *rand.Rand) string { + numChars := rand.Intn(50) + codePoints := make([]rune, numChars) + for i := 0; i < len(codePoints); i++ { + var r rune + for n := 0; n < 1000; n++ { + r = rune(rr.Intn(utf8.MaxRune)) + // Allow 10% of runes to be invalid + if utf8.ValidRune(r) || rr.Float64() < 0.10 { + break + } + } + codePoints[i] = r + } + return string(codePoints) + } + + rr := rand.New(rand.NewSource(1)) + for i := 0; i < 100_000; i++ { + testParserReference(t, randomString(rr)) + } +} + func TestExtractColor(t *testing.T) { assert := func(offset ansiOffset, b int32, e int32, fg tui.Color, bg tui.Color, bold bool) { var attr tui.Attr @@ -185,3 +363,64 @@ func TestAnsiCodeStringConversion(t *testing.T) { &ansiState{attr: tui.Dim | tui.Italic, fg: 1, bg: 1}, "\x1b[2;3;7;38;2;10;20;30;48;5;100m") } + +func TestParseAnsiCode(t *testing.T) { + tests := []struct { + In, Exp string + N int + }{ + {"123", "", 123}, + {"1a", "", -1}, + {"1a;12", "12", -1}, + {"12;a", "a", 12}, + {"-2", "", -1}, + } + for _, x := range tests { + n, s := parseAnsiCode(x.In) + if n != x.N || s != x.Exp { + t.Fatalf("%q: got: (%d %q) want: (%d %q)", x.In, n, s, x.N, x.Exp) + } + } +} + +// kernel/bpf/preload/iterators/README +const ansiBenchmarkString = "\x1b[38;5;81m\x1b[01;31m\x1b[Kkernel/\x1b[0m\x1b[38;5;81mbpf/" + + "\x1b[0m\x1b[38;5;81mpreload/\x1b[0m\x1b[38;5;81miterators/" + + "\x1b[0m\x1b[38;5;149mMakefile\x1b[m\x1b[K\x1b[0m" + +func BenchmarkNextAnsiEscapeSequence(b *testing.B) { + b.SetBytes(int64(len(ansiBenchmarkString))) + for i := 0; i < b.N; i++ { + s := ansiBenchmarkString + for { + _, o := nextAnsiEscapeSequence(s) + if o == -1 { + break + } + s = s[o:] + } + } +} + +// Baseline test to compare the speed of nextAnsiEscapeSequence() to the +// previously used regex based implementation. +func BenchmarkNextAnsiEscapeSequence_Regex(b *testing.B) { + b.SetBytes(int64(len(ansiBenchmarkString))) + for i := 0; i < b.N; i++ { + s := ansiBenchmarkString + for { + a := ansiRegexRefence.FindStringIndex(s) + if len(a) == 0 { + break + } + s = s[a[1]:] + } + } +} + +func BenchmarkExtractColor(b *testing.B) { + b.SetBytes(int64(len(ansiBenchmarkString))) + for i := 0; i < b.N; i++ { + extractColor(ansiBenchmarkString, nil, nil) + } +} -- cgit v1.2.3