From 1d4057c20907b7d263d6f2b8cb4350a024859dfe Mon Sep 17 00:00:00 2001
From: Junegunn Choi
Date: Sun, 14 Aug 2016 00:39:44 +0900
Subject: [perf] Avoid allocating rune array for ascii string

In the best case (all ascii), this reduces the memory footprint by 60%
and the response time by 15% to 20%. In the worst case (every line has
non-ascii characters), 3 to 4% overhead is observed.
---
 src/util/chars.go      | 113 +++++++++++++++++++++++++++++++++++++++++++++++++
 src/util/chars_test.go |  36 ++++++++++++++++
 src/util/util.go       |  29 -------------
 3 files changed, 149 insertions(+), 29 deletions(-)
 create mode 100644 src/util/chars.go
 create mode 100644 src/util/chars_test.go

(limited to 'src/util')

diff --git a/src/util/chars.go b/src/util/chars.go
new file mode 100644
index 00000000..25a15ddd
--- /dev/null
+++ b/src/util/chars.go
@@ -0,0 +1,113 @@
+package util
+
+import (
+	"unicode/utf8"
+)
+
+type Chars struct {
+	runes []rune
+	bytes []byte
+}
+
+// ToChars converts byte array into rune array
+func ToChars(bytea []byte) Chars {
+	var runes []rune
+	ascii := true
+	numBytes := len(bytea)
+	for i := 0; i < numBytes; {
+		if bytea[i] < utf8.RuneSelf {
+			if !ascii {
+				runes = append(runes, rune(bytea[i]))
+			}
+			i++
+		} else {
+			if ascii {
+				ascii = false
+				runes = make([]rune, i, numBytes)
+				for j := 0; j < i; j++ {
+					runes[j] = rune(bytea[j])
+				}
+			}
+			r, sz := utf8.DecodeRune(bytea[i:])
+			i += sz
+			runes = append(runes, r)
+		}
+	}
+	if ascii {
+		return Chars{bytes: bytea}
+	}
+	return Chars{runes: runes}
+}
+
+func RunesToChars(runes []rune) Chars {
+	return Chars{runes: runes}
+}
+
+func (chars *Chars) Get(i int) rune {
+	if chars.runes != nil {
+		return chars.runes[i]
+	}
+	return rune(chars.bytes[i])
+}
+
+func (chars *Chars) Length() int {
+	if chars.runes != nil {
+		return len(chars.runes)
+	}
+	return len(chars.bytes)
+}
+
+// TrimLength returns the length after trimming leading and trailing whitespaces
+func (chars *Chars) TrimLength() int {
+	var i int
+	len := chars.Length()
+	for i = len - 1; i >= 0; i-- {
+		char := chars.Get(i)
+		if char != ' ' && char != '\t' {
+			break
+		}
+	}
+	// Completely empty
+	if i < 0 {
+		return 0
+	}
+
+	var j int
+	for j = 0; j < len; j++ {
+		char := chars.Get(j)
+		if char != ' ' && char != '\t' {
+			break
+		}
+	}
+	return i - j + 1
+}
+
+func (chars *Chars) TrailingWhitespaces() int {
+	whitespaces := 0
+	for i := chars.Length() - 1; i >= 0; i-- {
+		char := chars.Get(i)
+		if char != ' ' && char != '\t' {
+			break
+		}
+		whitespaces++
+	}
+	return whitespaces
+}
+
+func (chars *Chars) ToString() string {
+	if chars.runes != nil {
+		return string(chars.runes)
+	}
+	return string(chars.bytes)
+}
+
+func (chars *Chars) ToRunes() []rune {
+	if chars.runes != nil {
+		return chars.runes
+	}
+	runes := make([]rune, len(chars.bytes))
+	for idx, b := range chars.bytes {
+		runes[idx] = rune(b)
+	}
+	return runes
+}
diff --git a/src/util/chars_test.go b/src/util/chars_test.go
new file mode 100644
index 00000000..e42cfb7c
--- /dev/null
+++ b/src/util/chars_test.go
@@ -0,0 +1,36 @@
+package util
+
+import "testing"
+
+func TestToCharsNil(t *testing.T) {
+	bs := Chars{bytes: []byte{}}
+	if bs.bytes == nil || bs.runes != nil {
+		t.Error()
+	}
+	rs := RunesToChars([]rune{})
+	if rs.bytes != nil || rs.runes == nil {
+		t.Error()
+	}
+}
+
+func TestToCharsAscii(t *testing.T) {
+	chars := ToChars([]byte("foobar"))
+	if chars.ToString() != "foobar" || chars.runes != nil {
+		t.Error()
+	}
+}
+
+func TestCharsLength(t *testing.T) {
+	chars := ToChars([]byte("\tabc한글  "))
+	if chars.Length() != 8 || chars.TrimLength() != 5 {
+		t.Error()
+	}
+}
+
+func TestCharsToString(t *testing.T) {
+	text := "\tabc한글 "
+	chars := ToChars([]byte(text))
+	if chars.ToString() != text {
+		t.Error()
+	}
+}
diff --git a/src/util/util.go b/src/util/util.go
index 4f3d409d..90cc28b4 100644
--- a/src/util/util.go
+++ b/src/util/util.go
@@ -7,7 +7,6 @@ import (
 	"os"
 	"os/exec"
 	"time"
-	"unicode/utf8"
 )
 
 // Max returns the largest integer
@@ -84,34 +83,6 @@ func IsTty() bool {
 	return int(C.isatty(C.int(os.Stdin.Fd()))) != 0
 }
 
-// TrimRight returns rune array with trailing white spaces cut off
-func TrimRight(runes []rune) []rune {
-	var i int
-	for i = len(runes) - 1; i >= 0; i-- {
-		char := runes[i]
-		if char != ' ' && char != '\t' {
-			break
-		}
-	}
-	return runes[0 : i+1]
-}
-
-// BytesToRunes converts byte array into rune array
-func BytesToRunes(bytea []byte) []rune {
-	runes := make([]rune, 0, len(bytea))
-	for i := 0; i < len(bytea); {
-		if bytea[i] < utf8.RuneSelf {
-			runes = append(runes, rune(bytea[i]))
-			i++
-		} else {
-			r, sz := utf8.DecodeRune(bytea[i:])
-			i += sz
-			runes = append(runes, r)
-		}
-	}
-	return runes
-}
-
 // TrimLen returns the length of trimmed rune array
 func TrimLen(runes []rune) int {
 	var i int
-- 
cgit v1.2.3