summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJunegunn Choi <junegunn.c@gmail.com>2015-10-02 18:40:20 +0900
committerJunegunn Choi <junegunn.c@gmail.com>2015-10-02 18:40:20 +0900
commit92a75c9563600a174e9ee8334853f99ed560492a (patch)
treec65a17633ee57dbfbbafa4b351c41bbbfffa3f9f
parent7c7a30c472463e0115adcf8bc2a792b48c03bf08 (diff)
downloadfzf-92a75c9563600a174e9ee8334853f99ed560492a.tar.gz
Use trimmed length when --nth is used with --tiebreak=length
This change improves sort ordering for aligned tabular input. Given the following input:

    apple juice   100
    apple pie     200

fzf --nth=2 will now prefer the one with pie. Before this change fzf compared "juice   " and "pie     ", both of which have the same length.
-rw-r--r--src/item.go19
-rw-r--r--src/pattern.go23
-rw-r--r--src/tokenizer.go5
-rw-r--r--src/tokenizer_test.go12
-rw-r--r--src/util/util.go26
-rw-r--r--src/util/util_test.go20
-rw-r--r--test/test_go.rb47
7 files changed, 124 insertions, 28 deletions
diff --git a/src/item.go b/src/item.go
index 12ca3dfb..f2f105ac 100644
--- a/src/item.go
+++ b/src/item.go
@@ -6,8 +6,8 @@ import (
"github.com/junegunn/fzf/src/curses"
)
-// Offset holds two 32-bit integers denoting the offsets of a matched substring
-type Offset [2]int32
+// Offset holds three 32-bit integers denoting the offsets of a matched substring
+type Offset [3]int32
type colorOffset struct {
offset [2]int32
@@ -43,10 +43,13 @@ func (item *Item) Rank(cache bool) Rank {
}
matchlen := 0
prevEnd := 0
+ lenSum := 0
minBegin := math.MaxUint16
for _, offset := range item.offsets {
begin := int(offset[0])
end := int(offset[1])
+ trimLen := int(offset[2])
+ lenSum += trimLen
if prevEnd > begin {
begin = prevEnd
}
@@ -65,10 +68,7 @@ func (item *Item) Rank(cache bool) Rank {
case byLength:
// It is guaranteed that .transformed in not null in normal execution
if item.transformed != nil {
- lenSum := 0
- for _, token := range item.transformed {
- lenSum += len(token.text)
- }
+ // If offsets is empty, lenSum will be 0, but we don't care
tiebreak = uint16(lenSum)
} else {
tiebreak = uint16(len(item.text))
@@ -116,7 +116,8 @@ func (item *Item) colorOffsets(color int, bold bool, current bool) []colorOffset
if len(item.colors) == 0 {
var offsets []colorOffset
for _, off := range item.offsets {
- offsets = append(offsets, colorOffset{offset: off, color: color, bold: bold})
+
+ offsets = append(offsets, colorOffset{offset: [2]int32{off[0], off[1]}, color: color, bold: bold})
}
return offsets
}
@@ -160,7 +161,7 @@ func (item *Item) colorOffsets(color int, bold bool, current bool) []colorOffset
if curr != 0 && idx > start {
if curr == -1 {
offsets = append(offsets, colorOffset{
- offset: Offset{int32(start), int32(idx)}, color: color, bold: bold})
+ offset: [2]int32{int32(start), int32(idx)}, color: color, bold: bold})
} else {
ansi := item.colors[curr-1]
fg := ansi.color.fg
@@ -180,7 +181,7 @@ func (item *Item) colorOffsets(color int, bold bool, current bool) []colorOffset
}
}
offsets = append(offsets, colorOffset{
- offset: Offset{int32(start), int32(idx)},
+ offset: [2]int32{int32(start), int32(idx)},
color: curses.PairFor(fg, bg),
bold: ansi.color.bold || bold})
}
diff --git a/src/pattern.go b/src/pattern.go
index 5466b862..f5dd8a75 100644
--- a/src/pattern.go
+++ b/src/pattern.go
@@ -6,6 +6,7 @@ import (
"strings"
"github.com/junegunn/fzf/src/algo"
+ "github.com/junegunn/fzf/src/util"
)
// fuzzy
@@ -251,9 +252,9 @@ func (p *Pattern) matchChunk(chunk *Chunk) []*Item {
matches := []*Item{}
if p.mode == ModeFuzzy {
for _, item := range *chunk {
- if sidx, eidx := p.fuzzyMatch(item); sidx >= 0 {
+ if sidx, eidx, tlen := p.fuzzyMatch(item); sidx >= 0 {
matches = append(matches,
- dupItem(item, []Offset{Offset{int32(sidx), int32(eidx)}}))
+ dupItem(item, []Offset{Offset{int32(sidx), int32(eidx), int32(tlen)}}))
}
}
} else {
@@ -269,7 +270,7 @@ func (p *Pattern) matchChunk(chunk *Chunk) []*Item {
// MatchItem returns true if the Item is a match
func (p *Pattern) MatchItem(item *Item) bool {
if p.mode == ModeFuzzy {
- sidx, _ := p.fuzzyMatch(item)
+ sidx, _, _ := p.fuzzyMatch(item)
return sidx >= 0
}
offsets := p.extendedMatch(item)
@@ -288,7 +289,7 @@ func dupItem(item *Item, offsets []Offset) *Item {
rank: Rank{0, 0, item.index}}
}
-func (p *Pattern) fuzzyMatch(item *Item) (int, int) {
+func (p *Pattern) fuzzyMatch(item *Item) (int, int, int) {
input := p.prepareInput(item)
return p.iter(algo.FuzzyMatch, input, p.caseSensitive, p.forward, p.text)
}
@@ -298,13 +299,13 @@ func (p *Pattern) extendedMatch(item *Item) []Offset {
offsets := []Offset{}
for _, term := range p.terms {
pfun := p.procFun[term.typ]
- if sidx, eidx := p.iter(pfun, input, term.caseSensitive, p.forward, term.text); sidx >= 0 {
+ if sidx, eidx, tlen := p.iter(pfun, input, term.caseSensitive, p.forward, term.text); sidx >= 0 {
if term.inv {
break
}
- offsets = append(offsets, Offset{int32(sidx), int32(eidx)})
+ offsets = append(offsets, Offset{int32(sidx), int32(eidx), int32(tlen)})
} else if term.inv {
- offsets = append(offsets, Offset{0, 0})
+ offsets = append(offsets, Offset{0, 0, 0})
}
}
return offsets
@@ -320,19 +321,19 @@ func (p *Pattern) prepareInput(item *Item) []Token {
tokens := Tokenize(item.text, p.delimiter)
ret = Transform(tokens, p.nth)
} else {
- ret = []Token{Token{text: item.text, prefixLength: 0}}
+ ret = []Token{Token{text: item.text, prefixLength: 0, trimLength: util.TrimLen(item.text)}}
}
item.transformed = ret
return ret
}
func (p *Pattern) iter(pfun func(bool, bool, []rune, []rune) (int, int),
- tokens []Token, caseSensitive bool, forward bool, pattern []rune) (int, int) {
+ tokens []Token, caseSensitive bool, forward bool, pattern []rune) (int, int, int) {
for _, part := range tokens {
prefixLength := part.prefixLength
if sidx, eidx := pfun(caseSensitive, forward, part.text, pattern); sidx >= 0 {
- return sidx + prefixLength, eidx + prefixLength
+ return sidx + prefixLength, eidx + prefixLength, part.trimLength
}
}
- return -1, -1
+ return -1, -1, -1 // math.MaxUint16
}
diff --git a/src/tokenizer.go b/src/tokenizer.go
index a8d04003..4b89b38e 100644
--- a/src/tokenizer.go
+++ b/src/tokenizer.go
@@ -20,6 +20,7 @@ type Range struct {
type Token struct {
text []rune
prefixLength int
+ trimLength int
}
// Delimiter for tokenizing the input
@@ -81,7 +82,7 @@ func withPrefixLengths(tokens [][]rune, begin int) []Token {
for idx, token := range tokens {
// Need to define a new local variable instead of the reused token to take
// the pointer to it
- ret[idx] = Token{text: token, prefixLength: prefixLength}
+ ret[idx] = Token{token, prefixLength, util.TrimLen(token)}
prefixLength += len(token)
}
return ret
@@ -233,7 +234,7 @@ func Transform(tokens []Token, withNth []Range) []Token {
} else {
prefixLength = 0
}
- transTokens[idx] = Token{part, prefixLength}
+ transTokens[idx] = Token{part, prefixLength, util.TrimLen(part)}
}
return transTokens
}
diff --git a/src/tokenizer_test.go b/src/tokenizer_test.go
index 0f95aa13..b0924402 100644
--- a/src/tokenizer_test.go
+++ b/src/tokenizer_test.go
@@ -44,22 +44,22 @@ func TestTokenize(t *testing.T) {
// AWK-style
input := " abc: def: ghi "
tokens := Tokenize([]rune(input), Delimiter{})
- if string(tokens[0].text) != "abc: " || tokens[0].prefixLength != 2 {
+ if string(tokens[0].text) != "abc: " || tokens[0].prefixLength != 2 || tokens[0].trimLength != 4 {
t.Errorf("%s", tokens)
}
// With delimiter
tokens = Tokenize([]rune(input), delimiterRegexp(":"))
- if string(tokens[0].text) != " abc:" || tokens[0].prefixLength != 0 {
+ if string(tokens[0].text) != " abc:" || tokens[0].prefixLength != 0 || tokens[0].trimLength != 4 {
t.Errorf("%s", tokens)
}
// With delimiter regex
tokens = Tokenize([]rune(input), delimiterRegexp("\\s+"))
- if string(tokens[0].text) != " " || tokens[0].prefixLength != 0 ||
- string(tokens[1].text) != "abc: " || tokens[1].prefixLength != 2 ||
- string(tokens[2].text) != "def: " || tokens[2].prefixLength != 8 ||
- string(tokens[3].text) != "ghi " || tokens[3].prefixLength != 14 {
+ if string(tokens[0].text) != " " || tokens[0].prefixLength != 0 || tokens[0].trimLength != 0 ||
+ string(tokens[1].text) != "abc: " || tokens[1].prefixLength != 2 || tokens[1].trimLength != 4 ||
+ string(tokens[2].text) != "def: " || tokens[2].prefixLength != 8 || tokens[2].trimLength != 4 ||
+ string(tokens[3].text) != "ghi " || tokens[3].prefixLength != 14 || tokens[3].trimLength != 3 {
t.Errorf("%s", tokens)
}
}
diff --git a/src/util/util.go b/src/util/util.go
index aa5f227c..e7e4f313 100644
--- a/src/util/util.go
+++ b/src/util/util.go
@@ -75,6 +75,7 @@ func IsTty() bool {
return int(C.isatty(C.int(os.Stdin.Fd()))) != 0
}
+// TrimRight returns rune array with trailing white spaces cut off
func TrimRight(runes []rune) []rune {
var i int
for i = len(runes) - 1; i >= 0; i-- {
@@ -86,6 +87,7 @@ func TrimRight(runes []rune) []rune {
return runes[0 : i+1]
}
+// BytesToRunes converts byte array into rune array
func BytesToRunes(bytea []byte) []rune {
runes := make([]rune, 0, len(bytea))
for i := 0; i < len(bytea); {
@@ -100,3 +102,27 @@ func BytesToRunes(bytea []byte) []rune {
}
return runes
}
+
+// TrimLen returns the length of trimmed rune array
+func TrimLen(runes []rune) int {
+ var i int
+ for i = len(runes) - 1; i >= 0; i-- {
+ char := runes[i]
+ if char != ' ' && char != '\t' {
+ break
+ }
+ }
+ // Completely empty
+ if i < 0 {
+ return 0
+ }
+
+ var j int
+ for j = 0; j < len(runes); j++ {
+ char := runes[j]
+ if char != ' ' && char != '\t' {
+ break
+ }
+ }
+ return i - j + 1
+}
diff --git a/src/util/util_test.go b/src/util/util_test.go
index 06cfd4f2..8aeaeac5 100644
--- a/src/util/util_test.go
+++ b/src/util/util_test.go
@@ -20,3 +20,23 @@ func TestContrain(t *testing.T) {
t.Error("Expected", 3)
}
}
+
+func TestTrimLen(t *testing.T) {
+ check := func(str string, exp int) {
+ trimmed := TrimLen([]rune(str))
+ if trimmed != exp {
+ t.Errorf("Invalid TrimLen result for '%s': %d (expected %d)",
+ str, trimmed, exp)
+ }
+ }
+ check("hello", 5)
+ check("hello ", 5)
+ check("hello ", 5)
+ check(" hello", 5)
+ check(" hello", 5)
+ check(" hello ", 5)
+ check(" hello ", 5)
+ check("h o", 5)
+ check(" h o ", 5)
+ check(" ", 0)
+}
diff --git a/test/test_go.rb b/test/test_go.rb
index e76b5200..77414ecd 100644
--- a/test/test_go.rb
+++ b/test/test_go.rb
@@ -527,6 +527,53 @@ class TestGoFZF < TestBase
assert_equal output, `cat #{tempname} | #{FZF} -fh -n2 -d:`.split($/)
end
+ def test_tiebreak_length_with_nth_trim_length
+ input = [
+ "apple juice bottle 1",
+ "apple ui bottle 2",
+ "app ice bottle 3",
+ "app ic bottle 4",
+ ]
+ writelines tempname, input
+
+ # len(1)
+ output = [
+ "app ice bottle 3",
+ "app ic bottle 4",
+ "apple juice bottle 1",
+ "apple ui bottle 2",
+ ]
+ assert_equal output, `cat #{tempname} | #{FZF} -fa -n1`.split($/)
+
+ # len(1 ~ 2)
+ output = [
+ "apple ui bottle 2",
+ "app ic bottle 4",
+ "apple juice bottle 1",
+ "app ice bottle 3",
+ ]
+ assert_equal output, `cat #{tempname} | #{FZF} -fai -n1..2`.split($/)
+
+ # len(1) + len(2)
+ output = [
+ "app ic bottle 4",
+ "app ice bottle 3",
+ "apple ui bottle 2",
+ "apple juice bottle 1",
+ ]
+ assert_equal output, `cat #{tempname} | #{FZF} -x -f"a i" -n1,2`.split($/)
+
+ # len(2)
+ output = [
+ "apple ui bottle 2",
+ "app ic bottle 4",
+ "app ice bottle 3",
+ "apple juice bottle 1",
+ ]
+ assert_equal output, `cat #{tempname} | #{FZF} -fi -n2`.split($/)
+ assert_equal output, `cat #{tempname} | #{FZF} -fi -n2,1..2`.split($/)
+ end
+
def test_tiebreak_end_backward_scan
input = %w[
foobar-fb