diff options
| author | Junegunn Choi <junegunn.c@gmail.com> | 2016-04-16 14:02:43 +0900 |
|---|---|---|
| committer | Junegunn Choi <junegunn.c@gmail.com> | 2016-04-16 14:33:38 +0900 |
| commit | 2f6d23b91e845f53e746e7cf74477a735ec88a85 (patch) | |
| tree | 7e80f65638d310c002b559b0d7a79323d495aec3 /src | |
| parent | 5f63a7b587b4ce221d1e90a559051c51fad2ff78 (diff) | |
| download | fzf-2f6d23b91e845f53e746e7cf74477a735ec88a85.tar.gz | |
Enhanced ranking algorithm
Based on the patch by Matt Westcott (@mjwestcott).
But with a more conservative approach:
- Does not use linearly increasing penalties; It is agreed upon that we
should prefer matching characters at the beginnings of the words, but
it's not always clear that the relevance is inversely proportional to
the distance from the beginning.
- The approach here is more conservative in that the bonus is never
large enough to override the matchlen, so it can be thought of as the
first implicit tiebreak criterion.
- One may argue the change breaks the contract of --tiebreak, but the
judgement depends on the definition of "tie".
Diffstat (limited to 'src')
| -rw-r--r-- | src/algo/algo.go | 136 | ||||
| -rw-r--r-- | src/algo/algo_test.go | 73 | ||||
| -rw-r--r-- | src/item.go | 11 | ||||
| -rw-r--r-- | src/pattern.go | 52 | ||||
| -rw-r--r-- | src/pattern_test.go | 14 |
5 files changed, 203 insertions, 83 deletions
diff --git a/src/algo/algo.go b/src/algo/algo.go index ac7bd8b2..8656c684 100644 --- a/src/algo/algo.go +++ b/src/algo/algo.go @@ -22,10 +22,30 @@ func runeAt(runes []rune, index int, max int, forward bool) rune { return runes[max-index-1] } +// Result conatins the results of running a match function. +type Result struct { + Start int32 + End int32 + + // Items are basically sorted by the lengths of matched substrings. + // But we slightly adjust the score with bonus for better results. + Bonus int32 +} + +type charClass int + +const ( + charNonWord charClass = iota + charLower + charUpper + charLetter + charNumber +) + // FuzzyMatch performs fuzzy-match -func FuzzyMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) (int, int) { +func FuzzyMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) Result { if len(pattern) == 0 { - return 0, 0 + return Result{0, 0, 0} } // 0. (FIXME) How to find the shortest match? @@ -90,12 +110,76 @@ func FuzzyMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) } } } - if forward { - return sidx, eidx + + // Calculate the bonus. This can't be done at the same time as the + // pattern scan above because 'forward' may be false. + if !forward { + sidx, eidx = lenRunes-eidx, lenRunes-sidx + } + + var bonus int32 + pidx := 0 + consecutive := false + prevClass := charNonWord + for index := 0; index < eidx; index++ { + char := runes[index] + var class charClass + if unicode.IsLower(char) { + class = charLower + } else if unicode.IsUpper(char) { + class = charUpper + } else if unicode.IsLetter(char) { + class = charLetter + } else if unicode.IsNumber(char) { + class = charNumber + } else { + class = charNonWord + } + + var point int32 + if prevClass == charNonWord && class != charNonWord { + // Word boundary + point = 2 + } else if prevClass == charLower && class == charUpper || + prevClass != charNumber && class == charNumber { + // camelCase letter123 + point = 1 + } + prevClass = class + + if index >= sidx { + if !caseSensitive { + if char >= 'A' && char <= 'Z' { + char += 32 + } else if char > unicode.MaxASCII { + char = unicode.To(unicode.LowerCase, char) + } + } + pchar := pattern[pidx] + if pchar == char { + // Boost bonus for the first character in the pattern + if pidx == 0 { + point *= 2 + } + // Bonus to consecutive matching chars + if consecutive { + point++ + } + bonus += point + + if pidx++; pidx == lenPattern { + break + } + consecutive = true + } else { + consecutive = false + } + } } - return lenRunes - eidx, lenRunes - sidx + + return Result{int32(sidx), int32(eidx), bonus} } - return -1, -1 + return Result{-1, -1, 0} } // ExactMatchNaive is a basic string searching algorithm that handles case @@ -105,16 +189,17 @@ func FuzzyMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) // // We might try to implement better algorithms in the future: // http://en.wikipedia.org/wiki/String_searching_algorithm -func ExactMatchNaive(caseSensitive bool, forward bool, runes []rune, pattern []rune) (int, int) { +func ExactMatchNaive(caseSensitive bool, forward bool, runes []rune, pattern []rune) Result { + // Note: ExactMatchNaive always return a zero bonus. if len(pattern) == 0 { - return 0, 0 + return Result{0, 0, 0} } lenRunes := len(runes) lenPattern := len(pattern) if lenRunes < lenPattern { - return -1, -1 + return Result{-1, -1, 0} } pidx := 0 @@ -132,22 +217,23 @@ func ExactMatchNaive(caseSensitive bool, forward bool, runes []rune, pattern []r pidx++ if pidx == lenPattern { if forward { - return index - lenPattern + 1, index + 1 + return Result{int32(index - lenPattern + 1), int32(index + 1), 0} } - return lenRunes - (index + 1), lenRunes - (index - lenPattern + 1) + return Result{int32(lenRunes - (index + 1)), int32(lenRunes - (index - lenPattern + 1)), 0} } } else { index -= pidx pidx = 0 } } - return -1, -1 + return Result{-1, -1, 0} } // PrefixMatch performs prefix-match -func PrefixMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) (int, int) { +func PrefixMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) Result { + // Note: PrefixMatch always return a zero bonus. if len(runes) < len(pattern) { - return -1, -1 + return Result{-1, -1, 0} } for index, r := range pattern { @@ -156,19 +242,20 @@ func PrefixMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) char = unicode.ToLower(char) } if char != r { - return -1, -1 + return Result{-1, -1, 0} } } - return 0, len(pattern) + return Result{0, int32(len(pattern)), 0} } // SuffixMatch performs suffix-match -func SuffixMatch(caseSensitive bool, forward bool, input []rune, pattern []rune) (int, int) { +func SuffixMatch(caseSensitive bool, forward bool, input []rune, pattern []rune) Result { + // Note: SuffixMatch always return a zero bonus. runes := util.TrimRight(input) trimmedLen := len(runes) diff := trimmedLen - len(pattern) if diff < 0 { - return -1, -1 + return Result{-1, -1, 0} } for index, r := range pattern { @@ -177,23 +264,24 @@ func SuffixMatch(caseSensitive bool, forward bool, input []rune, pattern []rune) char = unicode.ToLower(char) } if char != r { - return -1, -1 + return Result{-1, -1, 0} } } - return trimmedLen - len(pattern), trimmedLen + return Result{int32(trimmedLen - len(pattern)), int32(trimmedLen), 0} } // EqualMatch performs equal-match -func EqualMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) (int, int) { +func EqualMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) Result { + // Note: EqualMatch always return a zero bonus. if len(runes) != len(pattern) { - return -1, -1 + return Result{-1, -1, 0} } runesStr := string(runes) if !caseSensitive { runesStr = strings.ToLower(runesStr) } if runesStr == string(pattern) { - return 0, len(pattern) + return Result{0, int32(len(pattern)), 0} } - return -1, -1 + return Result{-1, -1, 0} } diff --git a/src/algo/algo_test.go b/src/algo/algo_test.go index 06ec5eae..a124d050 100644 --- a/src/algo/algo_test.go +++ b/src/algo/algo_test.go @@ -5,65 +5,84 @@ import ( "testing" ) -func assertMatch(t *testing.T, fun func(bool, bool, []rune, []rune) (int, int), caseSensitive bool, forward bool, input string, pattern string, sidx int, eidx int) { +func assertMatch(t *testing.T, fun func(bool, bool, []rune, []rune) Result, caseSensitive, forward bool, input, pattern string, sidx int32, eidx int32, bonus int32) { if !caseSensitive { pattern = strings.ToLower(pattern) } - s, e := fun(caseSensitive, forward, []rune(input), []rune(pattern)) - if s != sidx { - t.Errorf("Invalid start index: %d (expected: %d, %s / %s)", s, sidx, input, pattern) + res := fun(caseSensitive, forward, []rune(input), []rune(pattern)) + if res.Start != sidx { + t.Errorf("Invalid start index: %d (expected: %d, %s / %s)", res.Start, sidx, input, pattern) } - if e != eidx { - t.Errorf("Invalid end index: %d (expected: %d, %s / %s)", e, eidx, input, pattern) + if res.End != eidx { + t.Errorf("Invalid end index: %d (expected: %d, %s / %s)", res.End, eidx, input, pattern) + } + if res.Bonus != bonus { + t.Errorf("Invalid bonus: %d (expected: %d, %s / %s)", res.Bonus, bonus, input, pattern) } } func TestFuzzyMatch(t *testing.T) { - assertMatch(t, FuzzyMatch, false, true, "fooBarbaz", "oBZ", 2, 9) - assertMatch(t, FuzzyMatch, true, true, "fooBarbaz", "oBZ", -1, -1) - assertMatch(t, FuzzyMatch, true, true, "fooBarbaz", "oBz", 2, 9) - assertMatch(t, FuzzyMatch, true, true, "fooBarbaz", "fooBarbazz", -1, -1) + assertMatch(t, FuzzyMatch, false, true, "fooBarbaz", "oBZ", 2, 9, 2) + assertMatch(t, FuzzyMatch, false, true, "foo bar baz", "fbb", 0, 9, 8) + assertMatch(t, FuzzyMatch, false, true, "/AutomatorDocument.icns", "rdoc", 9, 13, 4) + assertMatch(t, FuzzyMatch, false, true, "/man1/zshcompctl.1", "zshc", 6, 10, 7) + assertMatch(t, FuzzyMatch, false, true, "/.oh-my-zsh/cache", "zshc", 8, 13, 8) + assertMatch(t, FuzzyMatch, false, true, "ab0123 456", "12356", 3, 10, 3) + assertMatch(t, FuzzyMatch, false, true, "abc123 456", "12356", 3, 10, 5) + + assertMatch(t, FuzzyMatch, false, true, "foo/bar/baz", "fbb", 0, 9, 8) + assertMatch(t, FuzzyMatch, false, true, "fooBarBaz", "fbb", 0, 7, 6) + assertMatch(t, FuzzyMatch, false, true, "foo barbaz", "fbb", 0, 8, 6) + assertMatch(t, FuzzyMatch, false, true, "fooBar Baz", "foob", 0, 4, 8) + assertMatch(t, FuzzyMatch, true, true, "fooBarbaz", "oBZ", -1, -1, 0) + assertMatch(t, FuzzyMatch, true, true, "fooBarbaz", "oBz", 2, 9, 2) + assertMatch(t, FuzzyMatch, true, true, "Foo Bar Baz", "fbb", -1, -1, 0) + assertMatch(t, FuzzyMatch, true, true, "Foo/Bar/Baz", "FBB", 0, 9, 8) + assertMatch(t, FuzzyMatch, true, true, "FooBarBaz", "FBB", 0, 7, 6) + assertMatch(t, FuzzyMatch, true, true, "foo BarBaz", "fBB", 0, 8, 7) + assertMatch(t, FuzzyMatch, true, true, "FooBar Baz", "FooB", 0, 4, 8) + assertMatch(t, FuzzyMatch, true, true, "fooBarbaz", "fooBarbazz", -1, -1, 0) } func TestFuzzyMatchBackward(t *testing.T) { - assertMatch(t, FuzzyMatch, false, true, "foobar fb", "fb", 0, 4) - assertMatch(t, FuzzyMatch, false, false, "foobar fb", "fb", 7, 9) + assertMatch(t, FuzzyMatch, false, true, "foobar fb", "fb", 0, 4, 4) + assertMatch(t, FuzzyMatch, false, false, "foobar fb", "fb", 7, 9, 5) } func TestExactMatchNaive(t *testing.T) { for _, dir := range []bool{true, false} { - assertMatch(t, ExactMatchNaive, false, dir, "fooBarbaz", "oBA", 2, 5) - assertMatch(t, ExactMatchNaive, true, dir, "fooBarbaz", "oBA", -1, -1) - assertMatch(t, ExactMatchNaive, true, dir, "fooBarbaz", "fooBarbazz", -1, -1) + assertMatch(t, ExactMatchNaive, false, dir, "fooBarbaz", "oBA", 2, 5, 0) + assertMatch(t, ExactMatchNaive, true, dir, "fooBarbaz", "oBA", -1, -1, 0) + assertMatch(t, ExactMatchNaive, true, dir, "fooBarbaz", "fooBarbazz", -1, -1, 0) } } func TestExactMatchNaiveBackward(t *testing.T) { - assertMatch(t, ExactMatchNaive, false, true, "foobar foob", "oo", 1, 3) - assertMatch(t, ExactMatchNaive, false, false, "foobar foob", "oo", 8, 10) + assertMatch(t, ExactMatchNaive, false, true, "foobar foob", "oo", 1, 3, 0) + assertMatch(t, ExactMatchNaive, false, false, "foobar foob", "oo", 8, 10, 0) } func TestPrefixMatch(t *testing.T) { for _, dir := range []bool{true, false} { - assertMatch(t, PrefixMatch, false, dir, "fooBarbaz", "Foo", 0, 3) - assertMatch(t, PrefixMatch, true, dir, "fooBarbaz", "Foo", -1, -1) - assertMatch(t, PrefixMatch, false, dir, "fooBarbaz", "baz", -1, -1) + assertMatch(t, PrefixMatch, false, dir, "fooBarbaz", "Foo", 0, 3, 0) + assertMatch(t, PrefixMatch, true, dir, "fooBarbaz", "Foo", -1, -1, 0) + assertMatch(t, PrefixMatch, false, dir, "fooBarbaz", "baz", -1, -1, 0) } } func TestSuffixMatch(t *testing.T) { for _, dir := range []bool{true, false} { - assertMatch(t, SuffixMatch, false, dir, "fooBarbaz", "Foo", -1, -1) - assertMatch(t, SuffixMatch, false, dir, "fooBarbaz", "baz", 6, 9) - assertMatch(t, SuffixMatch, true, dir, "fooBarbaz", "Baz", -1, -1) + assertMatch(t, SuffixMatch, false, dir, "fooBarbaz", "Foo", -1, -1, 0) + assertMatch(t, SuffixMatch, false, dir, "fooBarbaz", "baz", 6, 9, 0) + assertMatch(t, SuffixMatch, true, dir, "fooBarbaz", "Baz", -1, -1, 0) } } func TestEmptyPattern(t *testing.T) { for _, dir := range []bool{true, false} { - assertMatch(t, FuzzyMatch, true, dir, "foobar", "", 0, 0) - assertMatch(t, ExactMatchNaive, true, dir, "foobar", "", 0, 0) - assertMatch(t, PrefixMatch, true, dir, "foobar", "", 0, 0) - assertMatch(t, SuffixMatch, true, dir, "foobar", "", 6, 6) + assertMatch(t, FuzzyMatch, true, dir, "foobar", "", 0, 0, 0) + assertMatch(t, ExactMatchNaive, true, dir, "foobar", "", 0, 0, 0) + assertMatch(t, PrefixMatch, true, dir, "foobar", "", 0, 0, 0) + assertMatch(t, SuffixMatch, true, dir, "foobar", "", 6, 6, 0) } } diff --git a/src/item.go b/src/item.go index 43ce1a21..66b3e4d1 100644 --- a/src/item.go +++ b/src/item.go @@ -23,6 +23,7 @@ type Item struct { offsets []Offset colors []ansiOffset rank [5]int32 + bonus int32 } // Sort criteria to use. Never changes once fzf is started. @@ -73,15 +74,17 @@ func (item *Item) Rank(cache bool) [5]int32 { matchlen += end - begin } } - if matchlen == 0 { - matchlen = math.MaxInt32 - } rank := buildEmptyRank(item.Index()) for idx, criterion := range sortCriteria { var val int32 switch criterion { case byMatchLen: - val = int32(matchlen) + if matchlen == 0 { + val = math.MaxInt32 + } else { + // It is extremely unlikely that bonus exceeds 128 + val = 128*int32(matchlen) - item.bonus + } case byLength: // It is guaranteed that .transformed in not null in normal execution if item.transformed != nil { diff --git a/src/pattern.go b/src/pattern.go index af73b674..fda5cc9a 100644 --- a/src/pattern.go +++ b/src/pattern.go @@ -49,7 +49,7 @@ type Pattern struct { cacheable bool delimiter Delimiter nth []Range - procFun map[termType]func(bool, bool, []rune, []rune) (int, int) + procFun map[termType]func(bool, bool, []rune, []rune) algo.Result } var ( @@ -125,7 +125,7 @@ func BuildPattern(fuzzy bool, extended bool, caseMode Case, forward bool, cacheable: cacheable, nth: nth, delimiter: delimiter, - procFun: make(map[termType]func(bool, bool, []rune, []rune) (int, int))} + procFun: make(map[termType]func(bool, bool, []rune, []rune) algo.Result)} ptr.procFun[termFuzzy] = algo.FuzzyMatch ptr.procFun[termEqual] = algo.EqualMatch @@ -275,15 +275,16 @@ func (p *Pattern) matchChunk(chunk *Chunk) []*Item { matches := []*Item{} if !p.extended { for _, item := range *chunk { - if sidx, eidx, tlen := p.basicMatch(item); sidx >= 0 { + offset, bonus := p.basicMatch(item) + if sidx := offset[0]; sidx >= 0 { matches = append(matches, - dupItem(item, []Offset{Offset{int32(sidx), int32(eidx), int32(tlen)}})) + dupItem(item, []Offset{offset}, bonus)) } } } else { for _, item := range *chunk { - if offsets := p.extendedMatch(item); len(offsets) == len(p.termSets) { - matches = append(matches, dupItem(item, offsets)) + if offsets, bonus := p.extendedMatch(item); len(offsets) == len(p.termSets) { + matches = append(matches, dupItem(item, offsets, bonus)) } } } @@ -293,25 +294,27 @@ func (p *Pattern) matchChunk(chunk *Chunk) []*Item { // MatchItem returns true if the Item is a match func (p *Pattern) MatchItem(item *Item) bool { if !p.extended { - sidx, _, _ := p.basicMatch(item) + offset, _ := p.basicMatch(item) + sidx := offset[0] return sidx >= 0 } - offsets := p.extendedMatch(item) + offsets, _ := p.extendedMatch(item) return len(offsets) == len(p.termSets) } -func dupItem(item *Item, offsets []Offset) *Item { +func dupItem(item *Item, offsets []Offset, bonus int32) *Item { sort.Sort(ByOrder(offsets)) return &Item{ text: item.text, origText: item.origText, transformed: item.transformed, offsets: offsets, + bonus: bonus, colors: item.colors, rank: buildEmptyRank(item.Index())} } -func (p *Pattern) basicMatch(item *Item) (int, int, int) { +func (p *Pattern) basicMatch(item *Item) (Offset, int32) { input := p.prepareInput(item) if p.fuzzy { return p.iter(algo.FuzzyMatch, input, p.caseSensitive, p.forward, p.text) @@ -319,29 +322,33 @@ func (p *Pattern) basicMatch(item *Item) (int, int, int) { return p.iter(algo.ExactMatchNaive, input, p.caseSensitive, p.forward, p.text) } -func (p *Pattern) extendedMatch(item *Item) []Offset { +func (p *Pattern) extendedMatch(item *Item) ([]Offset, int32) { input := p.prepareInput(item) offsets := []Offset{} + var totalBonus int32 for _, termSet := range p.termSets { var offset *Offset + var bonus int32 for _, term := range termSet { pfun := p.procFun[term.typ] - if sidx, eidx, tlen := p.iter(pfun, input, term.caseSensitive, p.forward, term.text); sidx >= 0 { + off, pen := p.iter(pfun, input, term.caseSensitive, p.forward, term.text) + if sidx := off[0]; sidx >= 0 { if term.inv { continue } - offset = &Offset{int32(sidx), int32(eidx), int32(tlen)} + offset, bonus = &off, pen break } else if term.inv { - offset = &Offset{0, 0, 0} + offset, bonus = &Offset{0, 0, 0}, 0 continue } } if offset != nil { offsets = append(offsets, *offset) + totalBonus += bonus } } - return offsets + return offsets, totalBonus } func (p *Pattern) prepareInput(item *Item) []Token { @@ -360,13 +367,16 @@ func (p *Pattern) prepareInput(item *Item) []Token { return ret } -func (p *Pattern) iter(pfun func(bool, bool, []rune, []rune) (int, int), - tokens []Token, caseSensitive bool, forward bool, pattern []rune) (int, int, int) { +func (p *Pattern) iter(pfun func(bool, bool, []rune, []rune) algo.Result, + tokens []Token, caseSensitive bool, forward bool, pattern []rune) (Offset, int32) { for _, part := range tokens { - prefixLength := part.prefixLength - if sidx, eidx := pfun(caseSensitive, forward, part.text, pattern); sidx >= 0 { - return sidx + prefixLength, eidx + prefixLength, part.trimLength + prefixLength := int32(part.prefixLength) + if res := pfun(caseSensitive, forward, part.text, pattern); res.Start >= 0 { + var sidx int32 = res.Start + prefixLength + var eidx int32 = res.End + prefixLength + return Offset{sidx, eidx, int32(part.trimLength)}, res.Bonus } } - return -1, -1, -1 // math.MaxUint16 + // TODO: math.MaxUint16 + return Offset{-1, -1, -1}, 0.0 } diff --git a/src/pattern_test.go b/src/pattern_test.go index 6bf571cd..2f27fdab 100644 --- a/src/pattern_test.go +++ b/src/pattern_test.go @@ -70,10 +70,10 @@ func TestExact(t *testing.T) { clearPatternCache() pattern := BuildPattern(true, true, CaseSmart, true, []Range{}, Delimiter{}, []rune("'abc")) - sidx, eidx := algo.ExactMatchNaive( + res := algo.ExactMatchNaive( pattern.caseSensitive, pattern.forward, []rune("aabbcc abc"), pattern.termSets[0][0].text) - if sidx != 7 || eidx != 10 { - t.Errorf("%s / %d / %d", pattern.termSets, sidx, eidx) + if res.Start != 7 || res.End != 10 { + t.Errorf("%s / %d / %d", pattern.termSets, res.Start, res.End) } } @@ -82,11 +82,11 @@ func TestEqual(t *testing.T) { clearPatternCache() pattern := BuildPattern(true, true, CaseSmart, true, []Range{}, Delimiter{}, []rune("^AbC$")) - match := func(str string, sidxExpected int, eidxExpected int) { - sidx, eidx := algo.EqualMatch( + match := func(str string, sidxExpected int32, eidxExpected int32) { + res := algo.EqualMatch( pattern.caseSensitive, pattern.forward, []rune(str), pattern.termSets[0][0].text) - if sidx != sidxExpected || eidx != eidxExpected { - t.Errorf("%s / %d / %d", pattern.termSets, sidx, eidx) + if res.Start != sidxExpected || res.End != eidxExpected { + t.Errorf("%s / %d / %d", pattern.termSets, res.Start, res.End) } } match("ABC", -1, -1) |
