diff options
| author | Junegunn Choi <junegunn.c@gmail.com> | 2016-04-16 14:02:43 +0900 |
|---|---|---|
| committer | Junegunn Choi <junegunn.c@gmail.com> | 2016-04-16 14:33:38 +0900 |
| commit | 2f6d23b91e845f53e746e7cf74477a735ec88a85 (patch) | |
| tree | 7e80f65638d310c002b559b0d7a79323d495aec3 /src/pattern.go | |
| parent | 5f63a7b587b4ce221d1e90a559051c51fad2ff78 (diff) | |
| download | fzf-2f6d23b91e845f53e746e7cf74477a735ec88a85.tar.gz | |
Enhanced ranking algorithm
Based on the patch by Matt Westcott (@mjwestcott).
But with a more conservative approach:
- Does not use linearly increasing penalties; it is agreed upon that we
should prefer matching characters at the beginnings of the words, but
it's not always clear that the relevance is inversely proportional to
the distance from the beginning.
- The approach here is more conservative in that the bonus is never
large enough to override the matchlen, so it can be thought of as the
first implicit tiebreak criterion.
- One may argue the change breaks the contract of --tiebreak, but the
judgement depends on the definition of "tie".
Diffstat (limited to 'src/pattern.go')
| -rw-r--r-- | src/pattern.go | 52 |
1 file changed, 31 insertions, 21 deletions
diff --git a/src/pattern.go b/src/pattern.go index af73b674..fda5cc9a 100644 --- a/src/pattern.go +++ b/src/pattern.go @@ -49,7 +49,7 @@ type Pattern struct { cacheable bool delimiter Delimiter nth []Range - procFun map[termType]func(bool, bool, []rune, []rune) (int, int) + procFun map[termType]func(bool, bool, []rune, []rune) algo.Result } var ( @@ -125,7 +125,7 @@ func BuildPattern(fuzzy bool, extended bool, caseMode Case, forward bool, cacheable: cacheable, nth: nth, delimiter: delimiter, - procFun: make(map[termType]func(bool, bool, []rune, []rune) (int, int))} + procFun: make(map[termType]func(bool, bool, []rune, []rune) algo.Result)} ptr.procFun[termFuzzy] = algo.FuzzyMatch ptr.procFun[termEqual] = algo.EqualMatch @@ -275,15 +275,16 @@ func (p *Pattern) matchChunk(chunk *Chunk) []*Item { matches := []*Item{} if !p.extended { for _, item := range *chunk { - if sidx, eidx, tlen := p.basicMatch(item); sidx >= 0 { + offset, bonus := p.basicMatch(item) + if sidx := offset[0]; sidx >= 0 { matches = append(matches, - dupItem(item, []Offset{Offset{int32(sidx), int32(eidx), int32(tlen)}})) + dupItem(item, []Offset{offset}, bonus)) } } } else { for _, item := range *chunk { - if offsets := p.extendedMatch(item); len(offsets) == len(p.termSets) { - matches = append(matches, dupItem(item, offsets)) + if offsets, bonus := p.extendedMatch(item); len(offsets) == len(p.termSets) { + matches = append(matches, dupItem(item, offsets, bonus)) } } } @@ -293,25 +294,27 @@ func (p *Pattern) matchChunk(chunk *Chunk) []*Item { // MatchItem returns true if the Item is a match func (p *Pattern) MatchItem(item *Item) bool { if !p.extended { - sidx, _, _ := p.basicMatch(item) + offset, _ := p.basicMatch(item) + sidx := offset[0] return sidx >= 0 } - offsets := p.extendedMatch(item) + offsets, _ := p.extendedMatch(item) return len(offsets) == len(p.termSets) } -func dupItem(item *Item, offsets []Offset) *Item { +func dupItem(item *Item, offsets []Offset, bonus int32) *Item { 
sort.Sort(ByOrder(offsets)) return &Item{ text: item.text, origText: item.origText, transformed: item.transformed, offsets: offsets, + bonus: bonus, colors: item.colors, rank: buildEmptyRank(item.Index())} } -func (p *Pattern) basicMatch(item *Item) (int, int, int) { +func (p *Pattern) basicMatch(item *Item) (Offset, int32) { input := p.prepareInput(item) if p.fuzzy { return p.iter(algo.FuzzyMatch, input, p.caseSensitive, p.forward, p.text) @@ -319,29 +322,33 @@ func (p *Pattern) basicMatch(item *Item) (int, int, int) { return p.iter(algo.ExactMatchNaive, input, p.caseSensitive, p.forward, p.text) } -func (p *Pattern) extendedMatch(item *Item) []Offset { +func (p *Pattern) extendedMatch(item *Item) ([]Offset, int32) { input := p.prepareInput(item) offsets := []Offset{} + var totalBonus int32 for _, termSet := range p.termSets { var offset *Offset + var bonus int32 for _, term := range termSet { pfun := p.procFun[term.typ] - if sidx, eidx, tlen := p.iter(pfun, input, term.caseSensitive, p.forward, term.text); sidx >= 0 { + off, pen := p.iter(pfun, input, term.caseSensitive, p.forward, term.text) + if sidx := off[0]; sidx >= 0 { if term.inv { continue } - offset = &Offset{int32(sidx), int32(eidx), int32(tlen)} + offset, bonus = &off, pen break } else if term.inv { - offset = &Offset{0, 0, 0} + offset, bonus = &Offset{0, 0, 0}, 0 continue } } if offset != nil { offsets = append(offsets, *offset) + totalBonus += bonus } } - return offsets + return offsets, totalBonus } func (p *Pattern) prepareInput(item *Item) []Token { @@ -360,13 +367,16 @@ func (p *Pattern) prepareInput(item *Item) []Token { return ret } -func (p *Pattern) iter(pfun func(bool, bool, []rune, []rune) (int, int), - tokens []Token, caseSensitive bool, forward bool, pattern []rune) (int, int, int) { +func (p *Pattern) iter(pfun func(bool, bool, []rune, []rune) algo.Result, + tokens []Token, caseSensitive bool, forward bool, pattern []rune) (Offset, int32) { for _, part := range tokens { - prefixLength := 
part.prefixLength - if sidx, eidx := pfun(caseSensitive, forward, part.text, pattern); sidx >= 0 { - return sidx + prefixLength, eidx + prefixLength, part.trimLength + prefixLength := int32(part.prefixLength) + if res := pfun(caseSensitive, forward, part.text, pattern); res.Start >= 0 { + var sidx int32 = res.Start + prefixLength + var eidx int32 = res.End + prefixLength + return Offset{sidx, eidx, int32(part.trimLength)}, res.Bonus } } - return -1, -1, -1 // math.MaxUint16 + // TODO: math.MaxUint16 + return Offset{-1, -1, -1}, 0.0 } |
