micro/internal/buffer/search.go

394 lines
12 KiB
Go

package buffer
import (
"fmt"
"regexp"
"unicode/utf8"
"github.com/zyedidia/micro/v2/internal/util"
)
// RegexpGroup combines a Regexp with padded versions.
// We want "^" and "$" to match only the beginning/end of a line, not that
// of the search region somewhere in the middle of a line. In that case we
// use padded regexps to require a rune before or after the match. (This
// also affects other empty-string patterns like "\\b".)
//
// The array is indexed by a bitmask of padStart and padEnd: index 0 holds
// the unpadded regexp, the other entries hold the variants that require an
// extra rune before the match, after the match, or both.
type RegexpGroup [4]*regexp.Regexp
// Bit flags that select an entry of a RegexpGroup. They can be combined
// with "|"; the value 0 selects the unpadded regexp.
const (
// padStart selects a regexp requiring one extra rune before the match.
padStart = 1 << iota
// padEnd selects a regexp requiring one extra rune after the match.
padEnd
)
// NewRegexpGroup creates a RegexpGroup from the regexp source string s.
//
// Besides s itself (index 0) it compiles three padded variants that require
// one arbitrary rune before the match (padStart), after the match (padEnd),
// or both. If any of the four patterns fails to compile, the zero
// RegexpGroup and the compile error are returned; the function never panics.
func NewRegexpGroup(s string) (RegexpGroup, error) {
	// s is wrapped in a non-capturing group so that the padding rune applies
	// to the whole pattern (not just to one alternative) and the numbering
	// of the capturing groups in s is unchanged.
	patterns := [...]string{
		0:                 s,
		padStart:          ".(?:" + s + ")",
		padEnd:            "(?:" + s + ").",
		padStart | padEnd: ".(?:" + s + ").",
	}

	var rgrp RegexpGroup
	// Index 0 is compiled first, so a syntax error in s is reported for s
	// itself rather than for one of the padded patterns.
	for mode, pattern := range patterns {
		re, err := regexp.Compile(pattern)
		if err != nil {
			// A padded pattern can still be rejected when s sits right at
			// a limit of the regexp parser, hence no MustCompile here.
			return RegexpGroup{}, err
		}
		rgrp[mode] = re
	}
	return rgrp, nil
}
// regexpGroup converts re to a RegexpGroup. A RegexpGroup is returned as is,
// a string is compiled with NewRegexpGroup, and a value of any other type
// yields an error.
func regexpGroup(re any) (RegexpGroup, error) {
	if group, ok := re.(RegexpGroup); ok {
		return group, nil
	}
	if pattern, ok := re.(string); ok {
		return NewRegexpGroup(pattern)
	}
	return RegexpGroup{}, fmt.Errorf(`cannot convert "%v" (of type %[1]T) to type RegexpGroup`, re)
}
// bytesFind is the signature of the low-level search primitives used by
// findDownFunc, e.g. (*regexp.Regexp).FindIndex. Given a compiled regexp and
// the bytes to search, it returns the match as a flat slice of start/end
// byte offsets, or nil if there is no match.
type bytesFind func(*regexp.Regexp, []byte) []int
// findDownFunc looks, line by line from `start` to `end`, for the first line
// on which `find` reports a match of `re`, and converts the byte offsets
// returned by `find` into buffer locations.
//
// `re` may be a RegexpGroup or a regexp source string; a conversion error is
// returned as is. Line numbers outside the buffer are clamped, and the two
// bounds are swapped if `start` lies after `end`. When the search region
// starts or ends in the middle of a line, a padded regexp is used on a slice
// widened by one rune, so that "^", "$" etc. do not match at the region
// boundary. Negative offsets (unused submatches) become void locations.
// The result is nil if no line matches.
func (b *Buffer) findDownFunc(re any, start, end Loc, find bytesFind) ([]Loc, error) {
	group, err := regexpGroup(re)
	if err != nil {
		return nil, err
	}

	lastLine := b.LinesNum() - 1
	lastLineChars := util.CharacterCount(b.LineBytes(lastLine))
	if start.Y > lastLine {
		start.X = lastLineChars - 1
	}
	if end.Y > lastLine {
		end.X = lastLineChars
	}
	start.Y = util.Clamp(start.Y, 0, lastLine)
	end.Y = util.Clamp(end.Y, 0, lastLine)

	if start.GreaterThan(end) {
		start, end = end, start
	}

	for y := start.Y; y <= end.Y; y++ {
		line := b.LineBytes(y)
		lo, hi := 0, len(line)
		mode := 0

		if y == end.Y {
			chars := util.CharacterCount(line)
			end.X = util.Clamp(end.X, 0, chars)
			if end.X < chars {
				// Region ends mid-line: include one rune after it.
				mode |= padEnd
				hi = util.NextRunePos(line, util.BytePosFromCharPos(line, end.X))
			}
		}
		if y == start.Y {
			chars := util.CharacterCount(line)
			start.X = util.Clamp(start.X, 0, chars)
			if start.X > 0 {
				// Region starts mid-line: include one rune before it.
				mode |= padStart
				lo = util.PreviousRunePos(line, util.BytePosFromCharPos(line, start.X))
			}
		}

		region := line[lo:hi]
		match := find(group[mode], region)
		if match == nil {
			continue
		}

		// Drop the padding runes from the overall match again.
		if mode&padStart != 0 {
			match[0] = util.NextRunePos(region, match[0])
		}
		if mode&padEnd != 0 {
			match[1] = util.PreviousRunePos(region, match[1])
		}

		toLoc := func(j, pos int) Loc {
			if pos < 0 { // start or end of unused submatch
				return LocVoid()
			}
			x := util.CharacterCount(line[:lo+pos])
			// Even indices are start offsets; step back by one if the
			// rune at the start offset is a mark.
			if j%2 == 0 {
				if r, _ := utf8.DecodeRune(region[pos:]); util.IsMark(r) {
					x--
				}
			}
			return Loc{x, y}
		}
		return util.RangeMap(match, toLoc), nil
	}
	return nil, nil
}
// bufferFind is the signature of a buffer-level search such as
// (*Buffer).FindDown. Given a buffer, a regexp (RegexpGroup or string) and
// the start and end of the search region, it returns the positions of the
// match (nil if there is none) plus any error.
type bufferFind func(*Buffer, any, Loc, Loc) ([]Loc, error)
// FindDown searches from `start` towards `end` and reports the first match
// of `re`, which may be a RegexpGroup or a regexp source string. The result
// holds the start and end positions of the match; it is nil if no match
// exists.
func (b *Buffer) FindDown(re any, start, end Loc) ([]Loc, error) {
	var firstMatch bytesFind = (*regexp.Regexp).FindIndex
	return b.findDownFunc(re, start, end, firstMatch)
}
// FindDownSubmatch works like FindDown, but the result additionally holds
// the start and end positions of all submatches (capturing groups) of the
// first match of `re` between `start` and `end`. It is nil if no match
// exists. The start and end positions of an unused submatch are void.
func (b *Buffer) FindDownSubmatch(re any, start, end Loc) ([]Loc, error) {
	var firstSubmatch bytesFind = (*regexp.Regexp).FindSubmatchIndex
	return b.findDownFunc(re, start, end, firstSubmatch)
}
// findUpFunc walks the lines of the search region from the last one up to
// the first one and returns the last match reported for the first line that
// has any match, or nil if no line matches.
//
// `re` may be a RegexpGroup or a regexp source string; a conversion error is
// returned as is. Line numbers outside the buffer are clamped, and the two
// bounds are swapped if `start` lies after `end`. `find` is the primitive
// that is applied to the bytes of each line.
func (b *Buffer) findUpFunc(re any, start, end Loc, find bytesFind) ([]Loc, error) {
	group, err := regexpGroup(re)
	if err != nil {
		return nil, err
	}

	lastLine := b.LinesNum() - 1
	lastLineChars := util.CharacterCount(b.LineBytes(lastLine))
	if start.Y > lastLine {
		start.X = lastLineChars - 1
	}
	if end.Y > lastLine {
		end.X = lastLineChars
	}
	start.Y = util.Clamp(start.Y, 0, lastLine)
	end.Y = util.Clamp(end.Y, 0, lastLine)

	if start.GreaterThan(end) {
		start, end = end, start
	}

	// searchLine ignores its regexp argument and always uses group.
	searchLine := func(b *Buffer, _ any, from, to Loc) ([]Loc, error) {
		return b.findDownFunc(group, from, to, find)
	}

	for y := end.Y; y >= start.Y; y-- {
		chars := util.CharacterCount(b.LineBytes(y))
		// Restrict the line to the part that lies inside the region.
		lineStart := Loc{0, y}.Clamp(start, end)
		lineEnd := Loc{chars, y}.Clamp(start, end)

		var last []Loc
		b.findAllFuncFunc(group, lineStart, lineEnd, searchLine, func(match []Loc) {
			last = match // keep only the most recent match
		})
		if last != nil {
			return last, nil
		}
	}
	return nil, nil
}
// FindUp reports the last match of `re` between `start` and `end`. `re` may
// be a RegexpGroup or a regexp source string. The result holds the start and
// end positions of the match; it is nil if no match exists.
func (b *Buffer) FindUp(re any, start, end Loc) ([]Loc, error) {
	// lastMatch picks the final one of all matches in the given bytes.
	lastMatch := func(r *regexp.Regexp, line []byte) []int {
		all := r.FindAllIndex(line, -1)
		if all == nil {
			return nil
		}
		return all[len(all)-1]
	}
	return b.findUpFunc(re, start, end, lastMatch)
}
// FindUpSubmatch works like FindUp, but the result additionally holds the
// start and end positions of all submatches (capturing groups) of the last
// match of `re` between `start` and `end`. It is nil if no match exists.
// The start and end positions of an unused submatch are void.
func (b *Buffer) FindUpSubmatch(re any, start, end Loc) ([]Loc, error) {
	// lastSubmatch picks the final one of all matches in the given bytes.
	lastSubmatch := func(r *regexp.Regexp, line []byte) []int {
		all := r.FindAllSubmatchIndex(line, -1)
		if all == nil {
			return nil
		}
		return all[len(all)-1]
	}
	return b.findUpFunc(re, start, end, lastSubmatch)
}
// findAllFuncFunc calls `f` once for every match of `re` between `start` and
// `end`, using `find` to locate each successive match. `re` may be a
// RegexpGroup or a regexp source string; it is converted only once and
// `find` always receives the resulting RegexpGroup.
//
// The bounds may be given in either order. The function returns the number
// of matches passed to `f`. If `re` cannot be converted, it returns -1 and
// the error without calling `f`; if `find` fails, it returns the number of
// matches processed so far together with that error.
func (b *Buffer) findAllFuncFunc(re any, start, end Loc, find bufferFind, f func([]Loc)) (int, error) {
	rgrp, err := regexpGroup(re)
	if err != nil {
		return -1, err
	}

	// findDownFunc swaps reversed bounds on every call. The loop below
	// advances loc towards end, so the bounds must be ordered here as well;
	// otherwise the same match would be found over and over again.
	if start.GreaterThan(end) {
		start, end = end, start
	}

	n := 0
	loc := start
	for {
		match, err := find(b, rgrp, loc, end)
		if err != nil {
			return n, err
		}
		if match == nil {
			break
		}
		n++
		f(match)

		switch {
		case match[0] != match[1]:
			// Continue right after a non-empty match.
			loc = match[1]
		case match[1] != end:
			// Step over an empty match to avoid finding it again.
			loc = match[1].Move(1, b)
		default:
			// Empty match at the very end of the region.
			return n, nil
		}
	}
	return n, nil
}
// FindAllFunc calls the function `f` once for each match between `start`
// and `end` of the regexp given by `re` (a RegexpGroup or a regexp source
// string). The argument of `f` is the slice containing the start and end
// positions of the match. FindAllFunc returns the number of matches plus
// any error that occurred when compiling the regexp.
func (b *Buffer) FindAllFunc(re any, start, end Loc, f func([]Loc)) (int, error) {
	var find bufferFind = (*Buffer).FindDown
	return b.findAllFuncFunc(re, start, end, find, f)
}
// FindAll collects the start and end positions of all matches between
// `start` and `end` of the regexp given by `re`. It returns one slice per
// match, plus any error that occurred when compiling the regexp. If no
// match is found, the slice returned is nil.
func (b *Buffer) FindAll(re any, start, end Loc) ([][]Loc, error) {
	var all [][]Loc
	collect := func(m []Loc) {
		all = append(all, m)
	}
	_, err := b.FindAllFunc(re, start, end, collect)
	return all, err
}
// FindAllSubmatchFunc calls the function `f` once for each match between
// `start` and `end` of the regexp given by `re` (a RegexpGroup or a regexp
// source string). The argument of `f` is the slice containing the start and
// end positions of the match and of all submatches (capturing groups).
// FindAllSubmatchFunc returns the number of matches plus any error that
// occurred when compiling the regexp.
func (b *Buffer) FindAllSubmatchFunc(re any, start, end Loc, f func([]Loc)) (int, error) {
	var find bufferFind = (*Buffer).FindDownSubmatch
	return b.findAllFuncFunc(re, start, end, find, f)
}
// FindAllSubmatch collects the start and end positions of all matches and
// all their submatches (capturing groups) between `start` and `end` of the
// regexp given by `re`. It returns one slice per match, plus any error that
// occurred when compiling the regexp. If no match is found, the slice
// returned is nil.
func (b *Buffer) FindAllSubmatch(re any, start, end Loc) ([][]Loc, error) {
	var all [][]Loc
	collect := func(m []Loc) {
		all = append(all, m)
	}
	_, err := b.FindAllSubmatchFunc(re, start, end, collect)
	return all, err
}
// MatchedStrings converts a slice containing start and end positions of
// matches or submatches to a slice containing the corresponding strings.
// `locs` is read as consecutive (start, end) pairs, so the result has
// len(locs)/2 elements; a trailing unpaired element is ignored.
// Unused submatches (void start position) are converted to empty strings.
func (b *Buffer) MatchedStrings(locs []Loc) []string {
	strs := make([]string, len(locs)/2)
	// Visit every pair; i indexes the pairs, not the elements of locs.
	for i := range strs {
		from, to := locs[2*i], locs[2*i+1]
		if !from.IsVoid() {
			strs[i] = string(b.Substr(from, to))
		}
	}
	return strs
}
// FindNext finds the next occurrence of the string `s` in the region between
// `start` and `end`, beginning at `from`. It searches towards `end` if `down`
// is true and towards `start` otherwise; if nothing is found in that part,
// the whole region is searched. Unless `useRegex` is set, `s` is taken
// literally. The "ignorecase" buffer setting makes the search
// case-insensitive.
//
// It returns the start and end location of the match (if found) and a
// boolean indicating whether it was found. An empty `s` is never found.
// An error is returned if the search regex is invalid.
func (b *Buffer) FindNext(s string, start, end, from Loc, down bool, useRegex bool) ([2]Loc, bool, error) {
	var none [2]Loc
	if s == "" {
		return none, false, nil
	}

	pattern := s
	if !useRegex {
		pattern = regexp.QuoteMeta(pattern)
	}
	if b.Settings["ignorecase"].(bool) {
		pattern = "(?i)" + pattern
	}

	rgrp, err := NewRegexpGroup(pattern)
	if err != nil {
		return none, false, err
	}

	// The searches below receive an already compiled RegexpGroup, so the
	// only error they can report (a failed conversion) cannot occur.
	var match []Loc
	switch {
	case down:
		if match, _ = b.FindDown(rgrp, from, end); match == nil {
			match, _ = b.FindDown(rgrp, start, end)
		}
	default:
		if match, _ = b.FindUp(rgrp, from, start); match == nil {
			match, _ = b.FindUp(rgrp, end, start)
		}
	}

	if match == nil {
		return none, false, nil
	}
	return [2]Loc{match[0], match[1]}, true, nil
}
// replaceAllFuncFunc replaces every match of `re` between `start` and `end`
// with the bytes returned by `repl` for that match. The matches are located
// with `find`; all replacements are collected first and then applied in a
// single MultipleReplace call.
//
// It returns the number of replacements and the end position, whose X value
// is shifted by the change in character count of line `end.Y`. If the search
// fails, it returns -1, a void location and the error, and the buffer is not
// modified.
func (b *Buffer) replaceAllFuncFunc(re any, start, end Loc, find bufferFind, repl func(match []Loc) []byte) (int, Loc, error) {
	charsBefore := util.CharacterCount(b.LineBytes(end.Y))

	var deltas []Delta
	collect := func(match []Loc) {
		deltas = append(deltas, Delta{repl(match), match[0], match[1]})
	}
	count, err := b.findAllFuncFunc(re, start, end, find, collect)
	if err != nil {
		return -1, LocVoid(), err
	}

	b.MultipleReplace(deltas)

	charsAfter := util.CharacterCount(b.LineBytes(end.Y))
	newEnd := Loc{end.X + charsAfter - charsBefore, end.Y}
	return count, newEnd, nil
}
// ReplaceAll replaces all matches of the regexp `re` in the given area. The
// new text is obtained from `template` by replacing each variable with the
// corresponding submatch as in `Regexp.Expand`; `$0` stands for the match
// itself. The function returns the number of replacements made, the new end
// position and any error that occurred during regexp compilation.
func (b *Buffer) ReplaceAll(re any, start, end Loc, template []byte) (int, Loc, error) {
	// replace holds the expansion for the match found most recently; it is
	// set by find and handed out by the repl callback right afterwards.
	var replace []byte

	find := func(b *Buffer, r any, start, end Loc) ([]Loc, error) {
		rgrp, err := regexpGroup(r)
		if err != nil {
			return nil, err
		}
		return b.findDownFunc(rgrp, start, end, func(re *regexp.Regexp, l []byte) []int {
			match := re.FindSubmatchIndex(l)
			if match == nil {
				return nil
			}

			// Find out which padded variant of the regexp is in use.
			padMode := 0
			for mode, padded := range rgrp {
				if padded == re {
					padMode = mode
					break
				}
			}

			// With a padded regexp the overall match includes the padding
			// runes. Strip them on a copy before expanding, so that $0
			// yields exactly the text that is going to be replaced.
			// findDownFunc does the same adjustment on `match` itself.
			expand := match
			if padMode != 0 {
				expand = append([]int(nil), match...)
				if padMode&padStart != 0 {
					expand[0] = util.NextRunePos(l, expand[0])
				}
				if padMode&padEnd != 0 {
					expand[1] = util.PreviousRunePos(l, expand[1])
				}
			}

			replace = re.Expand(nil, template, l, expand)
			return match[:2] // this way match[2:] is not transformed to Loc's
		})
	}

	return b.replaceAllFuncFunc(re, start, end, find, func(match []Loc) []byte {
		return replace
	})
}
// ReplaceAllLiteral replaces all matches of the regexp `re` in the given
// area with the fixed bytes `repl`; no template expansion takes place. The
// function returns the number of replacements made, the new end position
// and any error that occurred during regexp compilation.
func (b *Buffer) ReplaceAllLiteral(re any, start, end Loc, repl []byte) (int, Loc, error) {
	literal := func([]Loc) []byte {
		return repl
	}
	return b.ReplaceAllFunc(re, start, end, literal)
}
// ReplaceAllFunc replaces all matches of the regexp `re` in the given area
// with `repl(match)`, where `match` is the slice containing the start and
// end positions of the match. The function returns the number of
// replacements made, the new end position and any error that occurred
// during regexp compilation.
func (b *Buffer) ReplaceAllFunc(re any, start, end Loc, repl func(match []Loc) []byte) (int, Loc, error) {
	var find bufferFind = (*Buffer).FindDown
	return b.replaceAllFuncFunc(re, start, end, find, repl)
}
// ReplaceAllSubmatchFunc replaces all matches of the regexp `re` in the
// given area with `repl(match)`, where `match` is the slice containing the
// start and end positions of the match and of all submatches. The function
// returns the number of replacements made, the new end position and any
// error that occurred during regexp compilation.
func (b *Buffer) ReplaceAllSubmatchFunc(re any, start, end Loc, repl func(match []Loc) []byte) (int, Loc, error) {
	var find bufferFind = (*Buffer).FindDownSubmatch
	return b.replaceAllFuncFunc(re, start, end, find, repl)
}