mirror of
https://github.com/zyedidia/micro.git
synced 2025-06-18 14:55:38 -04:00
Final touches for combining character support
This commit is contained in:
parent
79c0ea17ad
commit
ff0683d6d0
@ -1,15 +1,11 @@
|
||||
package util
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// LuaRuneAt is a helper function for lua plugins to return the rune
|
||||
// at an index within a string
|
||||
func LuaRuneAt(str string, runeidx int) string {
|
||||
i := 0
|
||||
for len(str) > 0 {
|
||||
r, size := utf8.DecodeRuneInString(str)
|
||||
r, _, size := DecodeCharacterInString(str)
|
||||
|
||||
str = str[size:]
|
||||
|
||||
@ -26,7 +22,7 @@ func LuaRuneAt(str string, runeidx int) string {
|
||||
func LuaGetLeadingWhitespace(s string) string {
|
||||
ws := []byte{}
|
||||
for len(s) > 0 {
|
||||
r, size := utf8.DecodeRuneInString(s)
|
||||
r, _, size := DecodeCharacterInString(s)
|
||||
if r == ' ' || r == '\t' {
|
||||
ws = append(ws, byte(r))
|
||||
} else {
|
||||
@ -40,6 +36,6 @@ func LuaGetLeadingWhitespace(s string) string {
|
||||
|
||||
// LuaIsWordChar returns true if the first rune in a string is a word character
|
||||
func LuaIsWordChar(s string) bool {
|
||||
r, _ := utf8.DecodeRuneInString(s)
|
||||
r, _, _ := DecodeCharacterInString(s)
|
||||
return IsWordChar(r)
|
||||
}
|
||||
|
@ -5,6 +5,17 @@ import (
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// Unicode is annoying. A "code point" (rune in Go-speak) may need up to
|
||||
// 4 bytes to represent it. In general, a code point will represent a
|
||||
// complete character, but this is not always the case. A character with
|
||||
// accents may be made up of multiple code points (the code point for the
|
||||
// original character, and additional code points for each accent/marking).
|
||||
// The functions below are meant to help deal with these additional "combining"
|
||||
// code points. In underlying operations (search, replace, etc...), micro will
|
||||
// treat a character with combining code points as just the original code point.
|
||||
// For rendering, micro will display the combining characters. It's not perfect
|
||||
// but it's pretty good.
|
||||
|
||||
// combining character range table
|
||||
var combining = &unicode.RangeTable{
|
||||
R16: []unicode.Range16{
|
||||
@ -35,6 +46,25 @@ func DecodeCharacter(b []byte) (rune, []rune, int) {
|
||||
return r, combc, size
|
||||
}
|
||||
|
||||
// DecodeCharacterInString returns the next character from a string
|
||||
// A character is a rune along with any accompanying combining runes
|
||||
func DecodeCharacterInString(str string) (rune, []rune, int) {
|
||||
r, size := utf8.DecodeRuneInString(str)
|
||||
str = str[size:]
|
||||
c, s := utf8.DecodeRuneInString(str)
|
||||
|
||||
var combc []rune
|
||||
for unicode.In(c, combining) {
|
||||
combc = append(combc, c)
|
||||
size += s
|
||||
|
||||
str = str[s:]
|
||||
c, s = utf8.DecodeRuneInString(str)
|
||||
}
|
||||
|
||||
return r, combc, size
|
||||
}
|
||||
|
||||
// CharacterCount returns the number of characters in a byte array
|
||||
// Similar to utf8.RuneCount but for unicode characters
|
||||
func CharacterCount(b []byte) int {
|
||||
|
@ -13,7 +13,6 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/blang/semver"
|
||||
runewidth "github.com/mattn/go-runewidth"
|
||||
@ -82,7 +81,7 @@ func SliceEndStr(str string, index int) string {
|
||||
return str[totalSize:]
|
||||
}
|
||||
|
||||
_, size := utf8.DecodeRuneInString(str[totalSize:])
|
||||
_, _, size := DecodeCharacterInString(str[totalSize:])
|
||||
totalSize += size
|
||||
i++
|
||||
}
|
||||
@ -119,7 +118,7 @@ func SliceStartStr(str string, index int) string {
|
||||
return str[:totalSize]
|
||||
}
|
||||
|
||||
_, size := utf8.DecodeRuneInString(str[totalSize:])
|
||||
_, _, size := DecodeCharacterInString(str[totalSize:])
|
||||
totalSize += size
|
||||
i++
|
||||
}
|
||||
|
@ -3,7 +3,6 @@ package highlight
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
func sliceStart(slc []byte, index int) []byte {
|
||||
@ -15,7 +14,7 @@ func sliceStart(slc []byte, index int) []byte {
|
||||
return slc[totalSize:]
|
||||
}
|
||||
|
||||
_, size := utf8.DecodeRune(slc[totalSize:])
|
||||
_, _, size := DecodeCharacter(slc[totalSize:])
|
||||
totalSize += size
|
||||
i++
|
||||
}
|
||||
@ -32,7 +31,7 @@ func sliceEnd(slc []byte, index int) []byte {
|
||||
return slc[:totalSize]
|
||||
}
|
||||
|
||||
_, size := utf8.DecodeRune(slc[totalSize:])
|
||||
_, _, size := DecodeCharacter(slc[totalSize:])
|
||||
totalSize += size
|
||||
i++
|
||||
}
|
||||
@ -47,9 +46,9 @@ func runePos(p int, str []byte) int {
|
||||
return 0
|
||||
}
|
||||
if p >= len(str) {
|
||||
return utf8.RuneCount(str)
|
||||
return CharacterCount(str)
|
||||
}
|
||||
return utf8.RuneCount(str[:p])
|
||||
return CharacterCount(str[:p])
|
||||
}
|
||||
|
||||
func combineLineMatch(src, dst LineMatch) LineMatch {
|
||||
@ -112,7 +111,7 @@ func findIndex(regex *regexp.Regexp, skip *regexp.Regexp, str []byte, canMatchSt
|
||||
var strbytes []byte
|
||||
if skip != nil {
|
||||
strbytes = skip.ReplaceAllFunc(str, func(match []byte) []byte {
|
||||
res := make([]byte, utf8.RuneCount(match))
|
||||
res := make([]byte, CharacterCount(match))
|
||||
return res
|
||||
})
|
||||
} else {
|
||||
@ -148,7 +147,7 @@ func findAllIndex(regex *regexp.Regexp, str []byte, canMatchStart, canMatchEnd b
|
||||
}
|
||||
|
||||
func (h *Highlighter) highlightRegion(highlights LineMatch, start int, canMatchEnd bool, lineNum int, line []byte, curRegion *region, statesOnly bool) LineMatch {
|
||||
lineLen := utf8.RuneCount(line)
|
||||
lineLen := CharacterCount(line)
|
||||
if start == 0 {
|
||||
if !statesOnly {
|
||||
if _, ok := highlights[0]; !ok {
|
||||
@ -236,7 +235,7 @@ func (h *Highlighter) highlightRegion(highlights LineMatch, start int, canMatchE
|
||||
}
|
||||
|
||||
func (h *Highlighter) highlightEmptyRegion(highlights LineMatch, start int, canMatchEnd bool, lineNum int, line []byte, statesOnly bool) LineMatch {
|
||||
lineLen := utf8.RuneCount(line)
|
||||
lineLen := CharacterCount(line)
|
||||
if lineLen == 0 {
|
||||
if canMatchEnd {
|
||||
h.lastRegion = nil
|
||||
|
86
pkg/highlight/unicode.go
Normal file
86
pkg/highlight/unicode.go
Normal file
@ -0,0 +1,86 @@
|
||||
package highlight
|
||||
|
||||
import (
|
||||
"unicode"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// combining character range table
|
||||
var combining = &unicode.RangeTable{
|
||||
R16: []unicode.Range16{
|
||||
{0x0300, 0x036f, 1}, // combining diacritical marks
|
||||
{0x1ab0, 0x1aff, 1}, // combining diacritical marks extended
|
||||
{0x1dc0, 0x1dff, 1}, // combining diacritical marks supplement
|
||||
{0x20d0, 0x20ff, 1}, // combining diacritical marks for symbols
|
||||
{0xfe20, 0xfe2f, 1}, // combining half marks
|
||||
},
|
||||
}
|
||||
|
||||
// DecodeCharacter returns the next character from an array of bytes
|
||||
// A character is a rune along with any accompanying combining runes
|
||||
func DecodeCharacter(b []byte) (rune, []rune, int) {
|
||||
r, size := utf8.DecodeRune(b)
|
||||
b = b[size:]
|
||||
c, s := utf8.DecodeRune(b)
|
||||
|
||||
var combc []rune
|
||||
for unicode.In(c, combining) {
|
||||
combc = append(combc, c)
|
||||
size += s
|
||||
|
||||
b = b[s:]
|
||||
c, s = utf8.DecodeRune(b)
|
||||
}
|
||||
|
||||
return r, combc, size
|
||||
}
|
||||
|
||||
// DecodeCharacterInString returns the next character from a string
|
||||
// A character is a rune along with any accompanying combining runes
|
||||
func DecodeCharacterInString(str string) (rune, []rune, int) {
|
||||
r, size := utf8.DecodeRuneInString(str)
|
||||
str = str[size:]
|
||||
c, s := utf8.DecodeRuneInString(str)
|
||||
|
||||
var combc []rune
|
||||
for unicode.In(c, combining) {
|
||||
combc = append(combc, c)
|
||||
size += s
|
||||
|
||||
str = str[s:]
|
||||
c, s = utf8.DecodeRuneInString(str)
|
||||
}
|
||||
|
||||
return r, combc, size
|
||||
}
|
||||
|
||||
// CharacterCount returns the number of characters in a byte array
|
||||
// Similar to utf8.RuneCount but for unicode characters
|
||||
func CharacterCount(b []byte) int {
|
||||
s := 0
|
||||
|
||||
for len(b) > 0 {
|
||||
r, size := utf8.DecodeRune(b)
|
||||
if !unicode.In(r, combining) {
|
||||
s++
|
||||
}
|
||||
|
||||
b = b[size:]
|
||||
}
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
// CharacterCount returns the number of characters in a string
|
||||
// Similar to utf8.RuneCountInString but for unicode characters
|
||||
func CharacterCountInString(str string) int {
|
||||
s := 0
|
||||
|
||||
for _, r := range str {
|
||||
if !unicode.In(r, combining) {
|
||||
s++
|
||||
}
|
||||
}
|
||||
|
||||
return s
|
||||
}
|
Loading…
Reference in New Issue
Block a user