123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170 |
- // License: GPLv3 Copyright: 2022, Kovid Goyal, <kovid at kovidgoyal.net>
- package utils
- import (
- "fmt"
- "strings"
- "unicode/utf8"
- )
- var _ = fmt.Print
// Capitalize returns x with its first character converted to upper case. The
// rest of the string is returned unchanged.
//
// An empty string is returned as is. A string that does not start with a valid
// UTF-8 sequence is also returned as is, instead of having its first byte
// replaced by the Unicode replacement character.
func Capitalize(x string) string {
	first, size := utf8.DecodeRuneInString(x)
	// size == 0 means x is empty. RuneError with size == 1 means the leading
	// byte is not valid UTF-8 (a correctly encoded U+FFFD has size 3). There
	// is nothing to upper-case in either case.
	if size == 0 || (first == utf8.RuneError && size == 1) {
		return x
	}
	return strings.ToUpper(string(first)) + x[size:]
}
- type ScanLines struct {
- entries []string
- scanner *StringScanner
- }
- func NewScanLines(entries ...string) *ScanLines {
- return &ScanLines{entries: entries}
- }
- func (self *ScanLines) Scan() bool {
- if self.scanner == nil {
- if len(self.entries) == 0 {
- return false
- }
- self.scanner = NewLineScanner(self.entries[0])
- self.entries = self.entries[1:]
- return self.Scan()
- } else {
- if self.scanner.Scan() {
- return true
- }
- self.scanner = nil
- return self.Scan()
- }
- }
- func (self *ScanLines) Text() string {
- if self.scanner == nil {
- return ""
- }
- return self.scanner.Text()
- }
type (
	// StringScannerScanFunc splits the next token off data. It returns the
	// token together with the data that remains to be scanned after it.
	StringScannerScanFunc = func(data string) (remaining_data, token string)

	// StringScannerPostprocessFunc transforms a token and returns the
	// transformed token.
	StringScannerPostprocessFunc = func(token string) string
)
- func ScanFuncForSeparator(sep string) StringScannerScanFunc {
- if len(sep) == 1 {
- sb := sep[0]
- return func(data string) (remaining_data, token string) {
- idx := strings.IndexByte(data, sb)
- if idx < 0 {
- return "", data
- }
- return data[idx+len(sep):], data[:idx]
- }
- }
- return func(data string) (remaining_data, token string) {
- idx := strings.Index(data, sep)
- if idx < 0 {
- return "", data
- }
- return data[idx+len(sep):], data[:idx]
- }
- }
- // Faster, better designed, zero-allocation version of bufio.Scanner for strings
- type StringScanner struct {
- ScanFunc StringScannerScanFunc
- PostProcessTokenFunc StringScannerPostprocessFunc
- data string
- token string
- }
- func (self *StringScanner) Scan() bool {
- if self.data == "" {
- self.token = ""
- return false
- }
- self.data, self.token = self.ScanFunc(self.data)
- if self.PostProcessTokenFunc != nil {
- self.token = self.PostProcessTokenFunc(self.token)
- }
- return true
- }
- func (self *StringScanner) Err() error { return nil }
- func (self *StringScanner) Text() string {
- return self.token
- }
- func (self *StringScanner) Split(data string, expected_number ...int) (ans []string) {
- if len(expected_number) != 0 {
- ans = make([]string, 0, expected_number[0])
- } else {
- ans = []string{}
- }
- self.data = data
- for self.Scan() {
- ans = append(ans, self.Text())
- }
- return
- }
- func NewLineScanner(text string) *StringScanner {
- return &StringScanner{
- data: text, ScanFunc: ScanFuncForSeparator("\n"),
- PostProcessTokenFunc: func(s string) string {
- if len(s) > 0 && s[len(s)-1] == '\r' {
- s = s[:len(s)-1]
- }
- return s
- },
- }
- }
- func NewSeparatorScanner(text, separator string) *StringScanner {
- return &StringScanner{
- data: text, ScanFunc: ScanFuncForSeparator(separator),
- }
- }
- func Splitlines(x string, expected_number_of_lines ...int) (ans []string) {
- return NewLineScanner("").Split(x, expected_number_of_lines...)
- }
// RuneOffsetsToByteOffsets returns a function that converts rune based offsets
// into text to byte based offsets. The returned function must be called with
// rune offsets that never decrease: it returns -1 for an offset smaller than
// the largest one it has accepted so far. Offsets past the end of text map to
// len(text).
//
// The returned function keeps state between calls, so each call only decodes
// the runes lying between the previously accepted offset and the new one.
func RuneOffsetsToByteOffsets(text string) func(int) int {
	rest := text // the part of text that has not been decoded yet
	last_rune, byte_offset := 0, 0
	return func(x int) int {
		switch {
		case x == last_rune:
			return byte_offset
		case x < last_rune:
			return -1
		}
		// Stop as soon as the text runs out instead of counting all the way
		// up to x: an offset far beyond the end would otherwise keep looping
		// without decoding anything.
		for n := x - last_rune; n > 0 && rest != ""; n-- {
			_, size := utf8.DecodeRuneInString(rest)
			rest = rest[size:]
			byte_offset += size
		}
		last_rune = x
		return byte_offset
	}
}
|