strings.go 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170
  1. // License: GPLv3 Copyright: 2022, Kovid Goyal, <kovid at kovidgoyal.net>
  2. package utils
  3. import (
  4. "fmt"
  5. "strings"
  6. "unicode/utf8"
  7. )
  8. var _ = fmt.Print
  9. func Capitalize(x string) string {
  10. if x == "" {
  11. return x
  12. }
  13. s, sz := utf8.DecodeRuneInString(x)
  14. cr := strings.ToUpper(string(s))
  15. return cr + x[sz:]
  16. }
  17. type ScanLines struct {
  18. entries []string
  19. scanner *StringScanner
  20. }
  21. func NewScanLines(entries ...string) *ScanLines {
  22. return &ScanLines{entries: entries}
  23. }
  24. func (self *ScanLines) Scan() bool {
  25. if self.scanner == nil {
  26. if len(self.entries) == 0 {
  27. return false
  28. }
  29. self.scanner = NewLineScanner(self.entries[0])
  30. self.entries = self.entries[1:]
  31. return self.Scan()
  32. } else {
  33. if self.scanner.Scan() {
  34. return true
  35. }
  36. self.scanner = nil
  37. return self.Scan()
  38. }
  39. }
  40. func (self *ScanLines) Text() string {
  41. if self.scanner == nil {
  42. return ""
  43. }
  44. return self.scanner.Text()
  45. }
  46. type StringScannerScanFunc = func(data string) (remaining_data, token string)
  47. type StringScannerPostprocessFunc = func(token string) string
  48. func ScanFuncForSeparator(sep string) StringScannerScanFunc {
  49. if len(sep) == 1 {
  50. sb := sep[0]
  51. return func(data string) (remaining_data, token string) {
  52. idx := strings.IndexByte(data, sb)
  53. if idx < 0 {
  54. return "", data
  55. }
  56. return data[idx+len(sep):], data[:idx]
  57. }
  58. }
  59. return func(data string) (remaining_data, token string) {
  60. idx := strings.Index(data, sep)
  61. if idx < 0 {
  62. return "", data
  63. }
  64. return data[idx+len(sep):], data[:idx]
  65. }
  66. }
  67. // Faster, better designed, zero-allocation version of bufio.Scanner for strings
  68. type StringScanner struct {
  69. ScanFunc StringScannerScanFunc
  70. PostProcessTokenFunc StringScannerPostprocessFunc
  71. data string
  72. token string
  73. }
  74. func (self *StringScanner) Scan() bool {
  75. if self.data == "" {
  76. self.token = ""
  77. return false
  78. }
  79. self.data, self.token = self.ScanFunc(self.data)
  80. if self.PostProcessTokenFunc != nil {
  81. self.token = self.PostProcessTokenFunc(self.token)
  82. }
  83. return true
  84. }
  85. func (self *StringScanner) Err() error { return nil }
  86. func (self *StringScanner) Text() string {
  87. return self.token
  88. }
  89. func (self *StringScanner) Split(data string, expected_number ...int) (ans []string) {
  90. if len(expected_number) != 0 {
  91. ans = make([]string, 0, expected_number[0])
  92. } else {
  93. ans = []string{}
  94. }
  95. self.data = data
  96. for self.Scan() {
  97. ans = append(ans, self.Text())
  98. }
  99. return
  100. }
  101. func NewLineScanner(text string) *StringScanner {
  102. return &StringScanner{
  103. data: text, ScanFunc: ScanFuncForSeparator("\n"),
  104. PostProcessTokenFunc: func(s string) string {
  105. if len(s) > 0 && s[len(s)-1] == '\r' {
  106. s = s[:len(s)-1]
  107. }
  108. return s
  109. },
  110. }
  111. }
  112. func NewSeparatorScanner(text, separator string) *StringScanner {
  113. return &StringScanner{
  114. data: text, ScanFunc: ScanFuncForSeparator(separator),
  115. }
  116. }
  117. func Splitlines(x string, expected_number_of_lines ...int) (ans []string) {
  118. return NewLineScanner("").Split(x, expected_number_of_lines...)
  119. }
  120. // Return a function that can be called sequentially with rune based offsets
  121. // converting them to byte based offsets. The rune offsets must be monotonic,
  122. // otherwise the function returns -1
  123. func RuneOffsetsToByteOffsets(text string) func(int) int {
  124. self := struct {
  125. char_offset, byte_offset, last int
  126. bytes []byte
  127. }{bytes: UnsafeStringToBytes(text)}
  128. return func(x int) (sz int) {
  129. switch {
  130. case x == self.last:
  131. return self.byte_offset
  132. case x < self.last:
  133. return -1
  134. }
  135. self.last = x
  136. x -= self.char_offset
  137. for x > 0 {
  138. _, d := utf8.DecodeRune(self.bytes)
  139. sz += d
  140. self.bytes = self.bytes[d:]
  141. x--
  142. self.char_offset++
  143. }
  144. self.byte_offset += sz
  145. return self.byte_offset
  146. }
  147. }