contextual_latiniser.go 1.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364
  1. // SPDX-FileCopyrightText: Adam Evyčędo
  2. //
  3. // SPDX-License-Identifier: AGPL-3.0-or-later
  4. package transformers
  5. import (
  6. "unicode/utf8"
  7. "golang.org/x/text/transform"
  8. )
  9. //nolint:gochecknoglobals
  10. var ZWJ = '\u200d'
  11. type ContextualLatiniser struct {
  12. Replace func(rune, rune) []rune
  13. PreviousRune rune
  14. }
  15. func (ContextualLatiniser) Reset() {}
  16. func (l ContextualLatiniser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
  17. for nSrc < len(src) {
  18. r, _ := utf8.DecodeRune(src[nSrc:])
  19. if r == utf8.RuneError {
  20. if !atEOF && !utf8.FullRune(src[nSrc:]) {
  21. err = transform.ErrShortSrc
  22. break
  23. }
  24. if nDst+3 > len(dst) {
  25. err = transform.ErrShortDst
  26. break
  27. }
  28. dst[nDst+0] = runeErrorString[0]
  29. dst[nDst+1] = runeErrorString[1]
  30. dst[nDst+2] = runeErrorString[2]
  31. nSrc++
  32. continue
  33. }
  34. replacement := l.Replace(l.PreviousRune, r)
  35. l.PreviousRune = r
  36. size := 0
  37. for _, r2 := range replacement {
  38. r2b := []byte(string(r2))
  39. size += len(r2b)
  40. }
  41. if nDst+size > len(dst) {
  42. err = transform.ErrShortDst
  43. break
  44. }
  45. for _, r2 := range replacement {
  46. r2b := []byte(string(r2))
  47. s := len(r2b)
  48. for i := 0; i < s; i++ {
  49. dst[nDst] = r2b[i]
  50. nDst++
  51. }
  52. nSrc++
  53. }
  54. }
  55. return
  56. }