tags.go 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164
  1. //
  2. // Copyright (C) 2017-2021 Marcus Rohrmoser, http://purl.mro.name/ShaarliGo
  3. //
  4. // This program is free software: you can redistribute it and/or modify
  5. // it under the terms of the GNU General Public License as published by
  6. // the Free Software Foundation, either version 3 of the License, or
  7. // (at your option) any later version.
  8. //
  9. // This program is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU General Public License for more details.
  13. //
  14. // You should have received a copy of the GNU General Public License
  15. // along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. //
  17. package main
  18. import (
  19. "bufio"
  20. "sort"
  21. "strings"
  22. "unicode"
  23. "golang.org/x/text/transform"
  24. "golang.org/x/text/unicode/norm"
  25. )
  26. var emojiRunes map[rune]struct{}
  27. func init() {
  28. emojiRunes = make(map[rune]struct{}, len(emojiCodeMap))
  29. for _, v := range emojiCodeMap {
  30. r := []rune(v)[0]
  31. emojiRunes[r] = struct{}{}
  32. }
  33. emojiCodeMap = nil
  34. }
  // isEmojiRune reports whether ru lies in one of the hard-coded code point
  // ranges listed below (symbols, dingbats, variation selectors, pictographs, ...).
  //
  // See:
  // https://stackoverflow.com/a/39425959
  // http://cldr-build.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5B%3Aemoji%3A%5D&g=emoji
  func isEmojiRune(ru rune) bool {
  	switch {
  	case ru == '\u2328', // keyboard
  		ru == '\u2b50': // star
  		return true
  	case ru >= '\u20d0' && ru <= '\u20ff', // Combining Diacritical Marks for Symbols
  		ru >= '\u238c' && ru <= '\u2454', // Misc items
  		ru >= '\u2600' && ru <= '\u26FF', // Misc symbols
  		ru >= '\u2700' && ru <= '\u27BF', // Dingbats
  		ru >= '\uFE00' && ru <= '\uFE0F': // Variation Selectors
  		return true
  	case ru >= '\U0001f018' && ru <= '\U0001f270', // Various asian characters
  		ru >= '\U0001F1E6' && ru <= '\U0001F1FF', // Regional country flags
  		ru >= '\U0001F300' && ru <= '\U0001F5FF', // Misc Symbols and Pictographs
  		ru >= '\U0001F600' && ru <= '\U0001F64F', // Emoticons
  		ru >= '\U0001F680' && ru <= '\U0001F6FF', // Transport and Map
  		ru >= '\U0001F900' && ru <= '\U0001F9FF': // Supplemental Symbols and Pictographs
  		return true
  	}
  	return false
  }
  // tpf is the prefix rune that marks a word as a tag.
  const tpf = '#'

  // myPunct reports whether r is punctuation according to unicode.IsPunct, with
  // the exception of '@', '§', '†' and the tag prefix tpf, which are never
  // treated as punctuation here.
  func myPunct(r rune) bool {
  	if r == tpf || r == '@' || r == '§' || r == '†' {
  		return false
  	}
  	return unicode.IsPunct(r)
  }
  62. func isTag(tag string) string {
  63. for _, c := range tag {
  64. if tpf == c {
  65. tag = tag[1:]
  66. break
  67. }
  68. if isEmojiRune(c) {
  69. break
  70. }
  71. return ""
  72. }
  73. return strings.TrimFunc(tag, myPunct)
  74. }
  75. func tagsFromString(str string) []string {
  76. scanner := bufio.NewScanner(strings.NewReader(str))
  77. scanner.Split(bufio.ScanWords)
  78. ret := make([]string, 0, 10)
  79. tmp := make(map[string]struct{}, 10)
  80. tmp[""] = struct{}{}
  81. for scanner.Scan() {
  82. tag, _, _ := strings.Cut(scanner.Text(), "\u200b")
  83. tag = isTag(tag)
  84. if _, ok := tmp[tag]; ok {
  85. continue
  86. }
  87. ret = append(ret, tag)
  88. tmp[tag] = struct{}{}
  89. }
  90. return ret
  91. }
  92. // https://stackoverflow.com/a/26722698
  93. func fold(str string) string {
  94. tr := transform.Chain(norm.NFD, transform.RemoveFunc(func(r rune) bool {
  95. return unicode.Is(unicode.Mn, r) // Mn: nonspacing marks
  96. }), norm.NFC)
  97. // todo: chain lowercase + trim
  98. if result, _, err := transform.String(tr, str); err != nil {
  99. panic(err)
  100. } else {
  101. return strings.TrimSpace(strings.ToLower(result))
  102. }
  103. }
  // tagsVisitor wraps tags in a visitor function: calling the result with a
  // callback invokes that callback once for every tag, in the order given.
  func tagsVisitor(tags ...string) func(func(string)) {
  	visit := func(callback func(string)) {
  		for i := 0; i < len(tags); i++ {
  			callback(tags[i])
  		}
  	}
  	return visit
  }
  111. func tagsNormalise(ds string, ex string, tavi func(func(string)), knovi func(func(string))) (description string, extended string, tags []string) {
  112. knodi := make(map[string]string, 1000)
  113. knovi(func(tag string) { knodi[fold(tag)] = tag })
  114. tags = make([]string, 0, 20)
  115. // 1. iterate text tags
  116. tadi := make(map[string]string, 20)
  117. tadi[""] = ""
  118. add := func(tag string) string {
  119. k := fold(tag)
  120. if _, ok := tadi[k]; ok {
  121. return ""
  122. }
  123. if v, ok := knodi[k]; ok {
  124. tag = v
  125. }
  126. tadi[k] = tag
  127. tags = append(tags, tag) // updating the reference correctly?
  128. return tag
  129. }
  130. for _, tag := range append(tagsFromString(ds), tagsFromString(ex)...) {
  131. add(tag)
  132. }
  133. // 2. visit all previous tags and add missing ones to tadi and extended
  134. tavi(func(tag string) {
  135. if t := add(tag); t == "" {
  136. return
  137. }
  138. ex += " #" + tag // todo: skip superfluous # before emojis
  139. })
  140. description = strings.TrimSpace(ds)
  141. extended = strings.TrimSpace(ex)
  142. sort.Strings(tags)
  143. return
  144. }