fuzzysearch.nim 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142
  1. # A Fuzzy Match implementation inspired by the sublime text fuzzy match algorithm
  2. # as described here: https://blog.forrestthewoods.com/reverse-engineering-sublime-text-s-fuzzy-match-4cffeed33fdb
  3. # Heavily modified to provide more subjectively useful results
  4. # for on the Nim manual.
  5. #
  6. import strutils
  7. import math
  8. const
  9. MaxUnmatchedLeadingChar = 3
  10. ## Maximum number of times the penalty for unmatched leading chars is applied.
  11. HeadingScaleFactor = 0.5
  12. ## The score from before the colon Char is multiplied by this.
  13. ## This is to weight function signatures and descriptions over module titles.
  14. type
  15. ScoreCard = enum
  16. StartMatch = -100 ## Start matching.
  17. LeadingCharDiff = -3 ## An unmatched, leading character was found.
  18. CharDiff = -1 ## An unmatched character was found.
  19. CharMatch = 0 ## A matched character was found.
  20. ConsecutiveMatch = 5 ## A consecutive match was found.
  21. LeadingCharMatch = 10 ## The character matches the beginning of the
  22. ## string or the first character of a word
  23. ## or camel case boundary.
  24. WordBoundryMatch = 20 ## The last ConsecutiveCharMatch that
  25. ## immediately precedes the end of the string,
  26. ## end of the pattern, or a LeadingCharMatch.
  27. proc fuzzyMatch*(pattern, str: cstring) : tuple[score: int, matched: bool] =
  28. var
  29. scoreState = StartMatch
  30. headerMatched = false
  31. unmatchedLeadingCharCount = 0
  32. consecutiveMatchCount = 0
  33. strIndex = 0
  34. patIndex = 0
  35. score = 0
  36. template transition(nextState) =
  37. scoreState = nextState
  38. score += ord(scoreState)
  39. while (strIndex < str.len) and (patIndex < pattern.len):
  40. var
  41. patternChar = pattern[patIndex].toLowerAscii
  42. strChar = str[strIndex].toLowerAscii
  43. # Ignore certain characters
  44. if patternChar in {'_', ' ', '.'}:
  45. patIndex += 1
  46. continue
  47. if strChar in {'_', ' ', '.'}:
  48. strIndex += 1
  49. continue
  50. # Since this algorithm will be used to search against Nim documentation,
  51. # the below logic prioritizes headers.
  52. if not headerMatched and strChar == ':':
  53. headerMatched = true
  54. scoreState = StartMatch
  55. score = int(floor(HeadingScaleFactor * float(score)))
  56. patIndex = 0
  57. strIndex += 1
  58. continue
  59. if strChar == patternChar:
  60. case scoreState
  61. of StartMatch, WordBoundryMatch:
  62. scoreState = LeadingCharMatch
  63. of CharMatch:
  64. transition(ConsecutiveMatch)
  65. of LeadingCharMatch, ConsecutiveMatch:
  66. consecutiveMatchCount += 1
  67. scoreState = ConsecutiveMatch
  68. score += ord(ConsecutiveMatch) * consecutiveMatchCount
  69. if scoreState == LeadingCharMatch:
  70. score += ord(LeadingCharMatch)
  71. var onBoundary = (patIndex == high(pattern))
  72. if not onBoundary and strIndex < high(str):
  73. let
  74. nextPatternChar = toLowerAscii(pattern[patIndex + 1])
  75. nextStrChar = toLowerAscii(str[strIndex + 1])
  76. onBoundary = (
  77. nextStrChar notin {'a'..'z'} and
  78. nextStrChar != nextPatternChar
  79. )
  80. if onBoundary:
  81. transition(WordBoundryMatch)
  82. of CharDiff, LeadingCharDiff:
  83. var isLeadingChar = (
  84. str[strIndex - 1] notin Letters or
  85. str[strIndex - 1] in {'a'..'z'} and
  86. str[strIndex] in {'A'..'Z'}
  87. )
  88. if isLeadingChar:
  89. scoreState = LeadingCharMatch
  90. #a non alpha or a camel case transition counts as a leading char.
  91. # Transition the state, but don't give the bonus yet; wait until we verify a consecutive match.
  92. else:
  93. transition(CharMatch)
  94. patIndex += 1
  95. else:
  96. case scoreState
  97. of StartMatch:
  98. transition(LeadingCharDiff)
  99. of ConsecutiveMatch:
  100. transition(CharDiff)
  101. consecutiveMatchCount = 0
  102. of LeadingCharDiff:
  103. if unmatchedLeadingCharCount < MaxUnmatchedLeadingChar:
  104. transition(LeadingCharDiff)
  105. unmatchedLeadingCharCount += 1
  106. else:
  107. transition(CharDiff)
  108. strIndex += 1
  109. if patIndex == pattern.len and (strIndex == str.len or str[strIndex] notin Letters):
  110. score += 10
  111. result = (
  112. score: max(0, score),
  113. matched: (score > 0),
  114. )