fuzzysearch.inl 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122
  1. // Returns true if each character in pattern is found sequentially within str
  2. // https://github.com/forrestthewoods/lib_fts
  3. bool fuzzy_match(char const * pattern, char const * str)
  4. {
  5. while (*pattern != '\0' && *str != '\0') {
  6. if (tolower(*pattern) == tolower(*str))
  7. ++pattern;
  8. ++str;
  9. }
  10. return *pattern == '\0' ? true : false;
  11. }
  12. bool fuzzy_match(char const * pattern, char const * str, int & outScore)
  13. {
  14. // Score consts
  15. const int adjacency_bonus = 5; // bonus for adjacent matches
  16. const int separator_bonus = 10; // bonus if match occurs after a separator
  17. const int camel_bonus = 10; // bonus if match is uppercase and prev is lower
  18. const int leading_letter_penalty = -3; // penalty applied for every letter in str before the first match
  19. const int max_leading_letter_penalty = -9; // maximum penalty for leading letters
  20. const int unmatched_letter_penalty = -1; // penalty for every letter that doesn't matter
  21. // Loop variables
  22. int score = 0;
  23. char const * patternIter = pattern;
  24. char const * strIter = str;
  25. bool prevMatched = false;
  26. bool prevLower = false;
  27. bool prevSeparator = true; // true so if first letter match gets separator bonus
  28. // Use "best" matched letter if multiple string letters match the pattern
  29. char const * bestLetter = NULL;
  30. int bestLetterScore = 0;
  31. // Loop over strings
  32. while (*strIter != '\0')
  33. {
  34. const char patternLetter = *patternIter;
  35. const char strLetter = *strIter;
  36. bool nextMatch = *patternIter != '\0' && tolower(patternLetter) == tolower(strLetter);
  37. bool rematch = bestLetter && tolower(*bestLetter) == tolower(strLetter);
  38. bool advanced = nextMatch && bestLetter;
  39. bool patternRepeat = bestLetter && patternIter != '\0' && tolower(*bestLetter) == tolower(patternLetter);
  40. if (advanced || patternRepeat)
  41. {
  42. score += bestLetterScore;
  43. bestLetter = NULL;
  44. bestLetterScore = 0;
  45. }
  46. if (nextMatch || rematch)
  47. {
  48. int newScore = 0;
  49. // Apply penalty for each letter before the first pattern match
  50. if (patternIter == pattern)
  51. {
  52. int count = int(strIter - str);
  53. int penalty = std::max(leading_letter_penalty * count, max_leading_letter_penalty);
  54. score += penalty;
  55. }
  56. // Apply bonus for consecutive bonuses
  57. if (prevMatched)
  58. newScore += adjacency_bonus;
  59. // Apply bonus for matches after a separator
  60. if (prevSeparator)
  61. newScore += separator_bonus;
  62. // Apply bonus across camel case boundaries
  63. if (prevLower && isupper(strLetter))
  64. newScore += camel_bonus;
  65. // Update pattern iter IFF the next pattern letter was matched
  66. if (nextMatch)
  67. ++patternIter;
  68. // Update best letter in str which may be for a "next" letter or a rematch
  69. if (newScore >= bestLetterScore)
  70. {
  71. // Apply penalty for now skipped letter
  72. if (bestLetter != NULL)
  73. score += unmatched_letter_penalty;
  74. bestLetter = strIter;
  75. bestLetterScore = newScore;
  76. }
  77. prevMatched = true;
  78. }
  79. else
  80. {
  81. score += unmatched_letter_penalty;
  82. prevMatched = false;
  83. }
  84. // Separators should be more easily defined
  85. prevLower = islower(strLetter) != 0;
  86. prevSeparator = strLetter == '_' || strLetter == ' ';
  87. ++strIter;
  88. }
  89. // Apply score for last match
  90. if (bestLetter)
  91. score += bestLetterScore;
  92. // Did not match full pattern
  93. if (*patternIter != '\0')
  94. return false;
  95. outScore = score;
  96. return true;
  97. }