-- glob.lua
  1. local lpeg = vim.lpeg
  2. local P, S, V, R, B = lpeg.P, lpeg.S, lpeg.V, lpeg.R, lpeg.B
  3. local C, Cc, Ct, Cf, Cmt = lpeg.C, lpeg.Cc, lpeg.Ct, lpeg.Cf, lpeg.Cmt
  4. local M = {}
  5. local pathsep = P('/')
  6. --- Parses a raw glob into an |lua-lpeg| pattern.
  7. ---
  8. --- This uses glob semantics from LSP 3.17.0: https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#pattern
  9. ---
  10. --- Glob patterns can have the following syntax:
  11. --- - `*` to match one or more characters in a path segment
  12. --- - `?` to match on one character in a path segment
  13. --- - `**` to match any number of path segments, including none
  14. --- - `{}` to group conditions (e.g. `*.{ts,js}` matches TypeScript and JavaScript files)
  15. --- - `[]` to declare a range of characters to match in a path segment (e.g., `example.[0-9]` to match on `example.0`, `example.1`, …)
  16. --- - `[!...]` to negate a range of characters to match in a path segment (e.g., `example.[!0-9]` to match on `example.a`, `example.b`, but not `example.0`)
  17. ---
  18. ---@param pattern string The raw glob pattern
  19. ---@return vim.lpeg.Pattern pattern An |lua-lpeg| representation of the pattern
  20. function M.to_lpeg(pattern)
  21. local function class(inv, ranges)
  22. local patt = R(unpack(vim.tbl_map(table.concat, ranges)))
  23. if inv == '!' then
  24. patt = P(1) - patt
  25. end
  26. return patt
  27. end
  28. local function condlist(conds, after)
  29. return vim.iter(conds):fold(P(false), function(acc, cond)
  30. return acc + cond * after
  31. end)
  32. end
  33. local function mul(acc, m)
  34. return acc * m
  35. end
  36. local function star(stars, after)
  37. return (-after * (P(1) - pathsep)) ^ #stars * after
  38. end
  39. local function dstar(after)
  40. return (-after * P(1)) ^ 0 * after
  41. end
  42. -- luacheck: push ignore s
  43. local function cut(_s, idx, match)
  44. return idx, match
  45. end
  46. -- luacheck: pop
  47. --- @diagnostic disable-next-line: missing-fields
  48. local p = P({
  49. 'Pattern',
  50. Pattern = V('Elem') ^ -1 * V('End'),
  51. Elem = Cmt(
  52. Cf(
  53. (V('DStar') + V('Star') + V('Ques') + V('Class') + V('CondList') + V('Literal'))
  54. * (V('Elem') + V('End')),
  55. mul
  56. ),
  57. cut
  58. ),
  59. DStar = (B(pathsep) + -B(P(1)))
  60. * P('**')
  61. * (pathsep * (V('Elem') + V('End')) + V('End'))
  62. / dstar,
  63. Star = C(P('*') ^ 1) * (V('Elem') + V('End')) / star,
  64. Ques = P('?') * Cc(P(1) - pathsep),
  65. Class = P('[')
  66. * C(P('!') ^ -1)
  67. * Ct(Ct(C(P(1)) * P('-') * C(P(1) - P(']'))) ^ 1 * P(']'))
  68. / class,
  69. CondList = P('{') * Ct(V('Cond') * (P(',') * V('Cond')) ^ 0) * P('}') * V('Pattern') / condlist,
  70. -- TODO: '*' inside a {} condition is interpreted literally but should probably have the same
  71. -- wildcard semantics it usually has.
  72. -- Fixing this is non-trivial because '*' should match non-greedily up to "the rest of the
  73. -- pattern" which in all other cases is the entire succeeding part of the pattern, but at the end of a {}
  74. -- condition means "everything after the {}" where several other options separated by ',' may
  75. -- exist in between that should not be matched by '*'.
  76. Cond = Cmt(Cf((V('Ques') + V('Class') + V('Literal') - S(',}')) ^ 1, mul), cut) + Cc(P(0)),
  77. Literal = P(1) / P,
  78. End = P(-1) * Cc(P(-1)),
  79. })
  80. local lpeg_pattern = p:match(pattern) --[[@as vim.lpeg.Pattern?]]
  81. assert(lpeg_pattern, 'Invalid glob')
  82. return lpeg_pattern
  83. end
  84. return M