languagetree.lua 38 KB


  1. --- @brief A [LanguageTree]() contains a tree of parsers: the root treesitter parser for {lang} and
  2. --- any "injected" language parsers, which themselves may inject other languages, recursively.
  3. --- For example a Lua buffer containing some Vimscript commands needs multiple parsers to fully
  4. --- understand its contents.
  5. ---
  6. --- To create a LanguageTree (parser object) for a given buffer and language, use:
  7. ---
  8. --- ```lua
  9. --- local parser = vim.treesitter.get_parser(bufnr, lang)
  10. --- ```
  11. ---
  12. --- (where `bufnr=0` means current buffer). `lang` defaults to 'filetype'.
  13. --- Note: currently the parser is retained for the lifetime of a buffer but this may change;
  14. --- a plugin should keep a reference to the parser object if it wants incremental updates.
  15. ---
  16. --- Whenever you need to access the current syntax tree, parse the buffer:
  17. ---
  18. --- ```lua
  19. --- local tree = parser:parse({ start_row, end_row })
  20. --- ```
  21. ---
  22. --- This returns a table of immutable |treesitter-tree| objects representing the current state of
  23. --- the buffer. When the plugin wants to access the state after a (possible) edit it must call
  24. --- `parse()` again. If the buffer wasn't edited, the same tree will be returned again without extra
  25. --- work. If the buffer was parsed before, incremental parsing will be done of the changed parts.
  26. ---
  27. --- Note: To use the parser directly inside a |nvim_buf_attach()| Lua callback, you must call
  28. --- |vim.treesitter.get_parser()| before you register your callback. But preferably parsing
  29. --- shouldn't be done directly in the change callback anyway as they will be very frequent. Rather
  30. --- a plugin that does any kind of analysis on a tree should use a timer to throttle too frequent
  31. --- updates.
  32. ---
  33. -- Debugging:
  34. --
  35. -- vim.g.__ts_debug levels:
  36. -- - 1. Messages from languagetree.lua
  37. -- - 2. Parse messages from treesitter
  38. -- - 3. Lex messages from treesitter
  39. --
  40. -- Log file can be found in stdpath('log')/treesitter.log
  41. local query = require('vim.treesitter.query')
  42. local language = require('vim.treesitter.language')
  43. local Range = require('vim.treesitter._range')
  44. local default_parse_timeout_ms = 3
  45. ---@alias TSCallbackName
  46. ---| 'changedtree'
  47. ---| 'bytes'
  48. ---| 'detach'
  49. ---| 'child_added'
  50. ---| 'child_removed'
  51. ---@alias TSCallbackNameOn
  52. ---| 'on_changedtree'
  53. ---| 'on_bytes'
  54. ---| 'on_detach'
  55. ---| 'on_child_added'
  56. ---| 'on_child_removed'
  --- Maps each `on_*` callback key to the short callback name that is used to
  --- index the per-tree handler tables (`_callbacks` / `_callbacks_rec`).
  --- @type table<TSCallbackNameOn,TSCallbackName>
  local TSCallbackNames = {
    on_bytes = 'bytes',
    on_changedtree = 'changedtree',
    on_child_added = 'child_added',
    on_child_removed = 'child_removed',
    on_detach = 'detach',
  }
  65. ---@nodoc
  66. ---@class vim.treesitter.LanguageTree
  67. ---@field private _callbacks table<TSCallbackName,function[]> Callback handlers
  68. ---@field package _callbacks_rec table<TSCallbackName,function[]> Callback handlers (recursive)
  69. ---@field private _children table<string,vim.treesitter.LanguageTree> Injected languages
  70. ---@field private _injection_query vim.treesitter.Query Queries defining injected languages
  71. ---@field private _injections_processed boolean
  72. ---@field private _opts table Options
  73. ---@field private _parser TSParser Parser for language
  74. ---Table of regions for which the tree is currently running an async parse
  75. ---@field private _ranges_being_parsed table<string, boolean>
  76. ---Table of callback queues, keyed by each region for which the callbacks should be run
  77. ---@field private _cb_queues table<string, fun(err?: string, trees?: table<integer, TSTree>)[]>
  78. ---@field private _has_regions boolean
  79. ---@field private _regions table<integer, Range6[]>?
  80. ---List of regions this tree should manage and parse. If nil then regions are
  81. ---taken from _trees. This is mostly a short-lived cache for included_regions()
  82. ---@field private _lang string Language name
  83. ---@field private _parent? vim.treesitter.LanguageTree Parent LanguageTree
  84. ---@field private _source (integer|string) Buffer or string to parse
  85. ---@field private _trees table<integer, TSTree> Reference to parsed tree (one for each region).
  86. ---Each key is the index of region, which is synced with _regions and _valid.
  87. ---@field private _valid boolean|table<integer,boolean> If the parsed tree is valid
  88. ---@field private _logger? fun(logtype: string, msg: string)
  89. ---@field private _logfile? file*
  90. local LanguageTree = {}
  91. ---Optional arguments:
  92. ---@class vim.treesitter.LanguageTree.new.Opts
  93. ---@inlinedoc
  94. ---@field queries? table<string,string> -- Deprecated
  95. ---@field injections? table<string,string>
  96. LanguageTree.__index = LanguageTree
  --- @nodoc
  ---
  --- Creates a LanguageTree: the root treesitter parser for {lang} plus (later) any
  --- "injected" language parsers, which may themselves inject further languages.
  ---
  --- Fails with an assertion error if `language.add(lang)` does not succeed.
  ---
  ---@param source (integer|string) Buffer or text string to parse
  ---@param lang string Root language of this tree
  ---@param opts vim.treesitter.LanguageTree.new.Opts?
  ---@return vim.treesitter.LanguageTree parser object
  function LanguageTree.new(source, lang, opts)
    assert(language.add(lang))
    opts = opts or {}

    -- Buffer handle 0 is shorthand for the current buffer; resolve it up front.
    if source == 0 then
      source = vim.api.nvim_get_current_buf()
    end

    -- An injection query passed through `opts.injections[lang]` wins; otherwise
    -- fall back to whatever `query.get(lang, 'injections')` provides.
    local custom_injection = (opts.injections or {})[lang]
    local injection_query = custom_injection and query.parse(lang, custom_injection)
      or query.get(lang, 'injections')

    --- @class vim.treesitter.LanguageTree
    local self = {
      _source = source,
      _lang = lang,
      _opts = opts,
      _children = {},
      _trees = {},
      _injection_query = injection_query,
      _has_regions = false,
      _injections_processed = false,
      _valid = false,
      _parser = vim._create_ts_parser(lang),
      _ranges_being_parsed = {},
      _cb_queues = {},
      _callbacks = {},
      _callbacks_rec = {},
    }
    setmetatable(self, LanguageTree)

    -- Logging is only wired up when the debug level is a number.
    local debug_level = vim.g.__ts_debug
    if type(debug_level) == 'number' then
      self:_set_logger()
      self:_log('START')
    end

    -- Every known callback name gets an (initially empty) handler list.
    for _, cb_name in pairs(TSCallbackNames) do
      self._callbacks[cb_name] = {}
      self._callbacks_rec[cb_name] = {}
    end

    return self
  end
  --- @private
  --- Opens `treesitter.log` inside `stdpath('log')` (creating the directory if needed)
  --- in append mode and installs a logger that writes `source:lang:(logtype) msg` lines
  --- to it. The same logger is handed to the underlying parser; parse messages are
  --- enabled when `vim.g.__ts_debug >= 2`, lex messages when `vim.g.__ts_debug >= 3`.
  ---
  --- Raises an error if the log file cannot be opened.
  function LanguageTree:_set_logger()
    local source = self:source()
    -- String sources are logged as the literal 'text'; buffers by their number.
    source = type(source) == 'string' and 'text' or tostring(source)

    local lang = self:lang()

    local logdir = vim.fn.stdpath('log') --[[@as string]]
    vim.fn.mkdir(logdir, 'p')
    local logfilename = vim.fs.joinpath(logdir, 'treesitter.log')

    local logfile, openerr = io.open(logfilename, 'a+')
    if not logfile or openerr then
      -- error() does not return, so nothing may follow it in this branch.
      error(string.format('Could not open file (%s) for logging: %s', logfilename, openerr))
    end

    self._logfile = logfile

    self._logger = function(logtype, msg)
      self._logfile:write(string.format('%s:%s:(%s) %s\n', source, lang, logtype, msg))
      -- Flush after every message so the file is current even if Nvim exits abruptly.
      self._logfile:flush()
    end

    local log_lex = vim.g.__ts_debug >= 3
    local log_parse = vim.g.__ts_debug >= 2
    self._parser:_set_logger(log_lex, log_parse, self._logger)
  end
  ---Collect varargs together with their exact count, so `nil` values survive.
  ---@param ... any
  ---@return integer count
  ---@return table values
  local function pack_results(...)
    return select('#', ...), { ... }
  end

  ---Measure execution time of a function
  ---
  ---Returns the elapsed time in milliseconds followed by every value returned by
  ---{f}. The result count is tracked explicitly: a plain `{ f(...) }` / `unpack(r)`
  ---round trip relies on `#r`, which is unspecified once any result is `nil`, and
  ---would silently drop or truncate such results.
  ---@generic R1, R2, R3
  ---@param f fun(): R1, R2, R3
  ---@return number, R1, R2, R3
  local function tcall(f, ...)
    local start = vim.uv.hrtime()
    ---@diagnostic disable-next-line
    local nresults, results = pack_results(f(...))
    --- @type number
    local duration = (vim.uv.hrtime() - start) / 1000000 -- ns -> ms
    --- @diagnostic disable-next-line: redundant-return-value
    return duration, unpack(results, 1, nresults)
  end
  ---@private
  --- Writes a debug message through the installed logger. Does nothing when no
  --- logger is installed or when `vim.g.__ts_debug` is unset or below 1.
  ---
  --- If the first argument is a function it is called and its return values are
  --- used as the message parts, so expensive formatting only happens when logging
  --- is active. Non-string parts are rendered with `vim.inspect` on a single line.
  ---@param ... any
  function LanguageTree:_log(...)
    if not self._logger then
      return
    end

    local level = vim.g.__ts_debug
    if not level or level < 1 then
      return
    end

    local args = { ... }
    if type(args[1]) == 'function' then
      args = { args[1]() }
    end

    -- Level 2 is the function that called _log(); its name and line tag the message.
    local info = debug.getinfo(2, 'nl')
    local nregions = vim.tbl_count(self:included_regions())

    local parts = {
      string.format('%s:%d: (#regions=%d) ', info.name or '???', info.currentline or 0, nregions),
    }
    for _, item in ipairs(args) do
      if type(item) ~= 'string' then
        item = vim.inspect(item, { newline = ' ', indent = '' })
      end
      parts[#parts + 1] = item
    end

    self._logger('nvim', table.concat(parts, ' '))
  end
  --- Invalidates this parser and its children.
  ---
  --- Should only be called when the tracked state of the LanguageTree is not valid against the parse
  --- tree in treesitter. Doesn't clear filesystem cache. Called often, so needs to be fast.
  ---
  --- With {reload} set, every currently held tree is reported through the
  --- 'changedtree' callback and then dropped, so all trees get reparsed.
  ---@param reload boolean|nil
  function LanguageTree:invalidate(reload)
    self._valid = false
    self._parser:reset()

    if reload then
      -- The buffer was reloaded: announce each old tree, then forget them all.
      for _, tree in pairs(self._trees) do
        self:_do_callback('changedtree', tree:included_ranges(true), tree)
      end
      self._trees = {}
    end

    for _, child_tree in pairs(self._children) do
      child_tree:invalidate(reload)
    end
  end
  --- Returns all trees of the regions parsed by this parser.
  --- Child languages are not included.
  --- The result is list-like if
  --- * this LanguageTree is the root, in which case the result is empty or a singleton list; or
  --- * the root LanguageTree is fully parsed.
  ---
  ---@return table<integer, TSTree>
  function LanguageTree:trees()
    local trees = self._trees
    return trees
  end
  --- Gets the language name this tree node was created for.
  --- @return string
  function LanguageTree:lang()
    local lang = self._lang
    return lang
  end
  --- Returns whether this LanguageTree is valid, i.e., |LanguageTree:trees()| reflects the latest
  --- state of the source. If invalid, user should call |LanguageTree:parse()|.
  ---
  --- When validity is tracked per region and every region turns out to be valid,
  --- the per-region table is collapsed to a plain `true` as a side effect.
  ---@param exclude_children boolean|nil whether to ignore the validity of children (default `false`)
  ---@return boolean
  function LanguageTree:is_valid(exclude_children)
    local valid = self._valid

    -- Per-region bookkeeping: a single invalid region makes the whole tree invalid.
    if type(valid) == 'table' then
      for region_index in pairs(self:included_regions()) do
        if not valid[region_index] then
          return false
        end
      end
    end

    if not exclude_children then
      -- Until injections have been processed the set of children is not trustworthy.
      if not self._injections_processed then
        return false
      end

      for _, child_tree in pairs(self._children) do
        if not child_tree:is_valid(exclude_children) then
          return false
        end
      end
    end

    if type(valid) == 'boolean' then
      return valid
    end

    -- Every region checked out: compress the bookkeeping to a single flag.
    self._valid = true
    return true
  end
  --- Returns the child trees of this tree, keyed by language name.
  --- @return table<string,vim.treesitter.LanguageTree>
  function LanguageTree:children()
    local children = self._children
    return children
  end
  --- Returns what this language tree parses: a buffer number or a string.
  --- @return integer|string
  function LanguageTree:source()
    local source = self._source
    return source
  end
  --- Decides whether {region} is selected by a requested parse {range}.
  ---
  --- * An empty region always matches.
  --- * A `nil` range matches nothing else.
  --- * A boolean range is returned as-is (`true` selects everything, `false` nothing).
  --- * Otherwise the region matches if `Range.intercepts` holds for any of its ranges.
  --- @param region Range6[]
  --- @param range? boolean|Range
  --- @return boolean
  local function intercepts_region(region, range)
    if #region == 0 then
      return true
    end

    local range_kind = type(range)
    if range_kind == 'nil' then
      return false
    elseif range_kind == 'boolean' then
      return range
    end

    for idx = 1, #region do
      if Range.intercepts(region[idx], range) then
        return true
      end
    end

    return false
  end
  --- @private
  --- Parses every invalid region that is selected by {range}, either through its
  --- new ranges or through the ranges of the tree it already has. Each freshly
  --- parsed tree is reported through the 'changedtree' callback.
  ---
  --- @param range boolean|Range?
  --- @param timeout integer? per-parse timeout in milliseconds
  --- @return Range6[] changes
  --- @return integer no_regions_parsed
  --- @return number total_parse_time
  --- @return boolean finished `true` once all selected regions were parsed; `false` if a parse
  ---   returned no tree and this function bailed out early
  function LanguageTree:_parse_regions(range, timeout)
    local changes = {}
    local no_regions_parsed = 0
    local total_parse_time = 0

    if type(self._valid) ~= 'table' then
      self._valid = {}
    end

    -- ms -> micros; with no timeout given the parser is handed 0.
    local timeout_us = timeout and timeout * 1000 or 0

    -- If there are no ranges, set to an empty list
    -- so the included ranges in the parser are cleared.
    for i, ranges in pairs(self:included_regions()) do
      local needs_parse = not self._valid[i]
        and (
          intercepts_region(ranges, range)
          or (self._trees[i] and intercepts_region(self._trees[i]:included_ranges(false), range))
        )

      if needs_parse then
        local old_tree = self._trees[i]

        self._parser:set_included_ranges(ranges)
        self._parser:set_timeout(timeout_us)

        local parse_time, tree, tree_changes =
          tcall(self._parser.parse, self._parser, old_tree, self._source, true)

        if not tree then
          -- No tree came back: report what was done so far as unfinished.
          return changes, no_regions_parsed, total_parse_time, false
        end

        -- On an initial parse there is no old tree to diff against, so pass the
        -- tree's own ranges to the callback instead.
        local cb_changes = old_tree and tree_changes or tree:included_ranges(true)
        self:_do_callback('changedtree', cb_changes, tree)

        self._trees[i] = tree
        self._valid[i] = true
        vim.list_extend(changes, tree_changes)

        no_regions_parsed = no_regions_parsed + 1
        total_parse_time = total_parse_time + parse_time
      end
    end

    return changes, no_regions_parsed, total_parse_time, true
  end
  --- @private
  --- Runs the injection query and synchronises the child trees with its result:
  --- children are created for newly injected languages, receive their regions,
  --- and children whose language no longer appears are removed.
  --- @return number # time spent in `_get_injections`, as measured by `tcall`
  function LanguageTree:_add_injections()
    local seen_langs = {} ---@type table<string,boolean>

    local query_time, injections_by_lang = tcall(self._get_injections, self)

    for lang, injection_regions in pairs(injections_by_lang) do
      -- Child language trees should just be ignored if not found, since
      -- they can depend on the text of a node. Intermediate strings
      -- would cause errors for unknown parsers.
      if pcall(language.add, lang) then
        local child = self._children[lang] or self:add_child(lang)
        child:set_included_regions(injection_regions)
        seen_langs[lang] = true
      end
    end

    -- Drop children for languages the query did not produce this time.
    for lang in pairs(self._children) do
      if not seen_langs[lang] then
        self:remove_child(lang)
      end
    end

    return query_time
  end
  --- Turns a parse range into a string key: a table range becomes its elements
  --- joined by commas, anything else goes through `tostring`.
  --- @param range boolean|Range?
  --- @return string
  local function range_to_string(range)
    if type(range) == 'table' then
      return table.concat(range, ',')
    end
    return tostring(range)
  end
  --- @private
  --- Appends {callback} to the queue of callbacks waiting on a parse of {range},
  --- creating the queue on first use.
  --- @param range boolean|Range?
  --- @param callback fun(err?: string, trees?: table<integer, TSTree>)
  function LanguageTree:_push_async_callback(range, callback)
    local key = range_to_string(range)

    local queue = self._cb_queues[key]
    if not queue then
      queue = {}
      self._cb_queues[key] = queue
    end

    queue[#queue + 1] = callback
  end
  --- @private
  --- Invokes every callback queued for {range} with ({err}, {trees}), then clears
  --- both the queue and the "being parsed" marker for that range.
  --- @param range boolean|Range?
  --- @param err? string
  --- @param trees? table<integer, TSTree>
  function LanguageTree:_run_async_callbacks(range, err, trees)
    local key = range_to_string(range)

    local pending = self._cb_queues[key]
    for _, callback in ipairs(pending) do
      callback(err, trees)
    end

    self._cb_queues[key] = nil
    self._ranges_being_parsed[key] = nil
  end
  --- Run an asynchronous parse, calling {on_parse} when complete.
  ---
  --- The parse proceeds in time-sliced steps rescheduled with `vim.schedule` until
  --- it finishes or the accumulated parse time exceeds 'redrawtime', in which case
  --- the queued callbacks receive the error `'TIMEOUT'`. If a parse for the same
  --- {range} is already running, {on_parse} is only queued.
  ---
  --- Works for both buffer and string sources: the changedtick bookkeeping is only
  --- done for buffers, since a string source has no buffer to query.
  ---
  --- @private
  --- @param range boolean|Range?
  --- @param on_parse fun(err?: string, trees?: table<integer, TSTree>)
  --- @return table<integer, TSTree>? trees the list of parsed trees, if parsing completed synchronously
  function LanguageTree:_async_parse(range, on_parse)
    self:_push_async_callback(range, on_parse)

    -- If we are already running an async parse, just queue the callback.
    local range_string = range_to_string(range)
    if self._ranges_being_parsed[range_string] then
      return
    end
    self._ranges_being_parsed[range_string] = true

    local source = self._source
    -- `vim.b[x]` only yields a per-buffer accessor for a numeric handle; for a
    -- string source it would be a buffer-variable lookup, and indexing its
    -- (normally nil) result raises. Only touch it for buffer parsers.
    local is_buffer_parser = type(source) == 'number'
    local buf = is_buffer_parser and vim.b[source] or nil
    local ct = is_buffer_parser and buf.changedtick or nil
    local total_parse_time = 0
    local redrawtime = vim.o.redrawtime
    -- No per-step timeout when synchronous parsing is forced.
    local timeout = not vim.g._ts_force_sync_parsing and default_parse_timeout_ms or nil

    local function step()
      if is_buffer_parser then
        if not vim.api.nvim_buf_is_valid(source) then
          return nil
        end

        -- If buffer was changed in the middle of parsing, reset parse state
        if buf.changedtick ~= ct then
          ct = buf.changedtick
          total_parse_time = 0
        end
      end

      local parse_time, trees, finished = tcall(self._parse, self, range, timeout)
      total_parse_time = total_parse_time + parse_time

      if finished then
        self:_run_async_callbacks(range, nil, trees)
        return trees
      elseif total_parse_time > redrawtime then
        self:_run_async_callbacks(range, 'TIMEOUT', nil)
        return nil
      else
        -- Out of budget for this slice; continue on a later event-loop tick.
        vim.schedule(step)
      end
    end

    return step()
  end
  --- Recursively parse all regions in the language tree using |treesitter-parsers|
  --- for the corresponding languages and run injection queries on the parsed trees
  --- to determine whether child trees should be created and parsed.
  ---
  --- Any region with empty range (`{}`, typically only the root tree) is always parsed;
  --- otherwise (typically injections) only if it intersects {range} (or if {range} is `true`).
  ---
  --- @param range boolean|Range|nil: Parse this range in the parser's source.
  ---     Set to `true` to run a complete parse of the source (Note: Can be slow!)
  ---     Set to `false|nil` to only parse regions with empty ranges (typically
  ---     only the root tree without injections).
  --- @param on_parse fun(err?: string, trees?: table<integer, TSTree>)? Function invoked when parsing completes.
  ---     When provided and `vim.g._ts_force_sync_parsing` is not set, parsing will run
  ---     asynchronously. The first argument to the function is a string representing the error type,
  ---     in case of a failure (currently only possible for timeouts). The second argument is the list
  ---     of trees returned by the parse (upon success), or `nil` if the parse timed out (determined
  ---     by 'redrawtime').
  ---
  ---     If parsing was still able to finish synchronously (within 3ms), `parse()` returns the list
  ---     of trees. Otherwise, it returns `nil`.
  --- @return table<integer, TSTree>?
  function LanguageTree:parse(range, on_parse)
    if not on_parse then
      -- Synchronous path: only the trees are returned, the "finished" flag is dropped.
      local trees = self:_parse(range)
      return trees
    end

    return self:_async_parse(range, on_parse)
  end
  --- @private
  --- Parses this tree's invalid regions, refreshes injections if needed, and then
  --- parses each child in turn. When {timeout} is given, the time spent here and in
  --- each child is subtracted from it, and the function stops early once it hits 0.
  --- @param range boolean|Range|nil
  --- @param timeout integer? remaining time budget in milliseconds
  --- @return table<integer, TSTree> trees
  --- @return boolean finished falsy when parsing stopped before everything was parsed
  function LanguageTree:_parse(range, timeout)
    if self:is_valid() then
      self:_log('valid')
      return self._trees, true
    end

    local changes --- @type Range6[]?

    -- Collect some stats
    local no_regions_parsed = 0
    local query_time = 0
    local total_parse_time = 0

    -- At least 1 region is invalid
    if not self:is_valid(true) then
      local regions_done
      changes, no_regions_parsed, total_parse_time, regions_done =
        self:_parse_regions(range, timeout)
      timeout = timeout and math.max(timeout - total_parse_time, 0)

      if not regions_done then
        return self._trees, false
      end

      -- Need to run injections when we parsed something
      if no_regions_parsed > 0 then
        self._injections_processed = false
      end
    end

    -- Injections are only (re)computed when a range was actually requested.
    if range and not self._injections_processed then
      query_time = self:_add_injections()
      self._injections_processed = true
    end

    self:_log({
      changes = changes and #changes > 0 and changes or nil,
      regions_parsed = no_regions_parsed,
      parse_time = total_parse_time,
      query_time = query_time,
      range = range,
    })

    for _, child in pairs(self._children) do
      -- Budget exhausted: leave the remaining children for a later call.
      if timeout == 0 then
        return self._trees, false
      end

      local child_time, _, child_finished = tcall(child._parse, child, range, timeout)
      timeout = timeout and math.max(timeout - child_time, 0)

      if not child_finished then
        return self._trees, child_finished
      end
    end

    return self._trees, true
  end
  --- Invokes the callback for each |LanguageTree| recursively.
  ---
  --- Note: This includes the invoking tree's child trees as well. This tree's own
  --- trees are visited first, then each child is traversed in turn.
  ---
  ---@param fn fun(tree: TSTree, ltree: vim.treesitter.LanguageTree)
  function LanguageTree:for_each_tree(fn)
    for _, ts_tree in pairs(self._trees) do
      fn(ts_tree, self)
    end

    for _, child_ltree in pairs(self._children) do
      child_ltree:for_each_tree(fn)
    end
  end
  --- Adds a child language to this |LanguageTree|.
  ---
  --- If the language already exists as a child, it will first be removed.
  --- The new child shares this tree's source and options, inherits its recursive
  --- callbacks, and is announced through the 'child_added' callback.
  ---
  ---@private
  ---@param lang string Language to add.
  ---@return vim.treesitter.LanguageTree injected
  function LanguageTree:add_child(lang)
    if self._children[lang] then
      self:remove_child(lang)
    end

    local new_child = LanguageTree.new(self._source, lang, self._opts)

    -- Inherit recursive callbacks
    for cb_name, handlers in pairs(self._callbacks_rec) do
      vim.list_extend(new_child._callbacks_rec[cb_name], handlers)
    end

    new_child._parent = self
    self._children[lang] = new_child

    self:_do_callback('child_added', self._children[lang])

    -- Deliberately read back from the table after the callbacks have run.
    return self._children[lang]
  end
  ---Returns the tree this one was injected into. `nil` for the root tree.
  ---@return vim.treesitter.LanguageTree?
  function LanguageTree:parent()
    local parent = self._parent
    return parent
  end
  --- Removes a child language from this |LanguageTree|.
  ---
  --- The child is detached, destroyed, and then reported through the
  --- 'child_removed' callback. Does nothing if no child exists for {lang}.
  ---
  ---@private
  ---@param lang string Language to remove.
  function LanguageTree:remove_child(lang)
    local removed = self._children[lang]
    if not removed then
      return
    end

    self._children[lang] = nil
    removed:destroy()
    self:_do_callback('child_removed', removed)
  end
  --- Destroys this |LanguageTree| and all its children.
  ---
  --- Any cleanup logic should be performed here.
  ---
  --- Note: This DOES NOT remove this tree from a parent. Instead,
  --- `remove_child` must be called on the parent to remove it.
  function LanguageTree:destroy()
    -- Nothing to clean up on this tree itself yet; just recurse into the children.
    for _, child_tree in pairs(self._children) do
      child_tree:destroy()
    end
  end
  ---Formats a region for log output as `[srow:scol-erow:ecol]`, spanning from the
  ---start of its first range to the end of its last range. An empty region is `[]`.
  ---@param region Range6[]
  ---@return string
  local function region_tostr(region)
    local count = #region
    if count == 0 then
      return '[]'
    end

    local first, last = region[1], region[count]
    return string.format('[%d:%d-%d:%d]', first[1], first[2], last[4], last[5])
  end
  ---@private
  ---Iterate through all the regions. fn returns a boolean to indicate if the
  ---region is valid or not.
  ---
  ---Only regions that are currently valid are passed to {fn}; regions that are
  ---already invalid stay invalid. Does nothing if the whole tree is invalid.
  ---@param fn fun(index: integer, region: Range6[]): boolean
  function LanguageTree:_iter_regions(fn)
    if not self._valid then
      return
    end

    -- A non-table `_valid` here means "everything is valid"; expand it into
    -- per-region bookkeeping so single regions can be invalidated.
    local was_valid = type(self._valid) ~= 'table'
    if was_valid then
      self:_log('was valid', self._valid)
      self._valid = {}
    end

    local all_valid = true

    for i, region in pairs(self:included_regions()) do
      local currently_valid = was_valid or self._valid[i]

      if currently_valid then
        self._valid[i] = fn(i, region)

        if not self._valid[i] then
          self:_log(function()
            return 'invalidating region', i, region_tostr(region)
          end)
        end
      end

      if not self._valid[i] then
        all_valid = false
      end
    end

    -- Compress the valid value to 'true' if there are no invalid regions
    if all_valid then
      self._valid = true
    end
  end
  --- Sets the included regions that should be parsed by this |LanguageTree|.
  --- A region is a set of nodes and/or ranges that will be parsed in the same context.
  ---
  --- For example, `{ { node1 }, { node2} }` contains two separate regions.
  --- They will be parsed by the parser in two different contexts, thus resulting
  --- in two separate trees.
  ---
  --- On the other hand, `{ { node1, node2 } }` is a single region consisting of
  --- two nodes. This will be parsed by the parser in a single context, thus resulting
  --- in a single tree.
  ---
  --- This allows for embedded languages to be parsed together across different
  --- nodes, which is useful for templating languages like ERB and EJS.
  ---
  --- Note: {new_regions} is normalised in place (every entry becomes a Range6) and
  --- is then stored on the tree.
  ---
  ---@private
  ---@param new_regions (Range4|Range6|TSNode)[][] List of regions this tree should manage and parse.
  function LanguageTree:set_included_regions(new_regions)
    self._has_regions = true

    -- Normalise every entry to a 6-element range (with byte offsets):
    -- 4-element ranges get bytes added, nodes are replaced by their range.
    for _, region in ipairs(new_regions) do
      for i, item in ipairs(region) do
        local item_kind = type(item)
        if item_kind == 'table' and #item == 4 then
          region[i] = Range.add_bytes(self._source, item --[[@as Range4]])
        elseif item_kind == 'userdata' then
          --- @diagnostic disable-next-line: missing-fields LuaLS varargs bug
          region[i] = { item:range(true) }
        end
      end
    end

    -- included_regions is not guaranteed to be list-like, but this is still sound, i.e. if
    -- new_regions is different from included_regions, then outdated regions in included_regions are
    -- invalidated. For example, if included_regions = new_regions ++ hole ++ outdated_regions, then
    -- outdated_regions is invalidated by _iter_regions in the same-length branch.
    if #self:included_regions() == #new_regions then
      -- Same number of regions: keep the ones that did not change.
      self:_iter_regions(function(i, region)
        return vim.deep_equal(new_regions[i], region)
      end)
    else
      -- TODO(lewis6991): inefficient; invalidate trees incrementally
      for _, tree in pairs(self._trees) do
        self:_do_callback('changedtree', tree:included_ranges(true), tree)
      end
      self._trees = {}
      self:invalidate()
    end

    self._regions = new_regions
  end
  ---Gets the set of included regions managed by this LanguageTree. This can be different from the
  ---regions set by injection query, because a partial |LanguageTree:parse()| drops the regions
  ---outside the requested range.
  ---Each list represents a range in the form of
  ---{ {start_row}, {start_col}, {start_bytes}, {end_row}, {end_col}, {end_bytes} }.
  ---
  ---When no regions are cached they are rebuilt from the current trees and the
  ---result is cached on the tree.
  ---@return table<integer, Range6[]>
  function LanguageTree:included_regions()
    local cached = self._regions
    if cached then
      return cached
    end

    if not self._has_regions then
      -- treesitter.c will default empty ranges to { -1, -1, -1, -1, -1, -1} (the full range)
      return { {} }
    end

    local rebuilt = {} ---@type Range6[][]
    for index, tree in pairs(self._trees) do
      rebuilt[index] = tree:included_ranges(true)
    end

    self._regions = rebuilt
    return rebuilt
  end
  ---Computes the ranges an injection content node contributes.
  ---
  ---With {include_children} set, or when the node has no named children, this is
  ---just the node's own range (as given by `vim.treesitter.get_range`). Otherwise
  ---the ranges of the named children are cut out, leaving only the gaps before,
  ---between and after them.
  ---@param node TSNode
  ---@param source string|integer
  ---@param metadata vim.treesitter.query.TSMetadata
  ---@param include_children boolean
  ---@return Range6[]
  local function get_node_ranges(node, source, metadata, include_children)
    local node_range = vim.treesitter.get_range(node, source, metadata)
    local nchildren = node:named_child_count()

    if include_children or nchildren == 0 then
      return { node_range }
    end

    local gaps = {} ---@type Range6[]

    -- (srow, scol, sbyte) is a cursor marking where the next gap would start.
    local srow, scol, sbyte, erow, ecol, ebyte = Range.unpack6(node_range)

    -- We are excluding children so we need to mask out their ranges
    for idx = 0, nchildren - 1 do
      local child = assert(node:named_child(idx))
      local c_srow, c_scol, c_sbyte, c_erow, c_ecol, c_ebyte = child:range(true)

      if c_srow > srow or c_scol > scol then
        table.insert(gaps, { srow, scol, sbyte, c_srow, c_scol, c_sbyte })
      end

      -- Move the cursor past this child.
      srow, scol, sbyte = c_erow, c_ecol, c_ebyte
    end

    -- Whatever is left between the last child and the end of the node.
    if erow > srow or ecol > scol then
      table.insert(gaps, Range.add_bytes(source, { srow, scol, sbyte, erow, ecol, ebyte }))
    end

    return gaps
  end
  716. ---@nodoc
  717. ---@class vim.treesitter.languagetree.InjectionElem
  718. ---@field combined boolean
  719. ---@field regions Range6[][]
  720. ---@alias vim.treesitter.languagetree.Injection table<string,table<integer,vim.treesitter.languagetree.InjectionElem>>
  ---Records one injected region in {t}, nested as `t[tree_index][lang][pattern]`.
  ---The `combined` flag of an entry is fixed by the first region added for a
  ---pattern. Empty range sets are ignored.
  ---@param t table<integer,vim.treesitter.languagetree.Injection>
  ---@param tree_index integer
  ---@param pattern integer
  ---@param lang string
  ---@param combined boolean
  ---@param ranges Range6[]
  local function add_injection(t, tree_index, pattern, lang, combined, ranges)
    if #ranges == 0 then
      -- Make sure not to add an empty range set as this is interpreted to mean the whole buffer.
      return
    end

    -- Each tree index should be isolated from the other nodes.
    local by_lang = t[tree_index]
    if not by_lang then
      by_lang = {}
      t[tree_index] = by_lang
    end

    local by_pattern = by_lang[lang]
    if not by_pattern then
      by_pattern = {}
      by_lang[lang] = by_pattern
    end

    -- Key this by pattern. If combined is set to true all captures of this pattern
    -- will be parsed by treesitter as the same "source".
    -- If combined is false, each "region" will be parsed as a single source.
    local entry = by_pattern[pattern]
    if not entry then
      entry = { combined = combined, regions = {} }
      by_pattern[pattern] = entry
    end

    entry.regions[#entry.regions + 1] = ranges
  end
  -- TODO(clason): replace by refactored `ts.has_parser` API (without side effects)
  --- Checks whether a parser for {lang} is available: either `vim._ts_has_language`
  --- reports it, or a `parser/{lang}.*` file is found on the runtimepath.
  --- The result is memoized so that nvim_get_runtime_file is not called too often.
  --- @param lang string parser name
  --- @return boolean # true if parser for {lang} exists on rtp
  local has_parser = vim.func._memoize(1, function(lang)
    local known = vim._ts_has_language(lang)
    if known then
      return known
    end

    local parser_files = vim.api.nvim_get_runtime_file('parser/' .. lang .. '.*', false)
    return #parser_files > 0
  end)
  --- Return parser name for language (if exists) or filetype (if registered and exists).
  ---
  --- Names containing anything other than alphanumerics and underscores are
  --- rejected without looking for a parser.
  ---
  ---@param alias string language or filetype name
  ---@return string? # resolved parser name
  local function resolve_lang(alias)
    -- validate that `alias` is a legal language
    if not alias or alias:match('[%w_]+') ~= alias then
      return
    end

    -- The alias may itself be a parser name.
    if has_parser(alias) then
      return alias
    end

    -- Otherwise treat it as a filetype and look up the language registered for it.
    local registered = vim.treesitter.language.get_lang(alias)
    if registered and has_parser(registered) then
      return registered
    end
  end
  ---@private
  --- Extract injections according to:
  --- https://tree-sitter.github.io/tree-sitter/syntax-highlighting#language-injection
  ---
  --- The language comes from metadata first (`injection.self`, then
  --- `injection.parent`, then `injection.language`); an `@injection.language` or
  --- `@injection.filename` capture in the match overrides it. `injection.parent`
  --- is skipped on a tree without a parent instead of raising on a nil parent.
  ---@param match table<integer,TSNode[]>
  ---@param metadata vim.treesitter.query.TSMetadata
  ---@return string?, boolean, Range6[]
  function LanguageTree:_get_injection(match, metadata)
    local ranges = {} ---@type Range6[]
    local combined = metadata['injection.combined'] ~= nil
    local injection_lang = metadata['injection.language'] --[[@as string?]]
    local lang = metadata['injection.self'] ~= nil and self:lang()
      -- The root tree has no parent; fall through rather than index nil.
      or metadata['injection.parent'] ~= nil and self._parent and self._parent:lang()
      or (injection_lang and resolve_lang(injection_lang))
    local include_children = metadata['injection.include-children'] ~= nil

    for id, nodes in pairs(match) do
      -- The capture name depends only on the capture id, not on the node.
      local name = self._injection_query.captures[id]

      for _, node in ipairs(nodes) do
        -- Lang should override any other language tag
        if name == 'injection.language' then
          local text = vim.treesitter.get_node_text(node, self._source, { metadata = metadata[id] })
          lang = resolve_lang(text:lower()) -- language names are always lower case
        elseif name == 'injection.filename' then
          local text = vim.treesitter.get_node_text(node, self._source, { metadata = metadata[id] })
          local ft = vim.filetype.match({ filename = text })
          lang = ft and resolve_lang(ft)
        elseif name == 'injection.content' then
          ranges = get_node_ranges(node, self._source, metadata[id], include_children)
        end
      end
    end

    return lang, combined, ranges
  end
  805. --- Can't use vim.tbl_flatten since a range is just a table.
  806. ---@param regions Range6[][]
  807. ---@return Range6[]
  808. local function combine_regions(regions)
  809. local result = {} ---@type Range6[]
  810. for _, region in ipairs(regions) do
  811. for _, range in ipairs(region) do
  812. result[#result + 1] = range
  813. end
  814. end
  815. return result
  816. end
  817. --- Gets language injection regions by language.
  818. ---
  819. --- This is where most of the injection processing occurs.
  820. ---
  821. --- TODO: Allow for an offset predicate to tailor the injection range
  822. --- instead of using the entire nodes range.
  823. --- @private
  824. --- @return table<string, Range6[][]>
  825. function LanguageTree:_get_injections()
  826. if not self._injection_query or #self._injection_query.captures == 0 then
  827. return {}
  828. end
  829. ---@type table<integer,vim.treesitter.languagetree.Injection>
  830. local injections = {}
  831. for index, tree in pairs(self._trees) do
  832. local root_node = tree:root()
  833. local start_line, _, end_line, _ = root_node:range()
  834. for pattern, match, metadata in
  835. self._injection_query:iter_matches(root_node, self._source, start_line, end_line + 1)
  836. do
  837. local lang, combined, ranges = self:_get_injection(match, metadata)
  838. if lang then
  839. add_injection(injections, index, pattern, lang, combined, ranges)
  840. else
  841. self:_log('match from injection query failed for pattern', pattern)
  842. end
  843. end
  844. end
  845. ---@type table<string,Range6[][]>
  846. local result = {}
  847. -- Generate a map by lang of node lists.
  848. -- Each list is a set of ranges that should be parsed together.
  849. for _, lang_map in pairs(injections) do
  850. for lang, patterns in pairs(lang_map) do
  851. if not result[lang] then
  852. result[lang] = {}
  853. end
  854. for _, entry in pairs(patterns) do
  855. if entry.combined then
  856. table.insert(result[lang], combine_regions(entry.regions))
  857. else
  858. for _, ranges in pairs(entry.regions) do
  859. table.insert(result[lang], ranges)
  860. end
  861. end
  862. end
  863. end
  864. end
  865. return result
  866. end
  867. ---@private
  868. ---@param cb_name TSCallbackName
  869. function LanguageTree:_do_callback(cb_name, ...)
  870. for _, cb in ipairs(self._callbacks[cb_name]) do
  871. cb(...)
  872. end
  873. for _, cb in ipairs(self._callbacks_rec[cb_name]) do
  874. cb(...)
  875. end
  876. end
  877. ---@package
  878. function LanguageTree:_edit(
  879. start_byte,
  880. end_byte_old,
  881. end_byte_new,
  882. start_row,
  883. start_col,
  884. end_row_old,
  885. end_col_old,
  886. end_row_new,
  887. end_col_new
  888. )
  889. for _, tree in pairs(self._trees) do
  890. tree:edit(
  891. start_byte,
  892. end_byte_old,
  893. end_byte_new,
  894. start_row,
  895. start_col,
  896. end_row_old,
  897. end_col_old,
  898. end_row_new,
  899. end_col_new
  900. )
  901. end
  902. self._parser:reset()
  903. self._regions = nil
  904. local changed_range = {
  905. start_row,
  906. start_col,
  907. start_byte,
  908. end_row_old,
  909. end_col_old,
  910. end_byte_old,
  911. }
  912. -- Validate regions after editing the tree
  913. self:_iter_regions(function(_, region)
  914. if #region == 0 then
  915. -- empty region, use the full source
  916. return false
  917. end
  918. for _, r in ipairs(region) do
  919. if Range.intercepts(r, changed_range) then
  920. return false
  921. end
  922. end
  923. return true
  924. end)
  925. for _, child in pairs(self._children) do
  926. child:_edit(
  927. start_byte,
  928. end_byte_old,
  929. end_byte_new,
  930. start_row,
  931. start_col,
  932. end_row_old,
  933. end_col_old,
  934. end_row_new,
  935. end_col_new
  936. )
  937. end
  938. end
  939. ---@nodoc
  940. ---@param bufnr integer
  941. ---@param changed_tick integer
  942. ---@param start_row integer
  943. ---@param start_col integer
  944. ---@param start_byte integer
  945. ---@param old_row integer
  946. ---@param old_col integer
  947. ---@param old_byte integer
  948. ---@param new_row integer
  949. ---@param new_col integer
  950. ---@param new_byte integer
  951. function LanguageTree:_on_bytes(
  952. bufnr,
  953. changed_tick,
  954. start_row,
  955. start_col,
  956. start_byte,
  957. old_row,
  958. old_col,
  959. old_byte,
  960. new_row,
  961. new_col,
  962. new_byte
  963. )
  964. local old_end_col = old_col + ((old_row == 0) and start_col or 0)
  965. local new_end_col = new_col + ((new_row == 0) and start_col or 0)
  966. self:_log(
  967. 'on_bytes',
  968. bufnr,
  969. changed_tick,
  970. start_row,
  971. start_col,
  972. start_byte,
  973. old_row,
  974. old_col,
  975. old_byte,
  976. new_row,
  977. new_col,
  978. new_byte
  979. )
  980. -- Edit trees together BEFORE emitting a bytes callback.
  981. self:_edit(
  982. start_byte,
  983. start_byte + old_byte,
  984. start_byte + new_byte,
  985. start_row,
  986. start_col,
  987. start_row + old_row,
  988. old_end_col,
  989. start_row + new_row,
  990. new_end_col
  991. )
  992. self:_do_callback(
  993. 'bytes',
  994. bufnr,
  995. changed_tick,
  996. start_row,
  997. start_col,
  998. start_byte,
  999. old_row,
  1000. old_col,
  1001. old_byte,
  1002. new_row,
  1003. new_col,
  1004. new_byte
  1005. )
  1006. end
---@nodoc
--- Reload handler: invalidates this LanguageTree by calling `invalidate(true)`.
--- NOTE(review): the `true` argument presumably marks this as a reload — confirm
--- against `LanguageTree:invalidate()`.
function LanguageTree:_on_reload()
  self:invalidate(true)
end
  1011. ---@nodoc
  1012. function LanguageTree:_on_detach(...)
  1013. self:invalidate(true)
  1014. self:_do_callback('detach', ...)
  1015. if self._logfile then
  1016. self._logger('nvim', 'detaching')
  1017. self._logger = nil
  1018. self._logfile:close()
  1019. end
  1020. end
  1021. --- Registers callbacks for the [LanguageTree].
  1022. ---@param cbs table<TSCallbackNameOn,function> An [nvim_buf_attach()]-like table argument with the following handlers:
  1023. --- - `on_bytes` : see [nvim_buf_attach()].
  1024. --- - `on_changedtree` : a callback that will be called every time the tree has syntactical changes.
  1025. --- It will be passed two arguments: a table of the ranges (as node ranges) that
  1026. --- changed and the changed tree.
  1027. --- - `on_child_added` : emitted when a child is added to the tree.
  1028. --- - `on_child_removed` : emitted when a child is removed from the tree.
  1029. --- - `on_detach` : emitted when the buffer is detached, see [nvim_buf_detach_event].
  1030. --- Takes one argument, the number of the buffer.
  1031. --- @param recursive? boolean Apply callbacks recursively for all children. Any new children will
  1032. --- also inherit the callbacks.
  1033. function LanguageTree:register_cbs(cbs, recursive)
  1034. if not cbs then
  1035. return
  1036. end
  1037. local callbacks = recursive and self._callbacks_rec or self._callbacks
  1038. for name, cbname in pairs(TSCallbackNames) do
  1039. if cbs[name] then
  1040. table.insert(callbacks[cbname], cbs[name])
  1041. end
  1042. end
  1043. if recursive then
  1044. for _, child in pairs(self._children) do
  1045. child:register_cbs(cbs, true)
  1046. end
  1047. end
  1048. end
  1049. ---@param tree TSTree
  1050. ---@param range Range
  1051. ---@return boolean
  1052. local function tree_contains(tree, range)
  1053. local tree_ranges = tree:included_ranges(false)
  1054. return Range.contains({
  1055. tree_ranges[1][1],
  1056. tree_ranges[1][2],
  1057. tree_ranges[#tree_ranges][3],
  1058. tree_ranges[#tree_ranges][4],
  1059. }, range)
  1060. end
  1061. --- Determines whether {range} is contained in the |LanguageTree|.
  1062. ---
  1063. ---@param range Range4
  1064. ---@return boolean
  1065. function LanguageTree:contains(range)
  1066. for _, tree in pairs(self._trees) do
  1067. if tree_contains(tree, range) then
  1068. return true
  1069. end
  1070. end
  1071. return false
  1072. end
  1073. --- @class vim.treesitter.LanguageTree.tree_for_range.Opts
  1074. --- @inlinedoc
  1075. ---
  1076. --- Ignore injected languages
  1077. --- (default: `true`)
  1078. --- @field ignore_injections? boolean
  1079. --- Gets the tree that contains {range}.
  1080. ---
  1081. ---@param range Range4
  1082. ---@param opts? vim.treesitter.LanguageTree.tree_for_range.Opts
  1083. ---@return TSTree?
  1084. function LanguageTree:tree_for_range(range, opts)
  1085. opts = opts or {}
  1086. local ignore = vim.F.if_nil(opts.ignore_injections, true)
  1087. if not ignore then
  1088. for _, child in pairs(self._children) do
  1089. local tree = child:tree_for_range(range, opts)
  1090. if tree then
  1091. return tree
  1092. end
  1093. end
  1094. end
  1095. for _, tree in pairs(self._trees) do
  1096. if tree_contains(tree, range) then
  1097. return tree
  1098. end
  1099. end
  1100. return nil
  1101. end
  1102. --- Gets the smallest node that contains {range}.
  1103. ---
  1104. ---@param range Range4
  1105. ---@param opts? vim.treesitter.LanguageTree.tree_for_range.Opts
  1106. ---@return TSNode?
  1107. function LanguageTree:node_for_range(range, opts)
  1108. local tree = self:tree_for_range(range, opts)
  1109. if tree then
  1110. return tree:root():descendant_for_range(unpack(range))
  1111. end
  1112. end
  1113. --- Gets the smallest named node that contains {range}.
  1114. ---
  1115. ---@param range Range4
  1116. ---@param opts? vim.treesitter.LanguageTree.tree_for_range.Opts
  1117. ---@return TSNode?
  1118. function LanguageTree:named_node_for_range(range, opts)
  1119. local tree = self:tree_for_range(range, opts)
  1120. if tree then
  1121. return tree:root():named_descendant_for_range(unpack(range))
  1122. end
  1123. end
  1124. --- Gets the appropriate language that contains {range}.
  1125. ---
  1126. ---@param range Range4
  1127. ---@return vim.treesitter.LanguageTree tree Managing {range}
  1128. function LanguageTree:language_for_range(range)
  1129. for _, child in pairs(self._children) do
  1130. if child:contains(range) then
  1131. return child:language_for_range(range)
  1132. end
  1133. end
  1134. return self
  1135. end
  1136. return LanguageTree