languagetree.lua 37 KB


  1. --- @brief A [LanguageTree]() contains a tree of parsers: the root treesitter parser for {lang} and
  2. --- any "injected" language parsers, which themselves may inject other languages, recursively.
  3. --- For example a Lua buffer containing some Vimscript commands needs multiple parsers to fully
  4. --- understand its contents.
  5. ---
  6. --- To create a LanguageTree (parser object) for a given buffer and language, use:
  7. ---
  8. --- ```lua
  9. --- local parser = vim.treesitter.get_parser(bufnr, lang)
  10. --- ```
  11. ---
  12. --- (where `bufnr=0` means current buffer). `lang` defaults to 'filetype'.
  13. --- Note: currently the parser is retained for the lifetime of a buffer but this may change;
  14. --- a plugin should keep a reference to the parser object if it wants incremental updates.
  15. ---
  16. --- Whenever you need to access the current syntax tree, parse the buffer:
  17. ---
  18. --- ```lua
  19. --- local tree = parser:parse({ start_row, end_row })
  20. --- ```
  21. ---
  22. --- This returns a table of immutable |treesitter-tree| objects representing the current state of
  23. --- the buffer. When the plugin wants to access the state after a (possible) edit it must call
  24. --- `parse()` again. If the buffer wasn't edited, the same tree will be returned again without extra
  25. --- work. If the buffer was parsed before, incremental parsing will be done of the changed parts.
  26. ---
  27. --- Note: To use the parser directly inside a |nvim_buf_attach()| Lua callback, you must call
  28. --- |vim.treesitter.get_parser()| before you register your callback. But preferably parsing
  29. --- shouldn't be done directly in the change callback anyway as they will be very frequent. Rather
  30. --- a plugin that does any kind of analysis on a tree should use a timer to throttle too frequent
  31. --- updates.
  32. ---
  33. -- Debugging:
  34. --
  35. -- vim.g.__ts_debug levels:
  36. -- - 1. Messages from languagetree.lua
  37. -- - 2. Parse messages from treesitter
  38. -- - 3. Lex messages from treesitter
  39. --
  40. -- Log file can be found in stdpath('log')/treesitter.log
  41. local query = require('vim.treesitter.query')
  42. local language = require('vim.treesitter.language')
  43. local Range = require('vim.treesitter._range')
  44. local default_parse_timeout_ms = 3
  --- Event names used as keys of the `_callbacks` / `_callbacks_rec` tables.
  ---@alias TSCallbackName
  ---| 'changedtree'
  ---| 'bytes'
  ---| 'detach'
  ---| 'child_added'
  ---| 'child_removed'

  --- The same events spelled as `on_*` option keys.
  ---@alias TSCallbackNameOn
  ---| 'on_changedtree'
  ---| 'on_bytes'
  ---| 'on_detach'
  ---| 'on_child_added'
  ---| 'on_child_removed'

  --- Lookup from the `on_*` spelling of an event to its short event name.
  --- @type table<TSCallbackNameOn,TSCallbackName>
  local TSCallbackNames = {
    ['on_changedtree'] = 'changedtree',
    ['on_bytes'] = 'bytes',
    ['on_detach'] = 'detach',
    ['on_child_added'] = 'child_added',
    ['on_child_removed'] = 'child_removed',
  }
  ---Optional arguments accepted by `LanguageTree.new()`:
  ---@class vim.treesitter.LanguageTree.new.Opts
  ---@inlinedoc
  ---@field queries? table<string,string> -- Deprecated
  ---@field injections? table<string,string>

  ---@nodoc
  ---@class vim.treesitter.LanguageTree
  ---@field private _callbacks table<TSCallbackName,function[]> Callback handlers
  ---@field package _callbacks_rec table<TSCallbackName,function[]> Callback handlers (recursive)
  ---@field private _children table<string,vim.treesitter.LanguageTree> Injected languages
  ---@field private _injection_query vim.treesitter.Query Queries defining injected languages
  ---@field private _injections_processed boolean
  ---@field private _opts table Options
  ---@field private _parser TSParser Parser for language
  ---
  ---Table of regions for which the tree is currently running an async parse
  ---@field private _ranges_being_parsed table<string, boolean>
  ---
  ---Table of callback queues, keyed by each region for which the callbacks should be run
  ---@field private _cb_queues table<string, fun(err?: string, trees?: table<integer, TSTree>)[]>
  ---
  ---@field private _has_regions boolean
  ---
  ---List of regions this tree should manage and parse. If nil then regions are
  ---taken from _trees. This is mostly a short-lived cache for included_regions()
  ---@field private _regions table<integer, Range6[]>?
  ---
  ---@field private _lang string Language name
  ---@field private _parent? vim.treesitter.LanguageTree Parent LanguageTree
  ---@field private _source (integer|string) Buffer or string to parse
  ---
  ---Reference to parsed tree (one for each language).
  ---Each key is the index of region, which is synced with _regions and _valid.
  ---@field private _trees table<integer, TSTree>
  ---
  ---@field private _valid boolean|table<integer,boolean> If the parsed tree is valid
  ---@field private _logger? fun(logtype: string, msg: string)
  ---@field private _logfile? file*
  local LanguageTree = {}

  -- Instances created by `LanguageTree.new()` resolve their methods through this table.
  LanguageTree.__index = LanguageTree
  --- @nodoc
  ---
  --- Creates a LanguageTree: the root treesitter parser for {lang} plus (later, once parsed) any
  --- "injected" language parsers, which themselves may inject other languages, recursively.
  ---
  --- Raises an error if `language.add(lang)` does not succeed.
  ---
  ---@param source (integer|string) Buffer or text string to parse (`0` is resolved to the current buffer)
  ---@param lang string Root language of this tree
  ---@param opts vim.treesitter.LanguageTree.new.Opts?
  ---@return vim.treesitter.LanguageTree parser object
  function LanguageTree.new(source, lang, opts)
    assert(language.add(lang))

    opts = opts or {}

    if source == 0 then
      source = vim.api.nvim_get_current_buf()
    end

    -- A caller-supplied injection query for this language wins over the runtime one.
    local custom_injections = opts.injections or {}
    local injection_query = custom_injections[lang] and query.parse(lang, custom_injections[lang])
    if not injection_query then
      injection_query = query.get(lang, 'injections')
    end

    --- @type vim.treesitter.LanguageTree
    local self = setmetatable({
      _source = source,
      _lang = lang,
      _children = {},
      _trees = {},
      _opts = opts,
      _injection_query = injection_query,
      _has_regions = false,
      _injections_processed = false,
      _valid = false,
      _parser = vim._create_ts_parser(lang),
      _ranges_being_parsed = {},
      _cb_queues = {},
      _callbacks = {},
      _callbacks_rec = {},
    }, LanguageTree)

    -- Logging is only wired up when the debug level is a number.
    if type(vim.g.__ts_debug) == 'number' then
      self:_set_logger()
      self:_log('START')
    end

    -- Every known event starts with an empty handler list.
    for _, event in pairs(TSCallbackNames) do
      self._callbacks[event] = {}
      self._callbacks_rec[event] = {}
    end

    return self
  end
  --- Opens `treesitter.log` in `stdpath('log')` (append mode) and installs a logger that
  --- writes `source:lang:(logtype) msg` lines to it, both on this object and on the
  --- underlying parser.
  ---
  --- Raises an error if the log file cannot be opened.
  --- @private
  function LanguageTree:_set_logger()
    local src = self:source()
    -- String sources are labelled 'text'; buffers are labelled by their number.
    local source_label = type(src) == 'string' and 'text' or tostring(src)
    local lang = self:lang()

    local logdir = vim.fn.stdpath('log') --[[@as string]]
    vim.fn.mkdir(logdir, 'p')
    local logfilename = vim.fs.joinpath(logdir, 'treesitter.log')

    local logfile, openerr = io.open(logfilename, 'a+')
    if not logfile or openerr then
      error(string.format('Could not open file (%s) for logging: %s', logfilename, openerr))
    end

    self._logfile = logfile

    self._logger = function(logtype, msg)
      self._logfile:write(string.format('%s:%s:(%s) %s\n', source_label, lang, logtype, msg))
      self._logfile:flush()
    end

    -- Level 2 and up enables parse messages, level 3 and up additionally enables lex messages.
    local level = vim.g.__ts_debug
    local log_lex = level >= 3
    local log_parse = level >= 2
    self._parser:_set_logger(log_lex, log_parse, self._logger)
  end
  ---Measure execution time of a function.
  ---
  ---Calls `f(...)` and returns the elapsed time in milliseconds followed by every value
  ---`f` returned. Results are forwarded as varargs rather than packed into a table, so
  ---`nil` results (in the middle or at the end) are preserved exactly.
  ---@generic R1, R2, R3
  ---@param f fun(...): R1, R2, R3
  ---@param ... any arguments forwarded to `f`
  ---@return number, R1, R2, R3
  local function tcall(f, ...)
    local start = vim.uv.hrtime()

    -- Receives the results of `f` as varargs and prepends the duration. Going through
    -- `{ f(...) }` + `unpack()` instead would truncate at the first nil hole.
    local function finish(...)
      --- @type number
      local duration = (vim.uv.hrtime() - start) / 1000000 -- ns -> ms
      return duration, ...
    end

    ---@diagnostic disable-next-line
    return finish(f(...))
  end
  ---Writes a debug message through the installed logger.
  ---
  ---Does nothing when no logger is installed or `vim.g.__ts_debug` is unset or below 1.
  ---If the first argument is a function it is called and its results are logged instead,
  ---so the caller only pays for building the message when logging is active.
  ---Non-string values are rendered with `vim.inspect()` on a single line.
  ---@private
  ---@param ... any
  function LanguageTree:_log(...)
    if not self._logger then
      return
    end

    local level = vim.g.__ts_debug
    if not level or level < 1 then
      return
    end

    local first = ...
    local args ---@type any[]
    if type(first) == 'function' then
      args = { first() }
    else
      args = { ... }
    end

    -- Level 2 is the function that called _log().
    local info = debug.getinfo(2, 'nl')
    local nregions = vim.tbl_count(self:included_regions())

    local parts = {
      string.format('%s:%d: (#regions=%d) ', info.name or '???', info.currentline or 0, nregions),
    }

    for _, x in ipairs(args) do
      parts[#parts + 1] = type(x) == 'string' and x
        or vim.inspect(x, { newline = ' ', indent = '' })
    end

    self._logger('nvim', table.concat(parts, ' '))
  end
  --- Invalidates this parser and its children.
  ---
  --- Should only be called when the tracked state of the LanguageTree is not valid against the parse
  --- tree in treesitter. Doesn't clear filesystem cache. Called often, so needs to be fast.
  ---
  --- Marks the tree invalid and resets the underlying parser. With {reload} set, a
  --- 'changedtree' callback is additionally fired for every existing tree and all trees are
  --- dropped. The same {reload} value is passed down to every child.
  ---@param reload boolean|nil
  function LanguageTree:invalidate(reload)
    self._valid = false
    self._parser:reset()

    if reload then
      -- The buffer was reloaded: report each old tree as changed, then forget them all so
      -- that they are reparsed.
      for _, tree in pairs(self._trees) do
        self:_do_callback('changedtree', tree:included_ranges(true), tree)
      end
      self._trees = {}
    end

    for _, child in pairs(self._children) do
      child:invalidate(reload)
    end
  end
  --- Returns all trees of the regions parsed by this parser (child languages excluded).
  ---
  --- The result is list-like if
  --- * this LanguageTree is the root, in which case the result is empty or a singleton list; or
  --- * the root LanguageTree is fully parsed.
  ---
  --- The returned table is the internal one, not a copy.
  ---
  ---@return table<integer, TSTree>
  function LanguageTree:trees()
    local trees = self._trees
    return trees
  end
  --- Gets the language name this tree node was created for.
  function LanguageTree:lang()
    local lang = self._lang
    return lang
  end
  --- Returns whether this LanguageTree is valid, i.e., |LanguageTree:trees()| reflects the latest
  --- state of the source. If invalid, user should call |LanguageTree:parse()|.
  ---
  --- When the per-region validity table turns out to be fully valid (and, unless
  --- {exclude_children} is set, injections are processed and all children are valid), the
  --- validity state is collapsed to `true`.
  ---@param exclude_children boolean|nil whether to ignore the validity of children (default `false`)
  ---@return boolean
  function LanguageTree:is_valid(exclude_children)
    local valid = self._valid

    -- Per-region bookkeeping: a single invalid region makes the whole tree invalid.
    if type(valid) == 'table' then
      for i in pairs(self:included_regions()) do
        if not valid[i] then
          return false
        end
      end
    end

    if not exclude_children then
      -- Children can only be trusted once the injection query has been run.
      if not self._injections_processed then
        return false
      end

      for _, child in pairs(self._children) do
        if not child:is_valid(exclude_children) then
          return false
        end
      end
    end

    if type(valid) == 'boolean' then
      return valid
    end

    -- Every region checked out: remember that as a plain boolean.
    self._valid = true
    return true
  end
  --- Returns the map from injected language name to its child tree.
  --- The returned table is the internal one, not a copy.
  function LanguageTree:children()
    local children = self._children
    return children
  end
  --- Returns the source this language tree parses (a buffer number or a string).
  function LanguageTree:source()
    local source = self._source
    return source
  end
  --- Decides whether {region} is covered by the requested {range}.
  ---
  --- * An empty region always matches.
  --- * A `nil` range matches nothing else.
  --- * A boolean range is returned as the answer.
  --- * Otherwise the region matches if any of its ranges intercepts {range}.
  --- @param region Range6[]
  --- @param range? boolean|Range
  --- @return boolean
  local function intercepts_region(region, range)
    if #region == 0 then
      return true
    elseif range == nil then
      return false
    elseif type(range) == 'boolean' then
      return range
    end

    for _, member in ipairs(region) do
      if Range.intercepts(member, range) then
        return true
      end
    end

    return false
  end
  --- Parses every invalid region that intersects {range}, firing a 'changedtree' callback and
  --- marking the region valid for each tree produced.
  ---
  --- @private
  --- @param range boolean|Range?
  --- @param timeout integer? parser timeout in milliseconds (no timeout is set when nil)
  --- @return Range6[] changes
  --- @return integer no_regions_parsed
  --- @return number total_parse_time
  --- @return boolean finished `false` if the parser returned no tree for some region
  ---   (presumably because the timeout expired), `true` otherwise
  function LanguageTree:_parse_regions(range, timeout)
    local changes = {}
    local no_regions_parsed = 0
    local total_parse_time = 0

    if type(self._valid) ~= 'table' then
      self._valid = {}
    end

    -- If there are no ranges, set to an empty list
    -- so the included ranges in the parser are cleared.
    for i, ranges in pairs(self:included_regions()) do
      local old_tree = self._trees[i]

      -- Only invalid regions are candidates; of those, parse the ones whose new ranges or
      -- whose previous tree's ranges are touched by the requested range.
      local needs_parse = not self._valid[i]
        and (
          intercepts_region(ranges, range)
          or (old_tree and intercepts_region(old_tree:included_ranges(false), range))
        )

      if needs_parse then
        self._parser:set_included_ranges(ranges)
        self._parser:set_timeout(timeout and timeout * 1000 or 0) -- ms -> micros

        local parse_time, tree, tree_changes =
          tcall(self._parser.parse, self._parser, old_tree, self._source, true)

        if not tree then
          return changes, no_regions_parsed, total_parse_time, false
        end

        -- Pass ranges if this is an initial parse
        local cb_changes = old_tree and tree_changes or tree:included_ranges(true)
        self:_do_callback('changedtree', cb_changes, tree)

        self._trees[i] = tree
        vim.list_extend(changes, tree_changes)

        no_regions_parsed = no_regions_parsed + 1
        total_parse_time = total_parse_time + parse_time
        self._valid[i] = true
      end
    end

    return changes, no_regions_parsed, total_parse_time, true
  end
  --- Runs the injection query and synchronises the child trees with its result: children
  --- are created for newly injected languages, every injected child receives its regions,
  --- and children whose language is no longer injected are removed.
  ---
  --- @private
  --- @return number # time spent in `_get_injections()`, as measured by `tcall`
  function LanguageTree:_add_injections()
    local query_time, injections_by_lang = tcall(self._get_injections, self)

    local seen_langs = {} ---@type table<string,boolean>

    for lang, injection_regions in pairs(injections_by_lang) do
      -- Child language trees should just be ignored if not found, since
      -- they can depend on the text of a node. Intermediate strings
      -- would cause errors for unknown parsers.
      if pcall(language.add, lang) then
        local child = self._children[lang] or self:add_child(lang)
        child:set_included_regions(injection_regions)
        seen_langs[lang] = true
      end
    end

    -- Drop children for languages the query no longer produced.
    for lang in pairs(self._children) do
      if not seen_langs[lang] then
        self:remove_child(lang)
      end
    end

    return query_time
  end
  --- Builds a string key for a parse range: table ranges become their elements joined by
  --- commas, anything else goes through `tostring()`.
  --- @param range boolean|Range?
  --- @return string
  local function range_to_string(range)
    if type(range) == 'table' then
      return table.concat(range, ',')
    end
    return tostring(range)
  end
  --- Appends {callback} to the callback queue kept for {range}, creating the queue on first use.
  --- @private
  --- @param range boolean|Range?
  --- @param callback fun(err?: string, trees?: table<integer, TSTree>)
  function LanguageTree:_push_async_callback(range, callback)
    local key = range_to_string(range)

    local queue = self._cb_queues[key]
    if not queue then
      queue = {}
      self._cb_queues[key] = queue
    end

    queue[#queue + 1] = callback
  end
  --- Invokes every callback queued for {range} with `(err, trees)`, then forgets both the
  --- queue and the "being parsed" flag for that range.
  ---
  --- The entries are removed (set to `nil`) rather than reset to `false` / `{}`: the tables
  --- are keyed by the stringified range, so keeping a placeholder per key would make them
  --- grow with every distinct range ever parsed. A missing entry is treated the same as an
  --- empty one by `_push_async_callback()` and `_async_parse()`.
  --- @private
  --- @param range boolean|Range?
  --- @param err? string
  --- @param trees? table<integer, TSTree>
  function LanguageTree:_run_async_callbacks(range, err, trees)
    local key = range_to_string(range)

    -- Tolerate a missing queue so a stray call cannot raise from ipairs(nil).
    for _, cb in ipairs(self._cb_queues[key] or {}) do
      cb(err, trees)
    end

    self._ranges_being_parsed[key] = nil
    self._cb_queues[key] = nil
  end
  --- Run an asynchronous parse, calling {on_parse} when complete.
  ---
  --- The callback is always queued first. If a parse for the same {range} is already in
  --- flight nothing else happens; otherwise parsing proceeds in steps, rescheduling itself
  --- with `vim.schedule()` until it finishes or the accumulated parse time exceeds
  --- 'redrawtime' (reported to the callbacks as the error `'TIMEOUT'`).
  ---
  --- Works for both buffer and string sources: change tracking through `changedtick` is
  --- only applied when the source is a buffer number.
  ---
  --- @private
  --- @param range boolean|Range?
  --- @param on_parse fun(err?: string, trees?: table<integer, TSTree>)
  --- @return table<integer, TSTree>? trees the list of parsed trees, if parsing completed synchronously
  function LanguageTree:_async_parse(range, on_parse)
    self:_push_async_callback(range, on_parse)

    -- If we are already running an async parse, just queue the callback.
    local range_string = range_to_string(range)
    if self._ranges_being_parsed[range_string] then
      return
    end
    self._ranges_being_parsed[range_string] = true

    local source = self._source
    -- A string source has no buffer variables; indexing `vim.b` with it would not yield a
    -- buffer accessor and reading `changedtick` from the result would raise.
    local is_buffer_parser = type(source) == 'number'
    local buf = is_buffer_parser and vim.b[source] or nil
    local ct = is_buffer_parser and buf.changedtick or nil
    local total_parse_time = 0
    local redrawtime = vim.o.redrawtime
    -- No per-step timeout when synchronous parsing is forced.
    local timeout = not vim.g._ts_force_sync_parsing and default_parse_timeout_ms or nil

    local function step()
      -- If buffer was changed in the middle of parsing, reset parse state
      if is_buffer_parser and buf.changedtick ~= ct then
        ct = buf.changedtick
        total_parse_time = 0
      end

      local parse_time, trees, finished = tcall(self._parse, self, range, timeout)
      total_parse_time = total_parse_time + parse_time

      if finished then
        self:_run_async_callbacks(range, nil, trees)
        return trees
      elseif total_parse_time > redrawtime then
        self:_run_async_callbacks(range, 'TIMEOUT', nil)
        return nil
      else
        vim.schedule(step)
      end
    end

    return step()
  end
  --- Recursively parse all regions in the language tree using |treesitter-parsers|
  --- for the corresponding languages and run injection queries on the parsed trees
  --- to determine whether child trees should be created and parsed.
  ---
  --- Any region with empty range (`{}`, typically only the root tree) is always parsed;
  --- otherwise (typically injections) only if it intersects {range} (or if {range} is `true`).
  ---
  --- @param range boolean|Range|nil: Parse this range in the parser's source.
  ---     Set to `true` to run a complete parse of the source (Note: Can be slow!)
  ---     Set to `false|nil` to only parse regions with empty ranges (typically
  ---     only the root tree without injections).
  --- @param on_parse fun(err?: string, trees?: table<integer, TSTree>)? Function invoked when parsing completes.
  ---     When provided and `vim.g._ts_force_sync_parsing` is not set, parsing will run
  ---     asynchronously. The first argument to the function is a string representing the error type,
  ---     in case of a failure (currently only possible for timeouts). The second argument is the list
  ---     of trees returned by the parse (upon success), or `nil` if the parse timed out (determined
  ---     by 'redrawtime').
  ---
  ---     If parsing was still able to finish synchronously (within 3ms), `parse()` returns the list
  ---     of trees. Otherwise, it returns `nil`.
  --- @return table<integer, TSTree>?
  function LanguageTree:parse(range, on_parse)
    if not on_parse then
      -- Synchronous path: only the trees are handed back, the "finished" flag is dropped.
      local trees = self:_parse(range)
      return trees
    end

    return self:_async_parse(range, on_parse)
  end
  --- Performs one parse pass over this tree and its children.
  ---
  --- Own regions are parsed first (if any is invalid), then the injection query is run
  --- when needed and {range} is truthy, and finally every child is parsed with whatever is
  --- left of the {timeout} budget.
  ---
  --- @private
  --- @param range boolean|Range|nil
  --- @param timeout integer? budget in milliseconds (no limit when nil)
  --- @return table<integer, TSTree> trees
  --- @return boolean finished `false` when the pass stopped early and must be resumed
  function LanguageTree:_parse(range, timeout)
    if self:is_valid() then
      self:_log('valid')
      return self._trees, true
    end

    local changes --- @type Range6[]?

    -- Collect some stats
    local no_regions_parsed, query_time, total_parse_time = 0, 0, 0

    -- At least 1 region is invalid
    if not self:is_valid(true) then
      local is_finished --- @type boolean
      changes, no_regions_parsed, total_parse_time, is_finished =
        self:_parse_regions(range, timeout)

      -- Charge the time spent on our own regions against the remaining budget.
      timeout = timeout and math.max(timeout - total_parse_time, 0)

      if not is_finished then
        return self._trees, is_finished
      end

      -- Need to run injections when we parsed something
      if no_regions_parsed > 0 then
        self._injections_processed = false
      end
    end

    if range and not self._injections_processed then
      query_time = self:_add_injections()
      self._injections_processed = true
    end

    self:_log({
      changes = changes and #changes > 0 and changes or nil,
      regions_parsed = no_regions_parsed,
      parse_time = total_parse_time,
      query_time = query_time,
      range = range,
    })

    for _, child in pairs(self._children) do
      -- Budget exhausted: report "not finished" so the caller resumes later.
      if timeout == 0 then
        return self._trees, false
      end

      local ctime, _, child_finished = tcall(child._parse, child, range, timeout)
      timeout = timeout and math.max(timeout - ctime, 0)

      if not child_finished then
        return self._trees, child_finished
      end
    end

    return self._trees, true
  end
  --- Invokes the callback for each tree of this |LanguageTree|, then recurses into every
  --- child |LanguageTree|.
  ---
  --- Note: This includes the invoking tree's child trees as well.
  ---
  ---@param fn fun(tree: TSTree, ltree: vim.treesitter.LanguageTree)
  function LanguageTree:for_each_tree(fn)
    -- Own trees first ...
    for _, own_tree in pairs(self._trees) do
      fn(own_tree, self)
    end

    -- ... then everything below.
    for _, child_ltree in pairs(self._children) do
      child_ltree:for_each_tree(fn)
    end
  end
  --- Adds a child language to this |LanguageTree|.
  ---
  --- If the language already exists as a child, it will first be removed. The new child
  --- shares this tree's source and options, starts with a copy of this tree's recursive
  --- callbacks, and is announced through the 'child_added' callback.
  ---
  ---@private
  ---@param lang string Language to add.
  ---@return vim.treesitter.LanguageTree injected
  function LanguageTree:add_child(lang)
    if self._children[lang] then
      self:remove_child(lang)
    end

    local child = LanguageTree.new(self._source, lang, self._opts)

    -- Inherit recursive callbacks
    for event, handlers in pairs(self._callbacks_rec) do
      vim.list_extend(child._callbacks_rec[event], handlers)
    end

    child._parent = self
    self._children[lang] = child

    self:_do_callback('child_added', child)

    return self._children[lang]
  end
  --- Returns the LanguageTree this tree was added to as a child, or nil if none was set.
  --- @package
  function LanguageTree:parent()
    local parent = self._parent
    return parent
  end
  --- Removes a child language from this |LanguageTree|.
  ---
  --- Does nothing if {lang} is not a child. Otherwise the child is unregistered, destroyed,
  --- and then reported through the 'child_removed' callback.
  ---
  ---@private
  ---@param lang string Language to remove.
  function LanguageTree:remove_child(lang)
    local child = self._children[lang]
    if not child then
      return
    end

    self._children[lang] = nil
    child:destroy()
    self:_do_callback('child_removed', child)
  end
  --- Destroys this |LanguageTree| and all its children.
  ---
  --- Any cleanup logic should be performed here. At present the only work done is
  --- destroying each child recursively.
  ---
  --- Note: This DOES NOT remove this tree from a parent. Instead,
  --- `remove_child` must be called on the parent to remove it.
  function LanguageTree:destroy()
    -- Cleanup here
    for _, child_ltree in pairs(self._children) do
      child_ltree:destroy()
    end
  end
  ---Formats a region for log output as `[srow:scol-erow:ecol]`, taking the start from its
  ---first range and the end from its last range. An empty region is rendered as `[]`.
  ---@param region Range6[]
  ---@return string
  local function region_tostr(region)
    local count = #region
    if count == 0 then
      return '[]'
    end

    local first, last = region[1], region[count]
    return string.format('[%d:%d-%d:%d]', first[1], first[2], last[4], last[5])
  end
  ---Iterate through all the regions. fn returns a boolean to indicate if the
  ---region is valid or not.
  ---
  ---Only regions currently considered valid are passed to {fn}; nothing happens when the
  ---whole tree is already invalid. If every region is valid afterwards, the validity
  ---state is collapsed back to `true`.
  ---@private
  ---@param fn fun(index: integer, region: Range6[]): boolean
  function LanguageTree:_iter_regions(fn)
    if not self._valid then
      return
    end

    -- A non-table (i.e. `true`) state means every region was valid; expand it to a table so
    -- that regions can be tracked individually.
    local was_valid = type(self._valid) ~= 'table'
    if was_valid then
      self:_log('was valid', self._valid)
      self._valid = {}
    end

    local all_valid = true

    for i, region in pairs(self:included_regions()) do
      if was_valid or self._valid[i] then
        local still_valid = fn(i, region)
        self._valid[i] = still_valid

        if not still_valid then
          self:_log(function()
            return 'invalidating region', i, region_tostr(region)
          end)
        end
      end

      if not self._valid[i] then
        all_valid = false
      end
    end

    -- Compress the valid value to 'true' if there are no invalid regions
    if all_valid then
      self._valid = all_valid
    end
  end
  --- Sets the included regions that should be parsed by this |LanguageTree|.
  --- A region is a set of nodes and/or ranges that will be parsed in the same context.
  ---
  --- For example, `{ { node1 }, { node2} }` contains two separate regions.
  --- They will be parsed by the parser in two different contexts, thus resulting
  --- in two separate trees.
  ---
  --- On the other hand, `{ { node1, node2 } }` is a single region consisting of
  --- two nodes. This will be parsed by the parser in a single context, thus resulting
  --- in a single tree.
  ---
  --- This allows for embedded languages to be parsed together across different
  --- nodes, which is useful for templating languages like ERB and EJS.
  ---
  --- Note: {new_regions} is normalised in place (4-element ranges and nodes are replaced
  --- by 6-element ranges) and then stored as the tree's region list.
  ---
  ---@private
  ---@param new_regions (Range4|Range6|TSNode)[][] List of regions this tree should manage and parse.
  function LanguageTree:set_included_regions(new_regions)
    self._has_regions = true

    -- Transform the tables from 4 element long to 6 element long (with byte offset)
    for _, region in ipairs(new_regions) do
      for i, range in ipairs(region) do
        local kind = type(range)
        if kind == 'table' and #range == 4 then
          region[i] = Range.add_bytes(self._source, range --[[@as Range4]])
        elseif kind == 'userdata' then
          region[i] = { range:range(true) }
        end
      end
    end

    -- included_regions is not guaranteed to be list-like, but this is still sound, i.e. if
    -- new_regions is different from included_regions, then outdated regions in included_regions are
    -- invalidated. For example, if included_regions = new_regions ++ hole ++ outdated_regions, then
    -- outdated_regions is invalidated by _iter_regions in the same-count branch.
    local same_count = #self:included_regions() == #new_regions

    if same_count then
      -- Keep regions that did not change; invalidate only the ones that differ.
      self:_iter_regions(function(i, region)
        return vim.deep_equal(new_regions[i], region)
      end)
    else
      -- TODO(lewis6991): inefficient; invalidate trees incrementally
      for _, tree in pairs(self._trees) do
        self:_do_callback('changedtree', tree:included_ranges(true), tree)
      end
      self._trees = {}
      self:invalidate()
    end

    self._regions = new_regions
  end
  ---Gets the set of included regions managed by this LanguageTree. This can be different from the
  ---regions set by injection query, because a partial |LanguageTree:parse()| drops the regions
  ---outside the requested range.
  ---Each list represents a range in the form of
  ---{ {start_row}, {start_col}, {start_bytes}, {end_row}, {end_col}, {end_bytes} }.
  ---
  ---The result is served from the cached region list when there is one; otherwise it is
  ---rebuilt from the current trees and cached.
  ---@return table<integer, Range6[]>
  function LanguageTree:included_regions()
    local cached = self._regions
    if cached then
      return cached
    end

    if not self._has_regions then
      -- treesitter.c will default empty ranges to { -1, -1, -1, -1, -1, -1} (the full range)
      return { {} }
    end

    local rebuilt = {} ---@type Range6[][]
    for i, tree in pairs(self._trees) do
      rebuilt[i] = tree:included_ranges(true)
    end

    self._regions = rebuilt
    return rebuilt
  end
  ---Computes the ranges an injection should cover for {node}.
  ---
  ---With {include_children} set, or when the node has no named children, that is just the
  ---node's own range. Otherwise the ranges of the named children are cut out, leaving the
  ---gaps before, between and after them.
  ---@param node TSNode
  ---@param source string|integer
  ---@param metadata vim.treesitter.query.TSMetadata
  ---@param include_children boolean
  ---@return Range6[]
  local function get_node_ranges(node, source, metadata, include_children)
    local range = vim.treesitter.get_range(node, source, metadata)
    local child_count = node:named_child_count()

    if include_children or child_count == 0 then
      return { range }
    end

    local ranges = {} ---@type Range6[]

    -- (srow, scol, sbyte) is a cursor marking where the next uncovered gap starts.
    local srow, scol, sbyte, erow, ecol, ebyte = Range.unpack6(range)

    -- We are excluding children so we need to mask out their ranges
    for i = 0, child_count - 1 do
      local child = assert(node:named_child(i))
      local c_srow, c_scol, c_sbyte, c_erow, c_ecol, c_ebyte = child:range(true)

      -- Emit the gap between the cursor and the start of this child.
      if c_srow > srow or c_scol > scol then
        ranges[#ranges + 1] = { srow, scol, sbyte, c_srow, c_scol, c_sbyte }
      end

      -- Move the cursor past the child.
      srow, scol, sbyte = c_erow, c_ecol, c_ebyte
    end

    -- Trailing gap between the last child and the end of the node.
    if erow > srow or ecol > scol then
      ranges[#ranges + 1] = Range.add_bytes(source, { srow, scol, sbyte, erow, ecol, ebyte })
    end

    return ranges
  end
  ---@nodoc
  ---@class vim.treesitter.languagetree.InjectionElem
  ---@field combined boolean
  ---@field regions Range6[][]

  ---@alias vim.treesitter.languagetree.Injection table<string,table<integer,vim.treesitter.languagetree.InjectionElem>>

  ---Records one injected region in {t}, nested as `t[tree_index][lang][pattern]`.
  ---Missing levels are created on demand; the `combined` flag of an entry is fixed by the
  ---first region recorded for it. Empty {ranges} are ignored.
  ---@param t table<integer,vim.treesitter.languagetree.Injection>
  ---@param tree_index integer
  ---@param pattern integer
  ---@param lang string
  ---@param combined boolean
  ---@param ranges Range6[]
  local function add_injection(t, tree_index, pattern, lang, combined, ranges)
    if #ranges == 0 then
      -- Make sure not to add an empty range set as this is interpreted to mean the whole buffer.
      return
    end

    -- Each tree index should be isolated from the other nodes.
    local by_lang = t[tree_index]
    if not by_lang then
      by_lang = {}
      t[tree_index] = by_lang
    end

    local by_pattern = by_lang[lang]
    if not by_pattern then
      by_pattern = {}
      by_lang[lang] = by_pattern
    end

    -- Key this by pattern. If combined is set to true all captures of this pattern
    -- will be parsed by treesitter as the same "source".
    -- If combined is false, each "region" will be parsed as a single source.
    local entry = by_pattern[pattern]
    if not entry then
      entry = { combined = combined, regions = {} }
      by_pattern[pattern] = entry
    end

    entry.regions[#entry.regions + 1] = ranges
  end
  -- TODO(clason): replace by refactored `ts.has_parser` API (without side effects)
  --- Checks whether a parser for {lang} is available: either `vim._ts_has_language()`
  --- already reports it, or a `parser/<lang>.*` file is found on the runtimepath.
  ---
  --- The result of this function is cached to prevent nvim_get_runtime_file from being
  --- called too often
  --- @param lang string parser name
  --- @return boolean # true if parser for {lang} exists on rtp
  local has_parser = vim.func._memoize(1, function(lang)
    local known = vim._ts_has_language(lang)
    if known then
      return known
    end

    local matches = vim.api.nvim_get_runtime_file('parser/' .. lang .. '.*', false)
    return #matches > 0
  end)
  --- Return parser name for language (if exists) or filetype (if registered and exists).
  ---
  --- Returns nothing when {alias} is missing, contains characters other than
  --- alphanumerics and `_`, or cannot be matched to an available parser.
  ---
  ---@param alias string language or filetype name
  ---@return string? # resolved parser name
  local function resolve_lang(alias)
    -- validate that `alias` is a legal language
    if not alias or alias:match('[%w_]+') ~= alias then
      return
    end

    -- Direct hit: the alias itself names a parser.
    if has_parser(alias) then
      return alias
    end

    -- Otherwise treat it as a filetype and look up the language registered for it.
    local registered = vim.treesitter.language.get_lang(alias)
    if registered and has_parser(registered) then
      return registered
    end
  end
  ---@private
  --- Extract injections according to:
  --- https://tree-sitter.github.io/tree-sitter/syntax-highlighting#language-injection
  ---
  --- The language is taken, in order of preference, from: `injection.self` (this tree's
  --- language), `injection.parent` (the parent tree's language, skipped when this tree has
  --- no parent), or the `injection.language` metadata. An `@injection.language` or
  --- `@injection.filename` capture in the match overrides that choice.
  ---@param match table<integer,TSNode[]>
  ---@param metadata vim.treesitter.query.TSMetadata
  ---@return string? lang resolved language, if any
  ---@return boolean combined whether `injection.combined` is set
  ---@return Range6[] ranges ranges of the `@injection.content` capture
  function LanguageTree:_get_injection(match, metadata)
    local ranges = {} ---@type Range6[]
    local combined = metadata['injection.combined'] ~= nil
    local injection_lang = metadata['injection.language'] --[[@as string?]]

    -- A root tree has no parent: guard the lookup so `injection.parent` there falls
    -- through to the remaining sources instead of indexing nil.
    local parent = self._parent
    local lang = metadata['injection.self'] ~= nil and self:lang()
      or metadata['injection.parent'] ~= nil and parent and parent:lang()
      or (injection_lang and resolve_lang(injection_lang))

    local include_children = metadata['injection.include-children'] ~= nil

    for id, nodes in pairs(match) do
      for _, node in ipairs(nodes) do
        local name = self._injection_query.captures[id]

        -- Lang should override any other language tag
        if name == 'injection.language' then
          local text = vim.treesitter.get_node_text(node, self._source, { metadata = metadata[id] })
          lang = resolve_lang(text:lower()) -- language names are always lower case
        elseif name == 'injection.filename' then
          local text = vim.treesitter.get_node_text(node, self._source, { metadata = metadata[id] })
          local ft = vim.filetype.match({ filename = text })
          lang = ft and resolve_lang(ft)
        elseif name == 'injection.content' then
          ranges = get_node_ranges(node, self._source, metadata[id], include_children)
        end
      end
    end

    return lang, combined, ranges
  end
  --- Flattens a list of regions (each a list of ranges) into a single list of ranges,
  --- keeping the original order.
  ---
  --- Can't use vim.tbl_flatten since a range is just a table.
  ---@param regions Range6[][]
  ---@return Range6[]
  local function combine_regions(regions)
    local flat = {} ---@type Range6[]
    local count = 0
    for _, region in ipairs(regions) do
      for _, range in ipairs(region) do
        count = count + 1
        flat[count] = range
      end
    end
    return flat
  end
  --- Gets language injection regions by language.
  ---
  --- This is where most of the injection processing occurs: the injection query is
  --- run over every tree, the matches are bucketed per tree, language and pattern,
  --- and the buckets are finally regrouped by language.
  ---
  --- TODO: Allow for an offset predicate to tailor the injection range
  ---       instead of using the entire nodes range.
  --- @private
  --- @return table<string, Range6[][]>
  function LanguageTree:_get_injections()
    local query = self._injection_query
    if not query then
      return {}
    end

    -- Pass 1: collect the injections found in each tree.
    ---@type table<integer,vim.treesitter.languagetree.Injection>
    local by_tree = {}
    for tree_index, tree in pairs(self._trees) do
      local root = tree:root()
      local first_row, _, last_row, _ = root:range()
      local matches = query:iter_matches(root, self._source, first_row, last_row + 1)
      for pattern, match, metadata in matches do
        local lang, combined, ranges = self:_get_injection(match, metadata)
        if not lang then
          self:_log('match from injection query failed for pattern', pattern)
        else
          add_injection(by_tree, tree_index, pattern, lang, combined, ranges)
        end
      end
    end

    -- Pass 2: generate a map by lang of node lists.
    -- Each list is a set of ranges that should be parsed together.
    ---@type table<string,Range6[][]>
    local by_lang = {}
    for _, lang_map in pairs(by_tree) do
      for lang, patterns in pairs(lang_map) do
        local bucket = by_lang[lang]
        if not bucket then
          bucket = {}
          by_lang[lang] = bucket
        end

        for _, entry in pairs(patterns) do
          if entry.combined then
            -- A combined pattern contributes one merged list of ranges.
            bucket[#bucket + 1] = combine_regions(entry.regions)
          else
            -- Otherwise every region is added as its own list.
            for _, ranges in pairs(entry.regions) do
              bucket[#bucket + 1] = ranges
            end
          end
        end
      end
    end

    return by_lang
  end
  ---@private
  --- Invokes every callback stored under {cb_name}, forwarding the extra arguments:
  --- first the ones in `self._callbacks`, then the ones in `self._callbacks_rec`.
  ---@param cb_name TSCallbackName
  function LanguageTree:_do_callback(cb_name, ...)
    local own = self._callbacks[cb_name]
    for _, handler in ipairs(own) do
      handler(...)
    end

    local recursive = self._callbacks_rec[cb_name]
    for _, handler in ipairs(recursive) do
      handler(...)
    end
  end
  ---@package
  --- Applies a single edit to every tree of this LanguageTree, resets the parser,
  --- re-validates the regions against the edited span and finally forwards the very
  --- same edit to each child.
  function LanguageTree:_edit(
    start_byte,
    end_byte_old,
    end_byte_new,
    start_row,
    start_col,
    end_row_old,
    end_col_old,
    end_row_new,
    end_col_new
  )
    for _, tree in pairs(self._trees) do
      tree:edit(
        start_byte,
        end_byte_old,
        end_byte_new,
        start_row,
        start_col,
        end_row_old,
        end_col_old,
        end_row_new,
        end_col_new
      )
    end

    self._parser:reset()
    self._regions = nil

    -- Span that was replaced: (row, col, byte) of the start followed by
    -- (row, col, byte) of the old end.
    local changed_range = {
      start_row,
      start_col,
      start_byte,
      end_row_old,
      end_col_old,
      end_byte_old,
    }

    --- Returns false for an empty region or one with a range that intercepts the
    --- changed span, true otherwise.
    local function region_is_valid(_, region)
      if #region == 0 then
        -- empty region, use the full source
        return false
      end
      for _, r in ipairs(region) do
        if Range.intercepts(r, changed_range) then
          return false
        end
      end
      return true
    end

    -- Validate regions after editing the tree
    self:_iter_regions(region_is_valid)

    for _, child in pairs(self._children) do
      child:_edit(
        start_byte,
        end_byte_old,
        end_byte_new,
        start_row,
        start_col,
        end_row_old,
        end_col_old,
        end_row_new,
        end_col_new
      )
    end
  end
  ---@nodoc
  --- Handles a byte-level change: logs it, applies it to the trees via `_edit`, and
  --- then emits the 'bytes' callback with the unmodified arguments.
  ---@param bufnr integer
  ---@param changed_tick integer
  ---@param start_row integer
  ---@param start_col integer
  ---@param start_byte integer
  ---@param old_row integer
  ---@param old_col integer
  ---@param old_byte integer
  ---@param new_row integer
  ---@param new_col integer
  ---@param new_byte integer
  function LanguageTree:_on_bytes(
    bufnr,
    changed_tick,
    start_row,
    start_col,
    start_byte,
    old_row,
    old_col,
    old_byte,
    new_row,
    new_col,
    new_byte
  )
    --- End column of a span: offset by `start_col` when the span covers 0 rows,
    --- otherwise the column is taken as-is.
    local function end_col_of(row_extent, col_extent)
      return col_extent + ((row_extent == 0) and start_col or 0)
    end

    local old_end_col = end_col_of(old_row, old_col)
    local new_end_col = end_col_of(new_row, new_col)

    self:_log(
      'on_bytes',
      bufnr,
      changed_tick,
      start_row,
      start_col,
      start_byte,
      old_row,
      old_col,
      old_byte,
      new_row,
      new_col,
      new_byte
    )

    -- Edit trees together BEFORE emitting a bytes callback.
    -- `_edit` takes end positions, so the extents are added to the start here.
    local end_byte_old = start_byte + old_byte
    local end_byte_new = start_byte + new_byte
    local end_row_old = start_row + old_row
    local end_row_new = start_row + new_row
    self:_edit(
      start_byte,
      end_byte_old,
      end_byte_new,
      start_row,
      start_col,
      end_row_old,
      old_end_col,
      end_row_new,
      new_end_col
    )

    self:_do_callback(
      'bytes',
      bufnr,
      changed_tick,
      start_row,
      start_col,
      start_byte,
      old_row,
      old_col,
      old_byte,
      new_row,
      new_col,
      new_byte
    )
  end
  ---@nodoc
  --- Reload handler: calls `self:invalidate(true)`.
  function LanguageTree:_on_reload()
    self:invalidate(true)
  end
  ---@nodoc
  --- Detach handler: calls `self:invalidate(true)`, emits the 'detach' callback with
  --- the received arguments and, if a log file is set, logs the detach, drops the
  --- logger and closes the file.
  function LanguageTree:_on_detach(...)
    self:invalidate(true)
    self:_do_callback('detach', ...)

    if not self._logfile then
      return
    end

    -- Write the final message before the logger is dropped and the file is closed.
    self._logger('nvim', 'detaching')
    self._logger = nil
    self._logfile:close()
  end
  --- Registers callbacks for the [LanguageTree].
  ---@param cbs table<TSCallbackNameOn,function> An [nvim_buf_attach()]-like table argument with the following handlers:
  ---           - `on_bytes` : see [nvim_buf_attach()].
  ---           - `on_changedtree` : a callback that will be called every time the tree has syntactical changes.
  ---              It will be passed two arguments: a table of the ranges (as node ranges) that
  ---              changed and the changed tree.
  ---           - `on_child_added` : emitted when a child is added to the tree.
  ---           - `on_child_removed` : emitted when a child is removed from the tree.
  ---           - `on_detach` : emitted when the buffer is detached, see [nvim_buf_detach_event].
  ---              Takes one argument, the number of the buffer.
  --- @param recursive? boolean Apply callbacks recursively for all children. Any new children will
  ---                           also inherit the callbacks.
  function LanguageTree:register_cbs(cbs, recursive)
    if not cbs then
      return
    end

    -- Recursive registrations are kept in their own table.
    local target = recursive and self._callbacks_rec or self._callbacks

    -- TSCallbackNames pairs each handler key of {cbs} with the callback list it goes into.
    for handler_key, cb_name in pairs(TSCallbackNames) do
      local handler = cbs[handler_key]
      if handler then
        local list = target[cb_name]
        list[#list + 1] = handler
      end
    end

    if not recursive then
      return
    end

    for _, child in pairs(self._children) do
      child:register_cbs(cbs, true)
    end
  end
  --- Checks whether {range} is contained in the overall span of {tree}.
  ---
  --- The span is a 4-element range built from elements 1-2 of the tree's first
  --- included range and elements 3-4 of its last included range; it is tested with
  --- `Range.contains`.
  ---@param tree TSTree
  ---@param range Range
  ---@return boolean
  local function tree_contains(tree, range)
    local included = tree:included_ranges(false)
    local first = included[1]
    local last = included[#included]
    local span = { first[1], first[2], last[3], last[4] }
    return Range.contains(span, range)
  end
  --- Determines whether {range} is contained in the |LanguageTree|.
  ---
  --- Only the trees of this LanguageTree itself are checked.
  ---
  ---@param range Range4
  ---@return boolean
  function LanguageTree:contains(range)
    local found = false
    for _, tree in pairs(self._trees) do
      if tree_contains(tree, range) then
        found = true
        break
      end
    end
    return found
  end
  --- @class vim.treesitter.LanguageTree.tree_for_range.Opts
  --- @inlinedoc
  ---
  --- Ignore injected languages
  --- (default: `true`)
  --- @field ignore_injections? boolean

  --- Gets the tree that contains {range}.
  ---
  --- When injections are not ignored, the children are searched first (recursively,
  --- with the same {opts}); the trees of this LanguageTree are only consulted if no
  --- child yields a tree.
  ---
  ---@param range Range4
  ---@param opts? vim.treesitter.LanguageTree.tree_for_range.Opts
  ---@return TSTree?
  function LanguageTree:tree_for_range(range, opts)
    opts = opts or {}

    -- An unset option means "ignore injections".
    local ignore_injections = opts.ignore_injections
    if ignore_injections == nil then
      ignore_injections = true
    end

    if not ignore_injections then
      for _, child in pairs(self._children) do
        local found = child:tree_for_range(range, opts)
        if found then
          return found
        end
      end
    end

    for _, candidate in pairs(self._trees) do
      if tree_contains(candidate, range) then
        return candidate
      end
    end

    return nil
  end
  --- Gets the smallest node that contains {range}.
  ---
  --- The tree is looked up with |LanguageTree:tree_for_range()|; nothing is returned
  --- when no tree contains {range}.
  ---
  ---@param range Range4
  ---@param opts? vim.treesitter.LanguageTree.tree_for_range.Opts
  ---@return TSNode?
  function LanguageTree:node_for_range(range, opts)
    local tree = self:tree_for_range(range, opts)
    if not tree then
      return
    end
    local root = tree:root()
    return root:descendant_for_range(unpack(range))
  end
  --- Gets the smallest named node that contains {range}.
  ---
  --- The tree is looked up with |LanguageTree:tree_for_range()|; nothing is returned
  --- when no tree contains {range}.
  ---
  ---@param range Range4
  ---@param opts? vim.treesitter.LanguageTree.tree_for_range.Opts
  ---@return TSNode?
  function LanguageTree:named_node_for_range(range, opts)
    local tree = self:tree_for_range(range, opts)
    if not tree then
      return
    end
    local root = tree:root()
    return root:named_descendant_for_range(unpack(range))
  end
  --- Gets the appropriate language that contains {range}.
  ---
  --- Descends from this LanguageTree into the first child that contains {range},
  --- repeating from that child until no child contains it.
  ---
  ---@param range Range4
  ---@return vim.treesitter.LanguageTree tree Managing {range}
  function LanguageTree:language_for_range(range)
    local current = self
    while true do
      local containing_child = nil
      for _, child in pairs(current._children) do
        if child:contains(range) then
          containing_child = child
          break
        end
      end
      if not containing_child then
        -- No child claims the range, so this level manages it.
        return current
      end
      current = containing_child
    end
  end
  1126. return LanguageTree