collect.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415
  1. // License: GPLv3 Copyright: 2023, Kovid Goyal, <kovid at kovidgoyal.net>
  2. package diff
  3. import (
  4. "crypto/md5"
  5. "fmt"
  6. "io/fs"
  7. "os"
  8. "path/filepath"
  9. "strings"
  10. "unicode/utf8"
  11. "kitty/tools/utils"
  12. )
  13. var _ = fmt.Print
  14. var path_name_map, remote_dirs map[string]string
  15. var mimetypes_cache, data_cache, hash_cache *utils.LRUCache[string, string]
  16. var size_cache *utils.LRUCache[string, int64]
  17. var lines_cache *utils.LRUCache[string, []string]
  18. var light_highlighted_lines_cache *utils.LRUCache[string, []string]
  19. var dark_highlighted_lines_cache *utils.LRUCache[string, []string]
  20. var is_text_cache *utils.LRUCache[string, bool]
  21. func init_caches() {
  22. path_name_map = make(map[string]string, 32)
  23. remote_dirs = make(map[string]string, 32)
  24. const sz = 4096
  25. size_cache = utils.NewLRUCache[string, int64](sz)
  26. mimetypes_cache = utils.NewLRUCache[string, string](sz)
  27. data_cache = utils.NewLRUCache[string, string](sz)
  28. is_text_cache = utils.NewLRUCache[string, bool](sz)
  29. lines_cache = utils.NewLRUCache[string, []string](sz)
  30. light_highlighted_lines_cache = utils.NewLRUCache[string, []string](sz)
  31. dark_highlighted_lines_cache = utils.NewLRUCache[string, []string](sz)
  32. hash_cache = utils.NewLRUCache[string, string](sz)
  33. }
  34. func add_remote_dir(val string) {
  35. x := filepath.Base(val)
  36. idx := strings.LastIndex(x, "-")
  37. if idx > -1 {
  38. x = x[idx+1:]
  39. } else {
  40. x = ""
  41. }
  42. remote_dirs[val] = x
  43. }
  44. func mimetype_for_path(path string) string {
  45. return mimetypes_cache.MustGetOrCreate(path, func(path string) string {
  46. mt := utils.GuessMimeTypeWithFileSystemAccess(path)
  47. if mt == "" {
  48. mt = "application/octet-stream"
  49. }
  50. if utils.KnownTextualMimes[mt] {
  51. if _, a, found := strings.Cut(mt, "/"); found {
  52. mt = "text/" + a
  53. }
  54. }
  55. return mt
  56. })
  57. }
  58. func data_for_path(path string) (string, error) {
  59. return data_cache.GetOrCreate(path, func(path string) (string, error) {
  60. ans, err := os.ReadFile(path)
  61. return utils.UnsafeBytesToString(ans), err
  62. })
  63. }
  64. func size_for_path(path string) (int64, error) {
  65. return size_cache.GetOrCreate(path, func(path string) (int64, error) {
  66. s, err := os.Stat(path)
  67. if err != nil {
  68. return 0, err
  69. }
  70. return s.Size(), nil
  71. })
  72. }
  73. func is_image(path string) bool {
  74. return strings.HasPrefix(mimetype_for_path(path), "image/")
  75. }
  76. func is_path_text(path string) bool {
  77. return is_text_cache.MustGetOrCreate(path, func(path string) bool {
  78. if is_image(path) {
  79. return false
  80. }
  81. s1, err := os.Stat(path)
  82. if err == nil {
  83. s2, err := os.Stat("/dev/null")
  84. if err == nil && os.SameFile(s1, s2) {
  85. return false
  86. }
  87. }
  88. d, err := data_for_path(path)
  89. if err != nil {
  90. return false
  91. }
  92. return utf8.ValidString(d)
  93. })
  94. }
  95. func hash_for_path(path string) (string, error) {
  96. return hash_cache.GetOrCreate(path, func(path string) (string, error) {
  97. ans, err := data_for_path(path)
  98. if err != nil {
  99. return "", err
  100. }
  101. hash := md5.Sum(utils.UnsafeStringToBytes(ans))
  102. return utils.UnsafeBytesToString(hash[:]), err
  103. })
  104. }
  105. // Remove all control codes except newlines
  106. func sanitize_control_codes(x string) string {
  107. pat := utils.MustCompile("[\x00-\x09\x0b-\x1f\x7f\u0080-\u009f]")
  108. return pat.ReplaceAllLiteralString(x, "░")
  109. }
  110. func sanitize_tabs_and_carriage_returns(x string) string {
  111. return strings.NewReplacer("\t", conf.Replace_tab_by, "\r", "⏎").Replace(x)
  112. }
  113. func sanitize(x string) string {
  114. return sanitize_control_codes(sanitize_tabs_and_carriage_returns(x))
  115. }
  116. func text_to_lines(text string) []string {
  117. lines := make([]string, 0, 512)
  118. splitlines_like_git(text, false, func(line string) { lines = append(lines, line) })
  119. return lines
  120. }
  121. func lines_for_path(path string) ([]string, error) {
  122. return lines_cache.GetOrCreate(path, func(path string) ([]string, error) {
  123. ans, err := data_for_path(path)
  124. if err != nil {
  125. return nil, err
  126. }
  127. return text_to_lines(sanitize(ans)), nil
  128. })
  129. }
  130. func highlighted_lines_for_path(path string) ([]string, error) {
  131. plain_lines, err := lines_for_path(path)
  132. if err != nil {
  133. return nil, err
  134. }
  135. var ans []string
  136. var found bool
  137. if use_light_colors {
  138. ans, found = light_highlighted_lines_cache.Get(path)
  139. } else {
  140. ans, found = dark_highlighted_lines_cache.Get(path)
  141. }
  142. if found && len(ans) == len(plain_lines) {
  143. return ans, nil
  144. }
  145. return plain_lines, nil
  146. }
  147. type Collection struct {
  148. changes, renames, type_map map[string]string
  149. adds, removes *utils.Set[string]
  150. all_paths []string
  151. paths_to_highlight *utils.Set[string]
  152. added_count, removed_count int
  153. }
  154. func (self *Collection) add_change(left, right string) {
  155. self.changes[left] = right
  156. self.all_paths = append(self.all_paths, left)
  157. self.paths_to_highlight.Add(left)
  158. self.paths_to_highlight.Add(right)
  159. self.type_map[left] = `diff`
  160. }
  161. func (self *Collection) add_rename(left, right string) {
  162. self.renames[left] = right
  163. self.all_paths = append(self.all_paths, left)
  164. self.type_map[left] = `rename`
  165. }
  166. func (self *Collection) add_add(right string) {
  167. self.adds.Add(right)
  168. self.all_paths = append(self.all_paths, right)
  169. self.paths_to_highlight.Add(right)
  170. self.type_map[right] = `add`
  171. if is_path_text(right) {
  172. num, _ := lines_for_path(right)
  173. self.added_count += len(num)
  174. }
  175. }
  176. func (self *Collection) add_removal(left string) {
  177. self.removes.Add(left)
  178. self.all_paths = append(self.all_paths, left)
  179. self.paths_to_highlight.Add(left)
  180. self.type_map[left] = `removal`
  181. if is_path_text(left) {
  182. num, _ := lines_for_path(left)
  183. self.removed_count += len(num)
  184. }
  185. }
  186. func (self *Collection) finalize() {
  187. utils.StableSortWithKey(self.all_paths, func(path string) string {
  188. return path_name_map[path]
  189. })
  190. }
  191. func (self *Collection) Len() int { return len(self.all_paths) }
  192. func (self *Collection) Items() int { return len(self.all_paths) }
  193. func (self *Collection) Apply(f func(path, typ, changed_path string) error) error {
  194. for _, path := range self.all_paths {
  195. typ := self.type_map[path]
  196. changed_path := ""
  197. switch typ {
  198. case "diff":
  199. changed_path = self.changes[path]
  200. case "rename":
  201. changed_path = self.renames[path]
  202. }
  203. if err := f(path, typ, changed_path); err != nil {
  204. return err
  205. }
  206. }
  207. return nil
  208. }
  // allowed reports whether path survives the ignore patterns: it returns false
  // as soon as the base name of path matches one of them. Only the base name is
  // tested, and patterns that filepath.Match rejects are skipped.
  func allowed(path string, patterns ...string) bool {
  	base := filepath.Base(path)
  	for _, pattern := range patterns {
  		hit, err := filepath.Match(pattern, base)
  		if err != nil {
  			continue
  		}
  		if hit {
  			return false
  		}
  	}
  	return true
  }
  218. func remote_hostname(path string) (string, string) {
  219. for q, val := range remote_dirs {
  220. if strings.HasPrefix(path, q) {
  221. return q, val
  222. }
  223. }
  224. return "", ""
  225. }
  226. func resolve_remote_name(path, defval string) string {
  227. remote_dir, rh := remote_hostname(path)
  228. if remote_dir != "" && rh != "" {
  229. r, err := filepath.Rel(remote_dir, path)
  230. if err == nil {
  231. return rh + ":" + r
  232. }
  233. }
  234. return defval
  235. }
  236. func walk(base string, patterns []string, names *utils.Set[string], pmap, path_name_map map[string]string) error {
  237. base, err := filepath.Abs(base)
  238. if err != nil {
  239. return err
  240. }
  241. return filepath.WalkDir(base, func(path string, d fs.DirEntry, err error) error {
  242. if err != nil {
  243. return err
  244. }
  245. is_allowed := allowed(path, patterns...)
  246. if !is_allowed {
  247. if d.IsDir() {
  248. return fs.SkipDir
  249. }
  250. return nil
  251. }
  252. if d.IsDir() {
  253. return nil
  254. }
  255. path, err = filepath.Abs(path)
  256. if err != nil {
  257. return err
  258. }
  259. name, err := filepath.Rel(base, path)
  260. if err != nil {
  261. return err
  262. }
  263. if name != "." {
  264. path_name_map[path] = name
  265. names.Add(name)
  266. pmap[name] = path
  267. }
  268. return nil
  269. })
  270. }
  271. func (self *Collection) collect_files(left, right string) error {
  272. left_names, right_names := utils.NewSet[string](16), utils.NewSet[string](16)
  273. left_path_map, right_path_map := make(map[string]string, 16), make(map[string]string, 16)
  274. err := walk(left, conf.Ignore_name, left_names, left_path_map, path_name_map)
  275. if err != nil {
  276. return err
  277. }
  278. if err = walk(right, conf.Ignore_name, right_names, right_path_map, path_name_map); err != nil {
  279. return err
  280. }
  281. common_names := left_names.Intersect(right_names)
  282. changed_names := utils.NewSet[string](common_names.Len())
  283. for n := range common_names.Iterable() {
  284. ld, err := data_for_path(left_path_map[n])
  285. var rd string
  286. if err == nil {
  287. rd, err = data_for_path(right_path_map[n])
  288. }
  289. if err != nil {
  290. return err
  291. }
  292. if ld != rd {
  293. changed_names.Add(n)
  294. self.add_change(left_path_map[n], right_path_map[n])
  295. } else {
  296. if lstat, err := os.Stat(left_path_map[n]); err == nil {
  297. if rstat, err := os.Stat(right_path_map[n]); err == nil {
  298. if lstat.Mode() != rstat.Mode() {
  299. // identical files with only a mode change
  300. changed_names.Add(n)
  301. self.add_change(left_path_map[n], right_path_map[n])
  302. }
  303. }
  304. }
  305. }
  306. }
  307. removed := left_names.Subtract(common_names)
  308. added := right_names.Subtract(common_names)
  309. ahash, rhash := make(map[string]string, added.Len()), make(map[string]string, removed.Len())
  310. for a := range added.Iterable() {
  311. ahash[a], err = hash_for_path(right_path_map[a])
  312. if err != nil {
  313. return err
  314. }
  315. }
  316. for r := range removed.Iterable() {
  317. rhash[r], err = hash_for_path(left_path_map[r])
  318. if err != nil {
  319. return err
  320. }
  321. }
  322. for name, rh := range rhash {
  323. found := false
  324. for n, ah := range ahash {
  325. if ah == rh {
  326. ld, _ := data_for_path(left_path_map[name])
  327. rd, _ := data_for_path(right_path_map[n])
  328. if ld == rd {
  329. self.add_rename(left_path_map[name], right_path_map[n])
  330. added.Discard(n)
  331. found = true
  332. break
  333. }
  334. }
  335. }
  336. if !found {
  337. self.add_removal(left_path_map[name])
  338. }
  339. }
  340. for name := range added.Iterable() {
  341. self.add_add(right_path_map[name])
  342. }
  343. return nil
  344. }
  345. func create_collection(left, right string) (ans *Collection, err error) {
  346. ans = &Collection{
  347. changes: make(map[string]string),
  348. renames: make(map[string]string),
  349. type_map: make(map[string]string),
  350. adds: utils.NewSet[string](32),
  351. removes: utils.NewSet[string](32),
  352. paths_to_highlight: utils.NewSet[string](32),
  353. all_paths: make([]string, 0, 32),
  354. }
  355. left_stat, err := os.Stat(left)
  356. if err != nil {
  357. return nil, err
  358. }
  359. if left_stat.IsDir() {
  360. err = ans.collect_files(left, right)
  361. if err != nil {
  362. return nil, err
  363. }
  364. } else {
  365. pl, err := filepath.Abs(left)
  366. if err != nil {
  367. return nil, err
  368. }
  369. pr, err := filepath.Abs(right)
  370. if err != nil {
  371. return nil, err
  372. }
  373. path_name_map[pl] = resolve_remote_name(pl, left)
  374. path_name_map[pr] = resolve_remote_name(pr, right)
  375. ans.add_change(pl, pr)
  376. }
  377. ans.finalize()
  378. return ans, err
  379. }