compact-table.scm 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118
  1. ; Copyright (c) 1993-2008 by Richard Kelsey and Jonathan Rees. See file COPYING.
  2. ; Copyright (c) 2005-2006 by Basis Technology Corporation. See file COPYING.
  3. ; A compact table is an encoding of a very large vector that has lots
  4. ; of recurring patterns. It was written for encoding Unicode tables.
  5. ; The vector is partitioned into blocks, and the blocks get assembled
  6. ; into a new compressed vector. Each time a new block gets added, the
  7. ; algorithm looks if the same block is already present in the
  8. ; compressed vector, or the compressed vector ends with a prefix of
  9. ; the new block. In the former case, nothing needs to get added. In
  10. ; the latter case, only the suffix needs to get added. At the same
  11. ; time, the algorithm computes a table with indices of the block
  12. ; beginnings.
  13. ; The algorithm can take a long time; little attempt at optimization
  14. ; has been made. It's mainly intended for offline computation as part
  15. ; of a build process.
  16. ; This tries to merge BLOCK onto REVERSE-BASE, sharing the prefix of
  17. ; BLOCK.
  18. ; returns new reverse list + index offset
  19. (define (compact-block block reverse-base)
  20. (let* ((block-size (length block))
  21. (base-block (reverse (take-upto reverse-base block-size)))
  22. (base-block-size (length base-block)))
  23. (let loop ((base-block base-block)
  24. (offset 0))
  25. (if (list-prefix? base-block block)
  26. (values (append (reverse (list-tail block (- base-block-size offset)))
  27. reverse-base)
  28. offset)
  29. (loop (cdr base-block) (+ 1 offset))))))
  30. ; GET-VALUE is a thunk that returns the next value of the input vector
  31. ; every time it gets called. BLOCK-SIZE is the size of the blocks in
  32. ; the algorithm.
  33. ; The procedure returns two valuesthe indices vector and a vector of
  34. ; the actual values.
  35. (define (compute-compact-table get-value block-size)
  36. (define (get-block)
  37. (let loop ((i 0) (rev-block '()))
  38. (cond
  39. ((>= i block-size)
  40. (reverse rev-block))
  41. ((get-value)
  42. => (lambda (value)
  43. (loop (+ 1 i) (cons value rev-block))))
  44. (else
  45. (reverse rev-block)))))
  46. (let loop ((reverse-values '())
  47. (reverse-indices '())
  48. (last-index 0)
  49. ;; cache for blocks that have already shown up twice
  50. ;; (reduces run time *a lot*)
  51. (bingo-block-alist '()))
  52. (let ((block (get-block)))
  53. (cond
  54. ((null? block)
  55. (values (list->vector (reverse reverse-indices))
  56. (list->vector (reverse reverse-values))))
  57. ((assoc block bingo-block-alist)
  58. => (lambda (pair)
  59. (loop reverse-values
  60. (cons (cdr pair) reverse-indices)
  61. last-index
  62. bingo-block-alist)))
  63. ((sublist-index (reverse block) reverse-values)
  64. => (lambda (rev-index)
  65. (loop reverse-values
  66. (cons (+ (- block-size (length block)) (- last-index rev-index))
  67. reverse-indices)
  68. last-index
  69. (cons (cons block (- last-index rev-index)) bingo-block-alist))))
  70. (else
  71. (call-with-values
  72. (lambda () (compact-block block reverse-values))
  73. (lambda (reverse-values offset)
  74. (loop reverse-values
  75. (cons (+ last-index offset) reverse-indices)
  76. (+ last-index offset)
  77. bingo-block-alist))))))))
  78. ; List utilities
  79. (define (sublist-index sublist list)
  80. (let loop ((list list)
  81. (index 0))
  82. (cond
  83. ((list-prefix? sublist list)
  84. index)
  85. ((null? list)
  86. #f)
  87. (else (loop (cdr list) (+ 1 index))))))
  88. (define (list-prefix? list-1 list-2)
  89. (cond
  90. ((null? list-1) #t)
  91. ((null? list-2) #f)
  92. ((equal? (car list-1) (car list-2))
  93. (list-prefix? (cdr list-1) (cdr list-2)))
  94. (else #f)))
  95. (define (take-upto list count)
  96. (let loop ((list list) (count count) (rev-result '()))
  97. (if (or (zero? count)
  98. (null? list))
  99. (reverse rev-result)
  100. (loop (cdr list) (- count 1) (cons (car list) rev-result)))))