vhsort.scm 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. ;;; The sort package -- vector heap sort -*- Scheme -*-
  2. ;;; Copyright (c) 2002 by Olin Shivers.
  3. ;;; This code is open-source; see the end of the file for porting and
  4. ;;; more copyright information.
  5. ;;; Olin Shivers 10/98.
  6. ;;; Exports:
  7. ;;; (vector-heap-sort! elt< v [start end]) -> unspecified
  8. ;;; (vector-heap-sort elt< v [start end]) -> vector
  9. ;;; Two key facts
  10. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  11. ;;; If a heap structure is embedded into a vector at indices [start,end), then:
  12. ;;; 1. The two children of index k are start + 2*(k-start) + 1 = k*2-start+1
  13. ;;; and start + 2*(k-start) + 2 = k*2-start+2.
  14. ;;;
  15. ;;; 2. The first index of a leaf node in the range [start,end) is
  16. ;;; first-leaf = floor[(start+end)/2]
  17. ;;; (You can deduce this from fact #1 above.)
  18. ;;; Any index before FIRST-LEAF is an internal node.
  19. (define (really-vector-heap-sort! elt< v start end)
  20. ;; Vector V contains a heap at indices [START,END). The heap is in heap
  21. ;; order in the range (I,END) -- i.e., every element in this range is >=
  22. ;; its children. Bubble HEAP[I] down into the heap to impose heap order on
  23. ;; the range [I,END).
  24. (define (restore-heap! end i)
  25. (let* ((vi (vector-ref v i))
  26. (first-leaf (quotient (+ start end) 2)) ; Can fixnum overflow.
  27. (final-k (let lp ((k i))
  28. (if (>= k first-leaf)
  29. k ; Leaf, so done.
  30. (let* ((k*2-start (+ k (- k start))) ; Don't overflow.
  31. (child1 (+ 1 k*2-start))
  32. (child2 (+ 2 k*2-start))
  33. (child1-val (vector-ref v child1)))
  34. (call-with-values
  35. (lambda ()
  36. (if (< child2 end)
  37. (let ((child2-val (vector-ref v child2)))
  38. (if (elt< child2-val child1-val)
  39. (values child1 child1-val)
  40. (values child2 child2-val)))
  41. (values child1 child1-val)))
  42. (lambda (max-child max-child-val)
  43. (cond ((elt< vi max-child-val)
  44. (vector-set! v k max-child-val)
  45. (lp max-child))
  46. (else k))))))))) ; Done.
  47. (vector-set! v final-k vi)))
  48. ;; Put the unsorted subvector V[start,end) into heap order.
  49. (let ((first-leaf (quotient (+ start end) 2))) ; Can fixnum overflow.
  50. (do ((i (- first-leaf 1) (- i 1)))
  51. ((< i start))
  52. (restore-heap! end i)))
  53. (do ((i (- end 1) (- i 1)))
  54. ((<= i start))
  55. (let ((top (vector-ref v start)))
  56. (vector-set! v start (vector-ref v i))
  57. (vector-set! v i top)
  58. (restore-heap! i start))))
  59. ;;; Here are the two exported interfaces.
  60. (define (vector-heap-sort! elt< v . maybe-start+end)
  61. (call-with-values
  62. (lambda () (vector-start+end v maybe-start+end))
  63. (lambda (start end)
  64. (really-vector-heap-sort! elt< v start end))))
  65. (define (vector-heap-sort elt< v . maybe-start+end)
  66. (call-with-values
  67. (lambda () (vector-start+end v maybe-start+end))
  68. (lambda (start end)
  69. (let ((ans (vector-portion-copy v start end)))
  70. (really-vector-heap-sort! elt< ans 0 (- end start))
  71. ans))))
  72. ;;; Notes on porting
  73. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  74. ;;;
  75. ;;; Bumming the code for speed
  76. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  77. ;;; If you can use a module system to lock up the internal function
  78. ;;; REALLY-VECTOR-HEAP-SORT! so that it can only be called from VECTOR-HEAP-SORT and
  79. ;;; VECTOR-HEAP-SORT!, then you can hack the internal functions to run with no safety
  80. ;;; checks. The safety checks performed by the exported functions VECTOR-HEAP-SORT &
  81. ;;; VECTOR-HEAP-SORT! guarantee that there will be no type errors or array-indexing
  82. ;;; errors. In addition, with the exception of the two computations of
  83. ;;; FIRST-LEAF, all arithmetic will be fixnum arithmetic that never overflows
  84. ;;; into bignums, assuming your Scheme provides that you can't allocate an
  85. ;;; array so large you might need a bignum to index an element, which is
  86. ;;; definitely the case for every implementation with which I am familiar.
  87. ;;;
  88. ;;; If you want to code up the first-leaf = (quotient (+ s e) 2) computation
  89. ;;; so that it will never fixnum overflow when S & E are fixnums, you can do
  90. ;;; it this way:
  91. ;;; - compute floor(e/2), which throws away e's low-order bit.
  92. ;;; - add e's low-order bit to s, and divide that by two:
  93. ;;; floor[(s + e mod 2) / 2]
  94. ;;; - add these two parts together.
  95. ;;; giving you
  96. ;;; (+ (quotient e 2)
  97. ;;; (quotient (+ s (modulo e 2)) 2))
  98. ;;; If we know that e & s are fixnums, and that 0 <= s <= e, then this
  99. ;;; can only fixnum-overflow when s = e = max-fixnum. Note that the
  100. ;;; two divides and one modulo op can be done very quickly with two
  101. ;;; right-shifts and a bitwise and.
  102. ;;;
  103. ;;; I suspect there has never been a heapsort written in the history of
  104. ;;; the world in C that got this detail right.
  105. ;;;
  106. ;;; If your Scheme has a faster mechanism for handling optional arguments
  107. ;;; (e.g., Chez), you should definitely port over to it. Note that argument
  108. ;;; defaulting and error-checking are interleaved -- you don't have to
  109. ;;; error-check defaulted START/END args to see if they are fixnums that are
  110. ;;; legal vector indices for the corresponding vector, etc.