lmsort.scm 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387
  1. ;;; list merge & list merge-sort -*- Scheme -*-
  2. ;;; Copyright (c) 1998 by Olin Shivers.
  3. ;;; This code is open-source; see the end of the file for porting and
  4. ;;; more copyright information.
  5. ;;; Olin Shivers
  6. ;;; Exports:
  7. ;;; (list-merge < lis lis) -> list
  8. ;;; (list-merge! < lis lis) -> list
  9. ;;; (list-merge-sort < lis) -> list
  10. ;;; (list-merge-sort! < lis) -> list
  11. ;;; A stable list merge sort of my own device
  12. ;;; Two variants: pure & destructive
  13. ;;;
  14. ;;; This list merge sort is opportunistic (a "natural" sort) -- it exploits
  15. ;;; existing order in the input set. Instead of recursing all the way down to
  16. ;;; individual elements, the leaves of the merge tree are maximal contiguous
  17. ;;; runs of elements from the input list. So the algorithm does very well on
  18. ;;; data that is mostly ordered, with a best-case time of O(n) when the input
  19. ;;; list is already completely sorted. In any event, worst-case time is
  20. ;;; O(n lg n).
  21. ;;;
  22. ;;; The destructive variant is "in place," meaning that it allocates no new
  23. ;;; cons cells at all; it just rearranges the pairs of the input list with
  24. ;;; SET-CDR! to order it.
  25. ;;;
  26. ;;; The interesting control structure is the combination recursion/iteration
  27. ;;; of the core GROW function that does an "opportunistic" DFS walk of the
  28. ;;; merge tree, adaptively subdividing in response to the length of the
  29. ;;; merges, without requiring any auxiliary data structures beyond the
  30. ;;; recursion stack. It's actually quite simple -- ten lines of code.
  31. ;;; -Olin Shivers 10/20/98
  32. ;;; (mlet ((var-list mv-exp) ...) body ...)
  33. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  34. ;;; A LET* form that handles multiple values. Move this into the two clients
  35. ;;; if you don't have a module system handy to restrict its visibility...
  36. (define-syntax mlet ; Multiple-value LET*
  37. (syntax-rules ()
  38. ((mlet ((() exp) rest ...) body ...)
  39. (begin exp (mlet (rest ...) body ...)))
  40. ((mlet (((var) exp) rest ...) body ...)
  41. (let ((var exp)) (mlet (rest ...) body ...)))
  42. ((mlet ((vars exp) rest ...) body ...)
  43. (call-with-values (lambda () exp)
  44. (lambda vars (mlet (rest ...) body ...))))
  45. ((mlet () body ...) (begin body ...))))
  46. ;;; (list-merge-sort < lis)
  47. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  48. ;;; A natural, stable list merge sort.
  49. ;;; - natural: picks off maximal contiguous runs of pre-ordered data.
  50. ;;; - stable: won't invert the order of equal elements in the input list.
  51. (define (list-merge-sort elt< lis)
  52. ;; (getrun lis) -> run runlen rest
  53. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  54. ;; Pick a run of non-decreasing data off of non-empty list LIS.
  55. ;; Return the length of this run, and the following list.
  56. (define (getrun lis)
  57. (let lp ((ans '()) (i 1) (prev (car lis)) (xs (cdr lis)))
  58. (if (pair? xs)
  59. (let ((x (car xs)))
  60. (if (elt< x prev)
  61. (values (append-reverse ans (cons prev '())) i xs)
  62. (lp (cons prev ans) (+ i 1) x (cdr xs))))
  63. (values (append-reverse ans (cons prev '())) i xs))))
  64. (define (append-reverse rev-head tail)
  65. (let lp ((rev-head rev-head) (tail tail))
  66. (if (null-list? rev-head) tail
  67. (lp (cdr rev-head) (cons (car rev-head) tail)))))
  68. (define (null-list? l)
  69. (cond ((pair? l) #f)
  70. ((null? l) #t)
  71. (else (assertion-violation 'null-list? "argument out of domain" l))))
  72. ;; (merge a b) -> list
  73. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  74. ;; List merge -- stably merge lists A (length > 0) & B (length > 0).
  75. ;; This version requires up to |a|+|b| stack frames.
  76. (define (merge a b)
  77. (let recur ((x (car a)) (a a)
  78. (y (car b)) (b b))
  79. (if (elt< y x)
  80. (cons y (let ((b (cdr b)))
  81. (if (pair? b)
  82. (recur x a (car b) b)
  83. a)))
  84. (cons x (let ((a (cdr a)))
  85. (if (pair? a)
  86. (recur (car a) a y b)
  87. b))))))
  88. ;; (grow s ls ls2 u lw) -> [a la unused]
  89. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  90. ;; The core routine. Read the next 20 lines of comments & all is obvious.
  91. ;; - S is a sorted list of length LS > 1.
  92. ;; - LS2 is some power of two <= LS.
  93. ;; - U is an unsorted list.
  94. ;; - LW is a positive integer.
  95. ;; Starting with S, and taking data from U as needed, produce
  96. ;; a sorted list of *at least* length LW, if there's enough data
  97. ;; (LW <= LS + length(U)), or use all of U if not.
  98. ;;
  99. ;; GROW takes maximal contiguous runs of data from U at a time;
  100. ;; it is allowed to return a list *longer* than LW if it gets lucky
  101. ;; with a long run.
  102. ;;
  103. ;; The key idea: If you want a merge operation to "pay for itself," the two
  104. ;; lists being merged should be about the same length. Remember that.
  105. ;;
  106. ;; Returns:
  107. ;; - A: The result list
  108. ;; - LA: The length of the result list
  109. ;; - UNUSED: The unused tail of U.
  110. (define (grow s ls ls2 u lw) ; The core of the sort algorithm.
  111. (if (or (<= lw ls) (not (pair? u))) ; Met quota or out of data?
  112. (values s ls u) ; If so, we're done.
  113. (mlet (((ls2) (let lp ((ls2 ls2))
  114. (let ((ls2*2 (+ ls2 ls2)))
  115. (if (<= ls2*2 ls) (lp ls2*2) ls2))))
  116. ;; LS2 is now the largest power of two <= LS.
  117. ;; (Just think of it as being roughly LS.)
  118. ((r lr u2) (getrun u)) ; Get a run, then
  119. ((t lt u3) (grow r lr 1 u2 ls2))) ; grow it up to be T.
  120. (grow (merge s t) (+ ls lt) ; Merge S & T,
  121. (+ ls2 ls2) u3 lw)))) ; and loop.
  122. ;; Note: (LENGTH LIS) or any constant guaranteed
  123. ;; to be greater can be used in place of INFINITY.
  124. (if (pair? lis) ; Don't sort an empty list.
  125. (mlet (((r lr tail) (getrun lis)) ; Pick off an initial run,
  126. ((infinity) #o100000000) ; then grow it up maximally.
  127. ((a la v) (grow r lr 1 tail infinity)))
  128. a)
  129. '()))
  130. ;;; (list-merge-sort! < lis)
  131. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  132. ;;; A natural, stable, destructive, in-place list merge sort.
  133. ;;; - natural: picks off maximal contiguous runs of pre-ordered data.
  134. ;;; - stable: won't invert the order of equal elements in the input list.
  135. ;;; - destructive, in-place: this routine allocates no extra working memory;
  136. ;;; it simply rearranges the list with SET-CDR! operations.
  137. (define (list-merge-sort! elt< lis)
  138. ;; (getrun lis) -> runlen last rest
  139. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  140. ;; Pick a run of non-decreasing data off of non-empty list LIS.
  141. ;; Return the length of this run, the last cons cell of the run,
  142. ;; and the following list.
  143. (define (getrun lis)
  144. (let lp ((lis lis) (x (car lis)) (i 1) (next (cdr lis)))
  145. (if (pair? next)
  146. (let ((y (car next)))
  147. (if (elt< y x)
  148. (values i lis next)
  149. (lp next y (+ i 1) (cdr next))))
  150. (values i lis next))))
  151. ;; (merge! a enda b endb) -> [m endm]
  152. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  153. ;; Destructively and stably merge non-empty lists A & B.
  154. ;; The last cons of A is ENDA. (The cdr of ENDA can be non-nil.)
  155. ;; the last cons of B is ENDB. (The cdr of ENDB can be non-nil.)
  156. ;;
  157. ;; Return the first and last cons cells of the merged list.
  158. ;; This routine is iterative & in-place: it runs in constant stack and
  159. ;; doesn't allocate any cons cells. It is also tedious but simple; don't
  160. ;; bother reading it unless necessary.
  161. (define (merge! a enda b endb)
  162. ;; The logic of these two loops is completely driven by these invariants:
  163. ;; SCAN-A: (CDR PREV) = A. X = (CAR A). Y = (CAR B).
  164. ;; SCAN-B: (CDR PREV) = B. X = (CAR A). Y = (CAR B).
  165. (letrec ((scan-a (lambda (prev x a y b) ; Zip down A until we
  166. (cond ((elt< y x) ; find an elt > (CAR B).
  167. (set-cdr! prev b)
  168. (let ((next-b (cdr b)))
  169. (if (eq? b endb)
  170. (begin (set-cdr! b a) enda) ; Done.
  171. (scan-b b x a (car next-b) next-b))))
  172. ((eq? a enda) (maybe-set-cdr! a b) endb) ; Done.
  173. (else (let ((next-a (cdr a))) ; Continue scan.
  174. (scan-a a (car next-a) next-a y b))))))
  175. (scan-b (lambda (prev x a y b) ; Zip down B while its
  176. (cond ((elt< y x) ; elts are < (CAR A).
  177. (if (eq? b endb)
  178. (begin (set-cdr! b a) enda) ; Done.
  179. (let ((next-b (cdr b))) ; Continue scan.
  180. (scan-b b x a (car next-b) next-b))))
  181. (else (set-cdr! prev a)
  182. (if (eq? a enda)
  183. (begin (maybe-set-cdr! a b) endb) ; Done.
  184. (let ((next-a (cdr a)))
  185. (scan-a a (car next-a) next-a y b)))))))
  186. ;; This guy only writes if he has to. Called at most once.
  187. ;; Pointer equality rules; pure languages are for momma's boys.
  188. (maybe-set-cdr! (lambda (pair val) (if (not (eq? (cdr pair) val))
  189. (set-cdr! pair val)))))
  190. (let ((x (car a)) (y (car b)))
  191. (if (elt< y x)
  192. ;; B starts the answer list.
  193. (values b (if (eq? b endb)
  194. (begin (set-cdr! b a) enda)
  195. (let ((next-b (cdr b)))
  196. (scan-b b x a (car next-b) next-b))))
  197. ;; A starts the answer list.
  198. (values a (if (eq? a enda)
  199. (begin (maybe-set-cdr! a b) endb)
  200. (let ((next-a (cdr a)))
  201. (scan-a a (car next-a) next-a y b))))))))
  202. ;; (grow s ends ls ls2 u lw) -> [a enda la unused]
  203. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  204. ;; The core routine.
  205. ;; - S is a sorted list of length LS > 1, with final cons cell ENDS.
  206. ;; (CDR ENDS) doesn't have to be nil.
  207. ;; - LS2 is some power of two <= LS.
  208. ;; - U is an unsorted list.
  209. ;; - LW is a positive integer.
  210. ;; Starting with S, and taking data from U as needed, produce
  211. ;; a sorted list of *at least* length LW, if there's enough data
  212. ;; (LW <= LS + length(U)), or use all of U if not.
  213. ;;
  214. ;; GROW takes maximal contiguous runs of data from U at a time;
  215. ;; it is allowed to return a list *longer* than LW if it gets lucky
  216. ;; with a long run.
  217. ;;
  218. ;; The key idea: If you want a merge operation to "pay for itself," the two
  219. ;; lists being merged should be about the same length. Remember that.
  220. ;;
  221. ;; Returns:
  222. ;; - A: The result list (not properly terminated)
  223. ;; - ENDA: The last cons cell of the result list.
  224. ;; - LA: The length of the result list
  225. ;; - UNUSED: The unused tail of U.
  226. (define (grow s ends ls ls2 u lw)
  227. (if (and (pair? u) (< ls lw))
  228. ;; We haven't met the LW quota but there's still some U data to use.
  229. (mlet (((ls2) (let lp ((ls2 ls2))
  230. (let ((ls2*2 (+ ls2 ls2)))
  231. (if (<= ls2*2 ls) (lp ls2*2) ls2))))
  232. ;; LS2 is now the largest power of two <= LS.
  233. ;; (Just think of it as being roughly LS.)
  234. ((lr endr u2) (getrun u)) ; Get a run from U;
  235. ((t endt lt u3) (grow u endr lr 1 u2 ls2)) ; grow it up to be T.
  236. ((st end-st) (merge! s ends t endt))) ; Merge S & T,
  237. (grow st end-st (+ ls lt) (+ ls2 ls2) u3 lw)) ; then loop.
  238. (values s ends ls u))) ; Done -- met LW quota or ran out of data.
  239. ;; Note: (LENGTH LIS) or any constant guaranteed
  240. ;; to be greater can be used in place of INFINITY.
  241. (if (pair? lis)
  242. (mlet (((lr endr rest) (getrun lis)) ; Pick off an initial run.
  243. ((infinity) #o100000000) ; Then grow it up maximally.
  244. ((a enda la v) (grow lis endr lr 1 rest infinity)))
  245. (set-cdr! enda '()) ; Nil-terminate answer.
  246. a) ; We're done.
  247. '())) ; Don't sort an empty list.
  248. ;;; Merge
  249. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  250. ;;; These two merge procedures are stable -- ties favor list A.
  251. (define (list-merge < a b)
  252. (cond ((not (pair? a)) b)
  253. ((not (pair? b)) a)
  254. (else (let recur ((x (car a)) (a a) ; A is a pair; X = (CAR A).
  255. (y (car b)) (b b)) ; B is a pair; Y = (CAR B).
  256. (if (< y x)
  257. (let ((b (cdr b)))
  258. (if (pair? b)
  259. (cons y (recur x a (car b) b))
  260. (cons y a)))
  261. (let ((a (cdr a)))
  262. (if (pair? a)
  263. (cons x (recur (car a) a y b))
  264. (cons x b))))))))
  265. ;;; This destructive merge does as few SET-CDR!s as it can -- for example, if
  266. ;;; the list is already sorted, it does no SET-CDR!s at all. It is also
  267. ;;; iterative, running in constant stack.
  268. (define (list-merge! < a b)
  269. ;; The logic of these two loops is completely driven by these invariants:
  270. ;; SCAN-A: (CDR PREV) = A. X = (CAR A). Y = (CAR B).
  271. ;; SCAN-B: (CDR PREV) = B. X = (CAR A). Y = (CAR B).
  272. (letrec ((scan-a (lambda (prev a x b y) ; Zip down A doing
  273. (if (< y x) ; no SET-CDR!s until
  274. (let ((next-b (cdr b))) ; we hit a B elt that
  275. (set-cdr! prev b) ; has to be inserted.
  276. (if (pair? next-b)
  277. (scan-b b a x next-b (car next-b))
  278. (set-cdr! b a)))
  279. (let ((next-a (cdr a)))
  280. (if (pair? next-a)
  281. (scan-a a next-a (car next-a) b y)
  282. (set-cdr! a b))))))
  283. (scan-b (lambda (prev a x b y) ; Zip down B doing
  284. (if (< y x) ; no SET-CDR!s until
  285. (let ((next-b (cdr b))) ; we hit an A elt that
  286. (if (pair? next-b) ; has to be
  287. (scan-b b a x next-b (car next-b)) ; inserted.
  288. (set-cdr! b a)))
  289. (let ((next-a (cdr a)))
  290. (set-cdr! prev a)
  291. (if (pair? next-a)
  292. (scan-a a next-a (car next-a) b y)
  293. (set-cdr! a b)))))))
  294. (cond ((not (pair? a)) b)
  295. ((not (pair? b)) a)
  296. ;; B starts the answer list.
  297. ((< (car b) (car a))
  298. (let ((next-b (cdr b)))
  299. (if (null? next-b)
  300. (set-cdr! b a)
  301. (scan-b b a (car a) next-b (car next-b))))
  302. b)
  303. ;; A starts the answer list.
  304. (else (let ((next-a (cdr a)))
  305. (if (null? next-a)
  306. (set-cdr! a b)
  307. (scan-a a next-a (car next-a) b (car b))))
  308. a))))
  309. ;;; Copyright
  310. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  311. ;;; This code is
  312. ;;; Copyright (c) 1998 by Olin Shivers.
  313. ;;; The terms are: You may do as you please with this code, as long as
  314. ;;; you do not delete this notice or hold me responsible for any outcome
  315. ;;; related to its use.
  316. ;;;
  317. ;;; Blah blah blah.
  318. ;;; Code tuning & porting
  319. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  320. ;;; This is very portable code. It's R4RS with the following exceptions:
  321. ;;; - The R5RS multiple-value VALUES & CALL-WITH-VALUES procedures for
  322. ;;; handling multiple-value return.
  323. ;;;
  324. ;;; This code is *tightly* bummed as far as I can go in portable Scheme.
  325. ;;;
  326. ;;; - The fixnum arithmetic in LIST-MERGE-SORT! and COUNTED-LIST-MERGE!
  327. ;;; that could be safely switched over to unsafe, fixnum-specific ops,
  328. ;;; if you're sure that 2*maxlen is a fixnum, where maxlen is the length
  329. ;;; of the longest list you could ever have.
  330. ;;;
  331. ;;; - I typically write my code in a style such that every CAR and CDR
  332. ;;; application is protected by an upstream PAIR?. This is the case in this
  333. ;;; code, so all the CAR's and CDR's could safely switched over to unsafe
  334. ;;; versions. But check over the code before you do it, in case the source
  335. ;;; has been altered since I wrote this.