123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475 |
- // Copyright 2009 The Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- // Package sort provides primitives for sorting slices and user-defined
- // collections.
- package sort
// Interface is the contract a type (typically a collection) has to fulfil
// so that the routines in this package can sort it. Its elements must be
// addressable by an integer index.
type Interface interface {
	// Len returns how many elements the collection holds.
	Len() int
	// Less reports whether the element at index i has to be ordered
	// ahead of the element at index j.
	Less(i, j int) bool
	// Swap exchanges the elements at indexes i and j.
	Swap(i, j int)
}
// min returns the smaller of a and b.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}
- // Insertion sort
- func insertionSort(data Interface, a, b int) {
- for i := a + 1; i < b; i++ {
- for j := i; j > a && data.Less(j, j-1); j-- {
- data.Swap(j, j-1)
- }
- }
- }
- // siftDown implements the heap property on data[lo, hi).
- // first is an offset into the array where the root of the heap lies.
- func siftDown(data Interface, lo, hi, first int) {
- root := lo
- for {
- child := 2*root + 1
- if child >= hi {
- break
- }
- if child+1 < hi && data.Less(first+child, first+child+1) {
- child++
- }
- if !data.Less(first+root, first+child) {
- return
- }
- data.Swap(first+root, first+child)
- root = child
- }
- }
- func heapSort(data Interface, a, b int) {
- first := a
- lo := 0
- hi := b - a
- // Build heap with greatest element at top.
- for i := (hi - 1) / 2; i >= 0; i-- {
- siftDown(data, i, hi, first)
- }
- // Pop elements, largest first, into end of data.
- for i := hi - 1; i >= 0; i-- {
- data.Swap(first, first+i)
- siftDown(data, lo, i, first)
- }
- }
- // Quicksort, following Bentley and McIlroy,
- // ``Engineering a Sort Function,'' SP&E November 1993.
- // medianOfThree moves the median of the three values data[a], data[b], data[c] into data[a].
- func medianOfThree(data Interface, a, b, c int) {
- m0 := b
- m1 := a
- m2 := c
- // bubble sort on 3 elements
- if data.Less(m1, m0) {
- data.Swap(m1, m0)
- }
- if data.Less(m2, m1) {
- data.Swap(m2, m1)
- }
- if data.Less(m1, m0) {
- data.Swap(m1, m0)
- }
- // now data[m0] <= data[m1] <= data[m2]
- }
- func swapRange(data Interface, a, b, n int) {
- for i := 0; i < n; i++ {
- data.Swap(a+i, b+i)
- }
- }
- func doPivot(data Interface, lo, hi int) (midlo, midhi int) {
- m := lo + (hi-lo)/2 // Written like this to avoid integer overflow.
- if hi-lo > 40 {
- // Tukey's ``Ninther,'' median of three medians of three.
- s := (hi - lo) / 8
- medianOfThree(data, lo, lo+s, lo+2*s)
- medianOfThree(data, m, m-s, m+s)
- medianOfThree(data, hi-1, hi-1-s, hi-1-2*s)
- }
- medianOfThree(data, lo, m, hi-1)
- // Invariants are:
- // data[lo] = pivot (set up by ChoosePivot)
- // data[lo <= i < a] = pivot
- // data[a <= i < b] < pivot
- // data[b <= i < c] is unexamined
- // data[c <= i < d] > pivot
- // data[d <= i < hi] = pivot
- //
- // Once b meets c, can swap the "= pivot" sections
- // into the middle of the slice.
- pivot := lo
- a, b, c, d := lo+1, lo+1, hi, hi
- for {
- for b < c {
- if data.Less(b, pivot) { // data[b] < pivot
- b++
- } else if !data.Less(pivot, b) { // data[b] = pivot
- data.Swap(a, b)
- a++
- b++
- } else {
- break
- }
- }
- for b < c {
- if data.Less(pivot, c-1) { // data[c-1] > pivot
- c--
- } else if !data.Less(c-1, pivot) { // data[c-1] = pivot
- data.Swap(c-1, d-1)
- c--
- d--
- } else {
- break
- }
- }
- if b >= c {
- break
- }
- // data[b] > pivot; data[c-1] < pivot
- data.Swap(b, c-1)
- b++
- c--
- }
- n := min(b-a, a-lo)
- swapRange(data, lo, b-n, n)
- n = min(hi-d, d-c)
- swapRange(data, c, hi-n, n)
- return lo + b - a, hi - (d - c)
- }
- func quickSort(data Interface, a, b, maxDepth int) {
- for b-a > 7 {
- if maxDepth == 0 {
- heapSort(data, a, b)
- return
- }
- maxDepth--
- mlo, mhi := doPivot(data, a, b)
- // Avoiding recursion on the larger subproblem guarantees
- // a stack depth of at most lg(b-a).
- if mlo-a < b-mhi {
- quickSort(data, a, mlo, maxDepth)
- a = mhi // i.e., quickSort(data, mhi, b)
- } else {
- quickSort(data, mhi, b, maxDepth)
- b = mlo // i.e., quickSort(data, a, mlo)
- }
- }
- if b-a > 1 {
- insertionSort(data, a, b)
- }
- }
- // Sort sorts data.
- // It makes one call to data.Len to determine n, and O(n*log(n)) calls to
- // data.Less and data.Swap. The sort is not guaranteed to be stable.
- func Sort(data Interface) {
- // Switch to heapsort if depth of 2*ceil(lg(n+1)) is reached.
- n := data.Len()
- maxDepth := 0
- for i := n; i > 0; i >>= 1 {
- maxDepth++
- }
- maxDepth *= 2
- quickSort(data, 0, n, maxDepth)
- }
- type reverse struct {
- // This embedded Interface permits Reverse to use the methods of
- // another Interface implementation.
- Interface
- }
- // Less returns the opposite of the embedded implementation's Less method.
- func (r reverse) Less(i, j int) bool {
- return r.Interface.Less(j, i)
- }
- // Reverse returns the reverse order for data.
- func Reverse(data Interface) Interface {
- return &reverse{data}
- }
- // IsSorted reports whether data is sorted.
- func IsSorted(data Interface) bool {
- n := data.Len()
- for i := n - 1; i > 0; i-- {
- if data.Less(i, i-1) {
- return false
- }
- }
- return true
- }
- // Convenience types for common cases
- // IntSlice attaches the methods of Interface to []int, sorting in increasing order.
- type IntSlice []int
- func (p IntSlice) Len() int { return len(p) }
- func (p IntSlice) Less(i, j int) bool { return p[i] < p[j] }
- func (p IntSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
- // Sort is a convenience method.
- func (p IntSlice) Sort() { Sort(p) }
- // Float64Slice attaches the methods of Interface to []float64, sorting in increasing order.
- type Float64Slice []float64
- func (p Float64Slice) Len() int { return len(p) }
- func (p Float64Slice) Less(i, j int) bool { return p[i] < p[j] || isNaN(p[i]) && !isNaN(p[j]) }
- func (p Float64Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
- // isNaN is a copy of math.IsNaN to avoid a dependency on the math package.
- func isNaN(f float64) bool {
- return f != f
- }
- // Sort is a convenience method.
- func (p Float64Slice) Sort() { Sort(p) }
- // StringSlice attaches the methods of Interface to []string, sorting in increasing order.
- type StringSlice []string
- func (p StringSlice) Len() int { return len(p) }
- func (p StringSlice) Less(i, j int) bool { return p[i] < p[j] }
- func (p StringSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
- // Sort is a convenience method.
- func (p StringSlice) Sort() { Sort(p) }
- // Convenience wrappers for common cases
- // Ints sorts a slice of ints in increasing order.
- func Ints(a []int) { Sort(IntSlice(a)) }
- // Float64s sorts a slice of float64s in increasing order.
- func Float64s(a []float64) { Sort(Float64Slice(a)) }
- // Strings sorts a slice of strings in increasing order.
- func Strings(a []string) { Sort(StringSlice(a)) }
- // IntsAreSorted tests whether a slice of ints is sorted in increasing order.
- func IntsAreSorted(a []int) bool { return IsSorted(IntSlice(a)) }
- // Float64sAreSorted tests whether a slice of float64s is sorted in increasing order.
- func Float64sAreSorted(a []float64) bool { return IsSorted(Float64Slice(a)) }
- // StringsAreSorted tests whether a slice of strings is sorted in increasing order.
- func StringsAreSorted(a []string) bool { return IsSorted(StringSlice(a)) }
// Notes on stable sorting:
// The algorithms used are simple and provably correct on all input and use
// only logarithmic additional stack space. They perform well when compared
// experimentally to other stable in-place sorting algorithms.
//
// Remarks on other algorithms evaluated:
//  - GCC's 4.6.3 stable_sort with merge_without_buffer from libstdc++:
//    Not faster.
//  - GCC's __rotate for block rotations: Not faster.
//  - "Practical in-place mergesort" from Jyrki Katajainen, Tomi A. Pasanen
//    and Jukka Teuhola; Nordic Journal of Computing 3,1 (1996), 27-40:
//    The given algorithms are in-place, number of Swap and Assignments
//    grow as n log n but the algorithm is not stable.
//  - "Fast Stable In-Place Sorting with O(n) Data Moves" J.I. Munro and
//    V. Raman in Algorithmica (1996) 16, 115-160:
//    This algorithm either needs additional 2n bits or works only if there
//    are enough different elements available to encode some permutations
//    which have to be undone later (so not stable on any input).
//  - All the optimal in-place sorting/merging algorithms I found are either
//    unstable or rely on enough different elements in each step to encode the
//    performed block rearrangements. See also "In-Place Merging Algorithms",
//    Denham Coates-Evely, Department of Computer Science, Kings College,
//    January 2004 and the references in there.
//  - Often "optimal" algorithms are optimal in the number of assignments
//    but Interface has only Swap as operation.

- // Stable sorts data while keeping the original order of equal elements.
- //
- // It makes one call to data.Len to determine n, O(n*log(n)) calls to
- // data.Less and O(n*log(n)*log(n)) calls to data.Swap.
- func Stable(data Interface) {
- n := data.Len()
- blockSize := 20
- a, b := 0, blockSize
- for b <= n {
- insertionSort(data, a, b)
- a = b
- b += blockSize
- }
- insertionSort(data, a, n)
- for blockSize < n {
- a, b = 0, 2*blockSize
- for b <= n {
- symMerge(data, a, a+blockSize, b)
- a = b
- b += 2 * blockSize
- }
- symMerge(data, a, a+blockSize, n)
- blockSize *= 2
- }
- }
- // SymMerge merges the two sorted subsequences data[a:m] and data[m:b] using
- // the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum
- // Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz
- // Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in
- // Computer Science, pages 714-723. Springer, 2004.
- //
- // Let M = m-a and N = b-n. Wolog M < N.
- // The recursion depth is bound by ceil(log(N+M)).
- // The algorithm needs O(M*log(N/M + 1)) calls to data.Less.
- // The algorithm needs O((M+N)*log(M)) calls to data.Swap.
- //
- // The paper gives O((M+N)*log(M)) as the number of assignments assuming a
- // rotation algorithm which uses O(M+N+gcd(M+N)) assignments. The argumentation
- // in the paper carries through for Swap operations, especially as the block
- // swapping rotate uses only O(M+N) Swaps.
- func symMerge(data Interface, a, m, b int) {
- if a >= m || m >= b {
- return
- }
- mid := a + (b-a)/2
- n := mid + m
- start := 0
- if m > mid {
- start = n - b
- r, p := mid, n-1
- for start < r {
- c := start + (r-start)/2
- if !data.Less(p-c, c) {
- start = c + 1
- } else {
- r = c
- }
- }
- } else {
- start = a
- r, p := m, n-1
- for start < r {
- c := start + (r-start)/2
- if !data.Less(p-c, c) {
- start = c + 1
- } else {
- r = c
- }
- }
- }
- end := n - start
- rotate(data, start, m, end)
- symMerge(data, a, start, mid)
- symMerge(data, mid, end, b)
- }
- // Rotate two consecutives blocks u = data[a:m] and v = data[m:b] in data:
- // Data of the form 'x u v y' is changed to 'x v u y'.
- // Rotate performs at most b-a many calls to data.Swap.
- func rotate(data Interface, a, m, b int) {
- i := m - a
- if i == 0 {
- return
- }
- j := b - m
- if j == 0 {
- return
- }
- if i == j {
- swapRange(data, a, m, i)
- return
- }
- p := a + i
- for i != j {
- if i > j {
- swapRange(data, p-i, p, j)
- i -= j
- } else {
- swapRange(data, p-i, p+j-i, i)
- j -= i
- }
- }
- swapRange(data, p-i, p, i)
- }
/*
Complexity of Stable Sorting


Complexity of block swapping rotation

Each Swap puts one new element into its correct, final position.
Elements which reach their final position are no longer moved.
Thus block swapping rotation needs |u|+|v| calls to Swap.
This is best possible as each element might need a move.

Pay attention when comparing to other optimal algorithms which
typically count the number of assignments instead of swaps:
E.g. the optimal algorithm of Dudzinski and Dydek for in-place
rotations uses O(u + v + gcd(u,v)) assignments which is
better than our O(3 * (u+v)) as gcd(u,v) <= u.


Stable sorting by SymMerge and BlockSwap rotations

SymMerge complexity for same size input M = N:
Calls to Less:  O(M*log(N/M+1)) = O(N*log(2)) = O(N)
Calls to Swap:  O((M+N)*log(M)) = O(2*N*log(N)) = O(N*log(N))

(The following argument does not fuss over a missing -1 or
other stuff which does not impact the final result).

Let n = data.Len(). Assume n = 2^k.

Plain merge sort performs log(n) = k iterations.
On iteration i the algorithm merges 2^(k-i) blocks, each of size 2^i.

Thus iteration i of merge sort performs:
Calls to Less  O(2^(k-i) * 2^i) = O(2^k) = O(2^log(n)) = O(n)
Calls to Swap  O(2^(k-i) * 2^i * log(2^i)) = O(2^k * i) = O(n*i)

In total k = log(n) iterations are performed; so in total:
Calls to Less  O(log(n) * n)
Calls to Swap  O(n + 2*n + 3*n + ... + (k-1)*n + k*n)
   = O((k/2) * k * n) = O(n * k^2) = O(n * log^2(n))


Above results should generalize to arbitrary n = 2^k + p
and should not be influenced by the initial insertion sort phase:
Insertion sort is O(n^2) on Swap and Less, thus O(bs^2) per block of
size bs at n/bs blocks:  O(bs*n) Swaps and Less during insertion sort.
Merge sort iterations start at i = log(bs). With t = log(bs) constant:
Calls to Less  O((log(n)-t) * n + bs*n) = O(log(n)*n + (bs-t)*n)
   = O(n * log(n))
Calls to Swap  O(n * log^2(n) - (t^2+t)/2*n) = O(n * log^2(n))

*/
|