cholesky.c 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253
  1. /*Daala video codec
  2. Copyright (c) 2008-2012 Daala project contributors. All rights reserved.
  3. Redistribution and use in source and binary forms, with or without
  4. modification, are permitted provided that the following conditions are met:
  5. - Redistributions of source code must retain the above copyright notice, this
  6. list of conditions and the following disclaimer.
  7. - Redistributions in binary form must reproduce the above copyright notice,
  8. this list of conditions and the following disclaimer in the documentation
  9. and/or other materials provided with the distribution.
  10. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
  11. AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  12. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  13. DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
  14. FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  15. DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  16. SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  17. CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  18. OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  19. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
  20. #ifdef HAVE_CONFIG_H
  21. #include "config.h"
  22. #endif
  23. #include <stdio.h>
  24. #include <stdlib.h>
  25. #include <float.h>
  26. #include <math.h>
  27. #include <string.h>
  28. #include "cholesky.h"
  29. /*Implements several Cholesky factorization routines, including an unpivoted
  30. routine (for well-conditioned positive definite matrices), a basic diagonal
  31. pivoting routine, and a strong rank-revealing routine \cite{GM04}, a
  32. complete orthogonal decomposition for the rank deficient case, and a minimum
  33. norm least squares solver.
  34. @ARTICLE{GM04,
  35. author="Ming Gu and Luiza Miranian",
  36. title="Strong Rank Revealing {Cholesky} Factorization",
  37. journal="Electronic Transactions on Numerical Analysis",
  38. volume=17,
  39. pages="76--92",
  40. month=Feb,
  41. year=2004
  42. }*/
  43. /*Expand the factorization to encompass the next row of the input matrix.*/
  44. static void ch_update(double *_rr,double _alpha,int _k,int _n){
  45. int i;
  46. int j;
  47. _rr[UT_IDX(_k,_k,_n)]=_alpha;
  48. for(i=_k+1;i<_n;i++)_rr[UT_IDX(_k,i,_n)]/=_alpha;
  49. for(i=_k+1;i<_n;i++){
  50. double t;
  51. t=_rr[UT_IDX(_k,i,_n)];
  52. for(j=i;j<_n;j++)_rr[UT_IDX(i,j,_n)]-=t*_rr[UT_IDX(_k,j,_n)];
  53. }
  54. }
  55. static int cholesky_unpivoted(double *_rr,double _tol,int _n){
  56. double akk;
  57. int k;
  58. /*We derive the tolerance from \cite{High90}.
  59. Higham reported that akk*50*DBL_EPSILON was always sufficient in his
  60. numerical experiments on matrices up to 50x50.
  61. We use the empirical bound of 10 on ||W||_2 observed when pivoting, even
  62. though we do no pivoting here, so this is optimistic.
  63. @INPROCEEDINGS{High90,
  64. author="Nicholas J. Higham",
  65. title="Chapter 9: Analysis of the {Cholesky} Decomposition of a
  66. Semi-Definite Matrix",
  67. editor="Maurice G. Cox and Sven J. Hammarling",
  68. booktitle="Reliable Numerical Computation",
  69. publisher="Oxford University Press",
  70. pages="161--185",
  71. year=1990
  72. }*/
  73. akk=0;
  74. for(k=0;k<_n;k++)if(_rr[UT_IDX(k,k,_n)]>akk)akk=_rr[UT_IDX(k,k,_n)];
  75. _tol*=40*_n*(_n+1)*akk;
  76. for(k=0;k<_n&&_rr[UT_IDX(k,k,_n)]>_tol;k++){
  77. ch_update(_rr,sqrt(_rr[UT_IDX(k,k,_n)]),k,_n);
  78. }
  79. return k;
  80. }
  81. /*Pivot: swap row and column _i of C^(_k) with row and column _k.
  82. Note _rr[UT_IDX(_k,_k,_n)] is not set: it is assumed this will be set by the
  83. caller, and the appropriate value must have already been saved.*/
  84. static void ch_pivot(double *_rr,double *_wwt,int *_pivot,
  85. int _k,int _i,int _n){
  86. double t;
  87. int j;
  88. for(j=0;j<_k;j++)CP_SWAP(_rr[UT_IDX(j,_k,_n)],_rr[UT_IDX(j,_i,_n)],t);
  89. for(j=_k+1;j<_i;j++)CP_SWAP(_rr[UT_IDX(_k,j,_n)],_rr[UT_IDX(j,_i,_n)],t);
  90. for(j=_i+1;j<_n;j++)CP_SWAP(_rr[UT_IDX(_k,j,_n)],_rr[UT_IDX(_i,j,_n)],t);
  91. _rr[UT_IDX(_i,_i,_n)]=_rr[UT_IDX(_k,_k,_n)];
  92. if(_wwt!=NULL){
  93. for(j=0;j<_k;j++)CP_SWAP(_wwt[SLT_IDX(_i,j)],_wwt[SLT_IDX(_k,j)],t);
  94. }
  95. CP_SWAP(_pivot[_k],_pivot[_i],j);
  96. }
  97. int cholesky(double *_rr,int *_pivot,double _tol,int _n){
  98. double akk;
  99. int pi;
  100. int j;
  101. int k;
  102. if(_pivot==NULL)return cholesky_unpivoted(_rr,_tol,_n);
  103. /*Find the first pivot element.*/
  104. akk=0;
  105. pi=-1;
  106. for(j=0;j<_n;j++)if(_rr[UT_IDX(j,j,_n)]>akk){
  107. pi=j;
  108. akk=_rr[UT_IDX(j,j,_n)];
  109. }
  110. _tol*=40*_n*(_n+1)*akk;
  111. /*Initialize the pivot list.*/
  112. for(k=0;k<_n;k++)_pivot[k]=k;
  113. for(k=0;pi>=0;k++){
  114. if(pi!=k)ch_pivot(_rr,NULL,_pivot,k,pi,_n);
  115. ch_update(_rr,sqrt(akk),k,_n);
  116. /*Find the next pivot element.*/
  117. akk=_tol;
  118. pi=-1;
  119. for(j=k+1;j<_n;j++)if(_rr[UT_IDX(j,j,_n)]>akk){
  120. akk=_rr[UT_IDX(j,j,_n)];
  121. pi=j;
  122. }
  123. }
  124. return k;
  125. }
  126. /*Forward substitution.*/
  127. static void ch_fwd_sub(const double *_rr,double *_x,int _k,int _n){
  128. int i;
  129. for(i=0;i<_k;i++){
  130. int j;
  131. _x[i]/=_rr[UT_IDX(i,i,_n)];
  132. for(j=i+1;j<_k;j++)_x[j]-=_rr[UT_IDX(i,j,_n)]*_x[i];
  133. }
  134. }
  135. /*Back substitution.*/
  136. static void ch_back_sub(const double *_rr,double *_x,int _k,int _n){
  137. int i;
  138. for(i=_k;i-->0;){
  139. int j;
  140. for(j=i+1;j<_k;j++)_x[i]-=_rr[UT_IDX(i,j,_n)]*_x[j];
  141. _x[i]/=_rr[UT_IDX(i,i,_n)];
  142. }
  143. }
  144. void chdecomp(double *_rr,double *_tau,int _r,int _n){
  145. int k;
  146. int i;
  147. int j;
  148. /*See Section 4 of \cite{HL69} for a derivation for a general matrix.
  149. We ignore the orthogonal matrix Q on the left, since we're already upper
  150. trapezoidal (and it would cancel with its transpose in the product R^T.R).
  151. @ARTICLE{HL69,
  152. author="Richard J. Hanson and Charles L. Lawson",
  153. title="Extensions and Applications of the Householder Algorithm for
  154. Solving Linear Least Squares",
  155. journal="Mathematics of Computation",
  156. volume=23,
  157. number=108,
  158. pages="787--812",
  159. month=Oct,
  160. year=1969
  161. }*/
  162. for(k=_r;k-->0;){
  163. double alpha;
  164. double beta;
  165. double s;
  166. double d2;
  167. /*Apply the Householder reflections from the previous rows.*/
  168. for(i=_r;--i>k;){
  169. s=_rr[UT_IDX(k,i,_n)];
  170. for(j=_r;j<_n;j++)s+=_rr[UT_IDX(k,j,_n)]*_rr[UT_IDX(i,j,_n)];
  171. s*=_tau[i];
  172. /*Note the negative here: we add an extra scale by -1 to the i'th column
  173. so that the diagonal entry remains positive.*/
  174. _rr[UT_IDX(k,i,_n)]=s-_rr[UT_IDX(k,i,_n)];
  175. for(j=_r;j<_n;j++)_rr[UT_IDX(k,j,_n)]-=s*_rr[UT_IDX(i,j,_n)];
  176. }
  177. /*Compute the reflection which zeros the right part of this row.*/
  178. alpha=_rr[UT_IDX(k,k,_n)];
  179. beta=alpha;
  180. for(j=_r;j<_n;j++){
  181. if(fabs(_rr[UT_IDX(k,j,_n)])>beta)beta=fabs(_rr[UT_IDX(k,j,_n)]);
  182. }
  183. s=1/beta;
  184. d2=(alpha*s)*(alpha*s);
  185. for(j=_r;j<_n;j++)d2+=(_rr[UT_IDX(k,j,_n)]*s)*(_rr[UT_IDX(k,j,_n)]*s);
  186. beta*=sqrt(d2);
  187. _tau[k]=alpha/beta+1;
  188. s=1/(alpha+beta);
  189. _rr[UT_IDX(k,k,_n)]=beta;
  190. for(j=_r;j<_n;j++)_rr[UT_IDX(k,j,_n)]*=s;
  191. }
  192. }
  193. int chsolve_worksz(const int *_pivot,int _n){
  194. return _pivot!=NULL?_n:0;
  195. }
  196. void chsolve(const double *_rr,const int *_pivot,const double *_tau,double *_x,
  197. const double *_b,double *_work,int _r,int _n){
  198. double *y;
  199. double s;
  200. int i;
  201. int j;
  202. int k;
  203. if(_pivot!=NULL){
  204. y=_work!=NULL?_work:(double *)malloc(chsolve_worksz(_pivot,_n)*sizeof(*y));
  205. for(i=0;i<_n;i++)y[i]=_b[_pivot[i]];
  206. }
  207. else{
  208. memmove(_x,_b,_n*sizeof(*_x));
  209. y=_x;
  210. }
  211. if(_r<_n){
  212. for(k=_r;k-->0;){
  213. s=y[k];
  214. for(j=_r;j<_n;j++)s+=y[j]*_rr[UT_IDX(k,j,_n)];
  215. s*=_tau[k];
  216. y[k]=s-y[k];
  217. for(j=_r;j<_n;j++)y[j]-=s*_rr[UT_IDX(k,j,_n)];
  218. }
  219. }
  220. ch_fwd_sub(_rr,y,_r,_n);
  221. ch_back_sub(_rr,y,_r,_n);
  222. if(_r<_n){
  223. memset(y+_r,0,(_n-_r)*sizeof(*y));
  224. for(k=0;k<_r;k++){
  225. s=-y[k];
  226. for(j=_r;j<_n;j++)s+=y[j]*_rr[UT_IDX(k,j,_n)];
  227. s*=_tau[k];
  228. y[k]=-(s+y[k]);
  229. for(j=_r;j<_n;j++)y[j]-=s*_rr[UT_IDX(k,j,_n)];
  230. }
  231. }
  232. if(_pivot!=NULL){
  233. for(i=0;i<_n;i++)_x[_pivot[i]]=y[i];
  234. if(_work==NULL)free(y);
  235. }
  236. }