/* trans_gain.c */
  1. /* Daala video codec
  2. Copyright (c) 2013 Daala project contributors. All rights reserved.
  3. Redistribution and use in source and binary forms, with or without
  4. modification, are permitted provided that the following conditions are met:
  5. - Redistributions of source code must retain the above copyright notice, this
  6. list of conditions and the following disclaimer.
  7. - Redistributions in binary form must reproduce the above copyright notice,
  8. this list of conditions and the following disclaimer in the documentation
  9. and/or other materials provided with the distribution.
  10. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  11. AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  12. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  13. DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
  14. FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  15. DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  16. SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  17. CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  18. OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  19. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
  20. /* 1D coding gain (dB) **********************
  21. AR p=.95 4x4 8x8 16x16
  22. ------------------------------------------
  23. KLT 7.5825 8.8462 9.4781
  24. DCT 7.5701 8.8259 9.4555
  25. CDF(9/7) 8.4687 9.4592 9.7866
  26. LappedKLT 8.5633 9.4908 9.8951
  27. LappedDCT 8.5523 9.4871 9.8929
  28. Subset 1 4x4 8x8 16x16
  29. ------------------------------------------
  30. KLT original 8.7714 10.2588 11.0039
  31. collapsed 8.7714 10.2588 11.0039
  32. monty 8.7654 10.2628 11.0292
  33. DCT 8.7620 10.2427 10.9861
  34. 8.7620 10.2427 10.9861
  35. 8.7561 10.2467 11.0115
  36. CDF(9/7) 9.3794 10.5932 11.0685
  37. 9.3845 10.5957 11.0825
  38. 9.4155 10.6576 11.1965
  39. LappedKLT 9.6276 10.7860 11.3254
  40. 9.6277 10.7867 11.3296
  41. 9.6295 10.8056 11.3722
  42. LappedDCT 9.6213 10.7832 11.3230
  43. 9.6214 10.7839 11.3272
  44. 9.6232 10.8028 11.3698
  45. Subset 3 4x4 8x8 16x16
  46. ------------------------------------------
  47. KLT original 10.5669 12.3711 13.2694
  48. collapsed 10.5669 12.3711 13.2694
  49. monty 10.5495 12.3573 13.2729
  50. DCT 10.5546 12.3532 13.2535
  51. 10.5547 12.3532 13.2535
  52. 10.5373 12.3395 13.2572
  53. CDF(9/7) 11.3102 12.6838 13.1845
  54. 11.3106 12.6871 13.2009
  55. 11.3389 12.7764 13.4084
  56. LappedKLT 11.6048 13.0138 13.6488
  57. 11.6046 13.0136 13.6491
  58. 11.5922 13.0126 13.6790
  59. LappedDCT 11.5970 13.0111 13.6464
  60. 11.5968 13.0110 13.6467
  61. 11.5844 13.0099 13.6766
  62. */
  63. /* 2D coding gain (dB) **********************
  64. AR p=.95 4x4 8x8 16x16
  65. ------------------------------------------
  66. KLT 15.1649 17.6924 18.9562
  67. DCT 15.1403 17.6518 18.9109
  68. CDF(9/7) 16.9374 18.9183 19.5731
  69. LappedKLT 17.1265 18.9816 19.7902
  70. LappedDCT 17.1047 18.9741 19.7858
  71. Subset 1 4x4 8x8 16x16
  72. ------------------------------------------
  73. KLT original 12.4432 ------- -------
  74. collapsed 12.4428 ------- -------
  75. monty 12.4732 13.6167 14.1170
  76. DCT 12.3695 ------- -------
  77. 12.3698 ------- -------
  78. 12.4182 13.5473 14.0536
  79. CDF(9/7) ------- ------- -------
  80. ------- ------- -------
  81. 13.1425 13.8184 14.0110
  82. LappedKLT 13.2807 ------- -------
  83. 13.2808 ------- -------
  84. 13.3452 14.1273 14.4041
  85. LappedDCT 13.2682 ------- -------
  86. 13.2685 ------- -------
  87. 13.3330 14.1215 14.3981
  88. Subset 3 4x4 8x8 16x16
  89. ------------------------------------------
  90. KLT monty 14.9078 16.2416 16.7839
  91. DCT 14.8313 16.1578 16.7221
  92. CDF(9/7) 15.7553 16.4760 16.6656
  93. LappedKLT 15.9763 16.8549 17.1181
  94. LappedDCT 15.9627 16.8507 17.1152
  95. */
  96. #ifdef HAVE_CONFIG_H
  97. #include "config.h"
  98. #endif
  99. #include <stdlib.h>
  100. #include "od_defs.h"
  101. #include "od_filter.h"
  102. #include "trans_tools.h"
  103. #define BLOCKSIZE_LOG (4)
  104. #define USE_LAPPING (1)
  105. #define USE_KLT (1)
  106. #define USE_DCT (0)
  107. #define USE_WAVELET (0)
  108. #define USE_2D (1)
  109. #define USE_FILES (1)
  110. #define USE_AR95 (0)
  111. #define COMPUTE_NATHAN (1)
  112. #define PRINT_COV (0)
  113. #define BLOCKSIZE (1<<BLOCKSIZE_LOG)
  114. #if USE_WAVELET
  115. #if BLOCKSIZE_LOG==1
  116. # define SUPPORT (20)
  117. #else
  118. # if BLOCKSIZE_LOG==2
  119. # define SUPPORT (40)
  120. # else
  121. # if BLOCKSIZE_LOG==3
  122. # define SUPPORT (80)
  123. # else
  124. # if BLOCKSIZE_LOG==4
  125. # define SUPPORT (160)
  126. # else
  127. # error "no support configuration for transform size"
  128. # endif
  129. # endif
  130. # endif
  131. #endif
  132. #else
  133. #if USE_LAPPING||COMPUTE_NATHAN
  134. /* larger than needed for 'new' covariance code, but it won't alter
  135. the answer, just produce a larger than needed covariance matrix.
  136. It is needed to make the boundary conditions of the 'old'
  137. covariance code match the trans and trans2d utils */
  138. #define SUPPORT (BLOCKSIZE*2)
  139. #else
  140. #define SUPPORT (BLOCKSIZE)
  141. #endif
  142. #endif
  143. const int *f;
  144. typedef void (*ne_fdct_func_1d)(double *_out,const double *_in,int _in_stride);
  145. typedef void (*ne_idct_func_1d)(double *_out,int _out_stride,const double *_in);
  146. extern const ne_idct_func_1d OD_IDCT_1D_DOUBLE[OD_NBSIZES];
  147. extern const ne_fdct_func_1d OD_FDCT_1D_DOUBLE[OD_NBSIZES];
  148. #if USE_FILES
/* Accumulator state for a collapsed (stationarity-assuming) covariance
   estimate.  Raw moments are kept as integer sums so that accumulation
   over many frames is exact; cov_compute() turns them into a normalized
   covariance in cov[]. */
typedef struct {
  /* Number of accumulator bins: one per lag (1D) or per lag pair (2D). */
  int sz;
  u_int64_t *n;      /* sample count per bin */
  u_int64_t *acc_i;  /* sum of the first sample of each pair, per bin */
  u_int64_t *acc_j;  /* sum of the second sample of each pair, per bin */
  u_int64_t *acc_ij; /* sum of products of each pair, per bin */
  double *cov;       /* normalized covariance; filled by cov_compute() */
} cov_state;
  157. static void cov_init(cov_state *_this, int _sz){
  158. _this->sz = _sz;
  159. _this->n = (u_int64_t *)calloc(_sz,sizeof(*_this->n));
  160. _this->acc_i = (u_int64_t *)calloc(_sz,sizeof(*_this->acc_i));
  161. _this->acc_j = (u_int64_t *)calloc(_sz,sizeof(*_this->acc_j));
  162. _this->acc_ij= (u_int64_t *)calloc(_sz,sizeof(*_this->acc_ij));
  163. _this->cov = (double *)calloc(_sz,sizeof(*_this->cov));
  164. }
  165. static void cov_clear(cov_state *_this){
  166. if(_this){
  167. if(_this->n) free(_this->n);
  168. if(_this->acc_i) free(_this->acc_i);
  169. if(_this->acc_j) free(_this->acc_j);
  170. if(_this->acc_ij) free(_this->acc_ij);
  171. if(_this->cov) free(_this->cov);
  172. }
  173. }
  174. #if USE_2D
  175. /* 1D and 2D could both use the same generalized code, but it would be
  176. harder to read */
  177. static void cov_accumulate_2d(cov_state *_this,
  178. const unsigned char *_data,
  179. int _stride, int _w, int _h){
  180. int x,y,i,j;
  181. int sz = sqrt(_this->sz);
  182. for(i=0;i<sz;i++){
  183. for(j=0;j<sz;j++){
  184. int ij = i*sz+j;
  185. for(y=0;y<_h-i;y++){
  186. const unsigned char *di=_data+y*_stride;
  187. const unsigned char *dj=_data+(y+i)*_stride+j;
  188. for(x=0;x<_w-j;x++){
  189. ++_this->n[ij];
  190. _this->acc_i[ij] += di[x];
  191. _this->acc_j[ij] += dj[x];
  192. _this->acc_ij[ij] += di[x]*dj[x];
  193. }
  194. }
  195. }
  196. }
  197. }
  198. #else
  199. static void cov_accumulate_1d(cov_state *_this,
  200. const unsigned char *_data,
  201. int _stride, int _n){
  202. int i,j;
  203. for(i=0;i<_this->sz;i++){
  204. const unsigned char *di=_data;
  205. const unsigned char *dj=_data+i*_stride;
  206. for(j=0;j<_n-i;j++){
  207. ++_this->n[i];
  208. _this->acc_i[i] += di[j*_stride];
  209. _this->acc_j[i] += dj[j*_stride];
  210. _this->acc_ij[i] += di[j*_stride]*dj[j*_stride];
  211. }
  212. }
  213. }
  214. #endif
  215. static void cov_combine(cov_state *_a,const cov_state *_b){
  216. int i;
  217. for(i=0;i<_a->sz;i++){
  218. _a->acc_i[i] += _b->acc_i[i];
  219. _a->acc_j[i] += _b->acc_j[i];
  220. _a->acc_ij[i] += _b->acc_ij[i];
  221. _a->n[i] += _b->n[i];
  222. }
  223. }
  224. static void cov_compute(cov_state *_this){
  225. int i;
  226. for(i=0;i<_this->sz;i++)
  227. _this->cov[i] =
  228. ((double)_this->acc_ij[i] -
  229. (double)_this->acc_i[i]*
  230. _this->acc_j[i]/_this->n[i])/_this->n[i];
  231. for(i=1;i<_this->sz;i++)
  232. _this->cov[i] /= _this->cov[0];
  233. _this->cov[0]=1.;
  234. }
/* Read the first frame of each input video and accumulate covariance
   statistics from its luma plane.  Files are processed in parallel; each
   OpenMP thread accumulates into its own _ctx/_cov slot (indexed by thread
   id) so no locking is needed.  _argv[1.._argc-1] are the input paths. */
static void process_files(trans_ctx *_ctx,
 cov_state *_cov,
 int _argc,
 const char *_argv[]){
  int ai;
#pragma omp parallel for schedule(dynamic)
  for(ai=1;ai<_argc;ai++){
    FILE *fin;
    video_input vid;
    video_input_info info;
    video_input_ycbcr ycbcr;
    int tid;
    cov_state *cov;
    int x0,y0,x1,y1;
    fin=fopen(_argv[ai],"rb");
    if(fin==NULL){
      fprintf(stderr,"Could not open '%s' for reading.\n",_argv[ai]);
      continue;
    }
    /* NOTE(review): fin is never fclose()d on this error path (and it is
       unclear whether video_input_close() closes it on success) — possible
       FILE* leak; confirm video_input ownership semantics. */
    if(video_input_open(&vid,fin)<0){
      fprintf(stderr,"Error reading video info from '%s'.\n",_argv[ai]);
      continue;
    }
    video_input_get_info(&vid,&info);
    /* Only the first frame of each input contributes statistics. */
    if(video_input_fetch_frame(&vid,ycbcr,NULL)<0){
      fprintf(stderr,"Error reading first frame from '%s'.\n",_argv[ai]);
      continue;
    }
    tid=OD_OMP_GET_THREAD;
    cov=_cov+tid;
    /* Active picture region within the coded frame. */
    x0 = info.pic_x;
    y0 = info.pic_y;
    x1 = x0 + info.pic_w;
    y1 = y0 + info.pic_h;
    fprintf(stderr,"%s\n",_argv[ai]);
    /* map */
    {
      /* Plane 0 is luma; chroma is ignored. */
      int stride=ycbcr[0].stride;
      const unsigned char *data=ycbcr[0].data;
#if COMPUTE_NATHAN
      /* block-based full covariance computation (unlord style) */
      int nxblocks=info.pic_w>>BLOCKSIZE_LOG;
      int nyblocks=info.pic_h>>BLOCKSIZE_LOG;
      trans_ctx *ctx=_ctx+tid;
# if USE_2D
      /* Slide a SUPPORT x SUPPORT window one pixel at a time and hand each
         window to the full-covariance accumulator. */
      unsigned char buf[SUPPORT][SUPPORT];
      int x,y,i,j;
      image_ctx_init(&ctx->img,_argv[ai],nxblocks,nyblocks);
      for(y=0;y<nyblocks*BLOCKSIZE-SUPPORT+1;y++){
        for(x=0;x<nxblocks*BLOCKSIZE-SUPPORT+1;x++){
          for(j=0;j<SUPPORT;j++){
            for(i=0;i<SUPPORT;i++){
              buf[j][i]=data[(y0+y+j)*stride+(x0+x+i)];
            }
          }
          trans_data_add(&ctx->td,(unsigned char *)buf);
        }
      }
# else
      /* 1D mode: feed every horizontal and vertical SUPPORT-long run. */
      unsigned char buf[SUPPORT];
      int x,y,z;
      image_ctx_init(&ctx->img,_argv[ai],nxblocks,nyblocks);
      /* add the rows */
      for(y=0;y<nyblocks*BLOCKSIZE;y++){
        for(x=0;x<nxblocks*BLOCKSIZE-SUPPORT+1;x++){
          for(z=0;z<SUPPORT;z++){
            buf[z]=data[(y+y0)*stride+x+x0+z];
          }
          trans_data_add(&ctx->td,buf);
        }
      }
      /* add the columns */
      for(y=0;y<nyblocks*BLOCKSIZE-SUPPORT+1;y++){
        for(x=0;x<nxblocks*BLOCKSIZE;x++){
          for(z=0;z<SUPPORT;z++){
            buf[z]=data[(y0+y+z)*stride+x+x0];
          }
          trans_data_add(&ctx->td,buf);
        }
      }
# endif
#endif
      /* Direct computation of collapsed covariance matrix (monty style) */
#if USE_2D
      cov_accumulate_2d(cov,data+y0*stride+x0,stride,x1-x0,y1-y0);
#else
      {
        int x,y;
        for(y=y0;y<y1;y++)
          cov_accumulate_1d(cov,data+y*stride+x0,1,x1-x0);
        for(x=x0;x<x1;x++)
          cov_accumulate_1d(cov,data+y0*stride+x,stride,y1-y0);
      }
#endif
    }
    video_input_close(&vid);
  }
}
  333. #endif
  334. #if USE_WAVELET
  335. /* some lifting CDF (9/7) wavelet code from Google Code's axonlib */
  336. /* http://code.google.com/p/axonlib/source/browse/trunk/extern/dwt97.c?spec=svn19&r=19 */
  337. /* single stage of decomposition */
  338. static void fwt97_i(double* x,int n){
  339. double temp[SUPPORT];
  340. double a;
  341. int i;
  342. /* Predict 1 */
  343. a=-1.586134342;
  344. for (i=1;i<n-2;i+=2)
  345. x[i]+=a*(x[i-1]+x[i+1]);
  346. x[n-1]+=2*a*x[n-2];
  347. /* Update 1 */
  348. a=-0.05298011854;
  349. for (i=2;i<n;i+=2)
  350. x[i]+=a*(x[i-1]+x[i+1]);
  351. x[0]+=2*a*x[1];
  352. /* Predict 2 */
  353. a=0.8829110762;
  354. for (i=1;i<n-2;i+=2)
  355. x[i]+=a*(x[i-1]+x[i+1]);
  356. x[n-1]+=2*a*x[n-2];
  357. /* Update 2 */
  358. a=0.4435068522;
  359. for (i=2;i<n;i+=2)
  360. x[i]+=a*(x[i-1]+x[i+1]);
  361. x[0]+=2*a*x[1];
  362. /* Scale */
  363. a=1/1.149604398;
  364. for (i=0;i<n;i++)
  365. {
  366. if (i%2) x[i]*=a;
  367. else x[i]/=a;
  368. }
  369. /* Pack */
  370. for (i=0;i<n;i++){
  371. if (i%2==0)
  372. temp[i/2]=x[i];
  373. else
  374. temp[n/2+i/2]=x[i];
  375. }
  376. for (i=0;i<n;i++) x[i]=temp[i];
  377. }
  378. /* single stage of reconstruction */
  379. void iwt97_i(double* x,int n){
  380. double temp[SUPPORT];
  381. double a;
  382. int i;
  383. /* Unpack */
  384. for (i=0;i<n/2;i++){
  385. temp[i*2]=x[i];
  386. temp[i*2+1]=x[i+n/2];
  387. }
  388. for (i=0;i<n;i++) x[i]=temp[i];
  389. /* Undo scale */
  390. a=1.149604398;
  391. for (i=0;i<n;i++)
  392. {
  393. if (i%2) x[i]*=a;
  394. else x[i]/=a;
  395. }
  396. /* Undo update 2 */
  397. a=-0.4435068522;
  398. for (i=2;i<n;i+=2)
  399. x[i]+=a*(x[i-1]+x[i+1]);
  400. x[0]+=2*a*x[1];
  401. /* Undo predict 2 */
  402. a=-0.8829110762;
  403. for (i=1;i<n-2;i+=2)
  404. x[i]+=a*(x[i-1]+x[i+1]);
  405. x[n-1]+=2*a*x[n-2];
  406. /* Undo update 1 */
  407. a=0.05298011854;
  408. for (i=2;i<n;i+=2)
  409. x[i]+=a*(x[i-1]+x[i+1]);
  410. x[0]+=2*a*x[1];
  411. /* Undo predict 1 */
  412. a=1.586134342;
  413. for (i=1;i<n-2;i+=2)
  414. x[i]+=a*(x[i-1]+x[i+1]);
  415. x[n-1]+=2*a*x[n-2];
  416. }
  417. /* multistage decomposition */
  418. void fwt97(double *out, int n, double *in, int support){
  419. int i=n,j=support,k;
  420. while((i&1)==0){
  421. fwt97_i(in,j);
  422. i>>=1;
  423. for(k=0;k<i;k++)
  424. out[i+k] = in[((((j*3)>>1)-i)>>1) + k];
  425. j>>=1;
  426. }
  427. for(k=0;k<i;k++)
  428. out[k] = in[((j-i)>>1) + k];
  429. }
  430. /* multistage reconstruction */
  431. void iwt97(double *out, int support, double *in, int n){
  432. int i=n,j=support,k;
  433. for(k=0;k<support;k++)
  434. out[k]=0;
  435. while((i&1)==0){
  436. i>>=1;
  437. for(k=0;k<i;k++)
  438. out[((((j*3)>>1)-i)>>1) + k]=in[i+k];
  439. j>>=1;
  440. }
  441. for(k=0;k<i;k++)
  442. out[((j-i)>>1) + k]=in[k];
  443. i<<=1;
  444. j<<=1;
  445. while(j<=support){
  446. iwt97_i(out,j);
  447. i<<=1;
  448. j<<=1;
  449. }
  450. }
  451. #endif
  452. #if USE_KLT
  453. void symeigen(double *out,
  454. double *cov,
  455. int support){
  456. int i;
  457. int j;
  458. int k;
  459. for(i=0;i<support;i++)
  460. for(j=0;j<support;j++)
  461. out[i*support+j]=i==j;
  462. for(;;){
  463. double mod=0.;
  464. for(i=0,j=0,k=0;k<support;k++){
  465. int m;
  466. for(m=k+1;m<support;m++){
  467. double q;
  468. q=fabs(cov[k*support+m]);
  469. if(q>mod){
  470. mod=q;
  471. i=k;
  472. j=m;
  473. }
  474. }
  475. }
  476. if(mod<1E-11)break;
  477. {
  478. double th=0.5*atan2(2*cov[i*support+j],cov[i*support+i]-cov[j*support+j]);
  479. double c=cos(th);
  480. double s=sin(th);
  481. for(k=0;k<support;k++){
  482. double t;
  483. t=c*cov[k*support+i]+s*cov[k*support+j];
  484. cov[k*support+j]=-s*cov[k*support+i]+c*cov[k*support+j];
  485. cov[k*support+i]=t;
  486. }
  487. for(k=0;k<support;k++){
  488. double t;
  489. t=c*cov[i*support+k]+s*cov[j*support+k];
  490. cov[j*support+k]=-s*cov[i*support+k]+c*cov[j*support+k];
  491. cov[i*support+k]=t;
  492. }
  493. for(k=0;k<support;k++){
  494. double t;
  495. t=c*out[i*support+k]+s*out[j*support+k];
  496. out[j*support+k]=-s*out[i*support+k]+c*out[j*support+k];
  497. out[i*support+k]=t;
  498. }
  499. }
  500. }
  501. /* for(j=0;j<BLOCKSIZE;j++)eigenvalue[j]=cov[j][j]; don't need eigenvalues */
  502. }
/* Forward lap: apply the pre-filter to the rows and columns of the
   SUPPORT x SUPPORT neighborhood `in` (modified in place), then copy the
   central BLOCKSIZE x BLOCKSIZE region into `out`.  _f selects the lapping
   filter coefficients. */
void flap_2d(double out[BLOCKSIZE][BLOCKSIZE],
 double in[SUPPORT][SUPPORT],
 const int _f[]){
  int i,j;
#if USE_LAPPING
#if BLOCKSIZE_LOG>=OD_LOG_BSIZE0&&BLOCKSIZE_LOG<OD_LOG_BSIZE0+OD_NBSIZES
  /* columns */
  for(i=SUPPORT/2-BLOCKSIZE;i<SUPPORT/2+BLOCKSIZE;i++){
    double work[BLOCKSIZE*2];
    /* Gather the column into contiguous storage for the 1D filter. */
    for(j=0;j<BLOCKSIZE*2;j++)
      work[j]=in[j+SUPPORT/2-BLOCKSIZE][i];
    /* Pre-filter the two BLOCKSIZE halves straddling the block edge. */
    (*NE_PRE_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
     (&work[0],&work[0],_f);
    (*NE_PRE_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
     (&work[BLOCKSIZE],&work[BLOCKSIZE],_f);
    for(j=0;j<BLOCKSIZE*2;j++)
      in[j+SUPPORT/2-BLOCKSIZE][i]=work[j];
  }
  /* rows */
  for(i=SUPPORT/2-BLOCKSIZE;i<SUPPORT/2+BLOCKSIZE;i++){
    /* Rows are contiguous, so filter them in place. */
    (*NE_PRE_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
     (&in[i][SUPPORT/2-BLOCKSIZE],&in[i][SUPPORT/2-BLOCKSIZE],_f);
    (*NE_PRE_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
     (&in[i][SUPPORT/2],&in[i][SUPPORT/2],_f);
  }
#else
# error "Need a prefilter implementation for this block size."
#endif
#endif
  /* Extract the central block after filtering. */
  for(i=0;i<BLOCKSIZE;i++)
    for(j=0;j<BLOCKSIZE;j++)
      out[i][j]=in[i+SUPPORT/2-BLOCKSIZE/2][j+SUPPORT/2-BLOCKSIZE/2];
}
  536. void ilap_2d(double out[SUPPORT][SUPPORT],
  537. double in[BLOCKSIZE][BLOCKSIZE],
  538. const int _f[]){
  539. int i,j;
  540. for(i=0;i<SUPPORT;i++)
  541. for(j=0;j<SUPPORT;j++)
  542. out[i][j]=0;
  543. for(i=0;i<BLOCKSIZE;i++)
  544. for(j=0;j<BLOCKSIZE;j++)
  545. out[i+SUPPORT/2-BLOCKSIZE/2][j+SUPPORT/2-BLOCKSIZE/2]=in[i][j];
  546. #if USE_LAPPING
  547. #if BLOCKSIZE_LOG>=OD_LOG_BSIZE0&&BLOCKSIZE_LOG<OD_LOG_BSIZE0+OD_NBSIZES
  548. /* columns */
  549. for(i=SUPPORT/2-BLOCKSIZE;i<SUPPORT/2+BLOCKSIZE;i++){
  550. double work[BLOCKSIZE*2];
  551. for(j=0;j<BLOCKSIZE*2;j++)
  552. work[j]=out[j+SUPPORT/2-BLOCKSIZE][i];
  553. (*NE_POST_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
  554. (&work[0],&work[0],_f);
  555. (*NE_POST_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
  556. (&work[BLOCKSIZE],&work[BLOCKSIZE],_f);
  557. for(j=0;j<BLOCKSIZE*2;j++)
  558. out[j+SUPPORT/2-BLOCKSIZE][i]=work[j];
  559. }
  560. /* rows */
  561. for(i=SUPPORT/2-BLOCKSIZE;i<SUPPORT/2+BLOCKSIZE;i++){
  562. (*NE_POST_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
  563. (&out[i][SUPPORT/2-BLOCKSIZE],&out[i][SUPPORT/2-BLOCKSIZE],_f);
  564. (*NE_POST_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
  565. (&out[i][SUPPORT/2],&out[i][SUPPORT/2],_f);
  566. }
  567. #else
  568. # error "Need a prefilter implementation for this block size."
  569. #endif
  570. #endif
  571. }
/* Forward lap of a 4D (2D x 2D) covariance tensor: apply the pre-filter
   independently along each of the four dimensions of `in` (modified in
   place), then copy the central block into `out`, flattening the first two
   and last two dimensions into a (BLOCKSIZE*BLOCKSIZE)^2 matrix suitable
   for symeigen(). */
void flap_4d(double out[BLOCKSIZE*BLOCKSIZE][BLOCKSIZE*BLOCKSIZE],
 double in[SUPPORT][SUPPORT][SUPPORT][SUPPORT],
 const int _f[]){
  int i,j,k,l;
#if USE_LAPPING
#if BLOCKSIZE_LOG>=OD_LOG_BSIZE0&&BLOCKSIZE_LOG<OD_LOG_BSIZE0+OD_NBSIZES
  /* Filter along dimension 0 for every (i,j,k) in the other three. */
  for(i=SUPPORT/2-BLOCKSIZE;i<SUPPORT/2+BLOCKSIZE;i++){
    for(j=SUPPORT/2-BLOCKSIZE;j<SUPPORT/2+BLOCKSIZE;j++){
      for(k=SUPPORT/2-BLOCKSIZE;k<SUPPORT/2+BLOCKSIZE;k++){
        double work[BLOCKSIZE*2];
        /* [ ][i][j][k] */
        for(l=0;l<BLOCKSIZE*2;l++)
          work[l]=in[l+SUPPORT/2-BLOCKSIZE][i][j][k];
        (*NE_PRE_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
         (&work[0],&work[0],_f);
        (*NE_PRE_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
         (&work[BLOCKSIZE],&work[BLOCKSIZE],_f);
        for(l=0;l<BLOCKSIZE*2;l++)
          in[l+SUPPORT/2-BLOCKSIZE][i][j][k]=work[l];
      }
    }
  }
  /* Filter along dimension 1. */
  for(i=SUPPORT/2-BLOCKSIZE;i<SUPPORT/2+BLOCKSIZE;i++){
    for(j=SUPPORT/2-BLOCKSIZE;j<SUPPORT/2+BLOCKSIZE;j++){
      for(k=SUPPORT/2-BLOCKSIZE;k<SUPPORT/2+BLOCKSIZE;k++){
        double work[BLOCKSIZE*2];
        /* [i][ ][j][k] */
        for(l=0;l<BLOCKSIZE*2;l++)
          work[l]=in[i][l+SUPPORT/2-BLOCKSIZE][j][k];
        (*NE_PRE_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
         (&work[0],&work[0],_f);
        (*NE_PRE_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
         (&work[BLOCKSIZE],&work[BLOCKSIZE],_f);
        for(l=0;l<BLOCKSIZE*2;l++)
          in[i][l+SUPPORT/2-BLOCKSIZE][j][k]=work[l];
      }
    }
  }
  /* Filter along dimension 2. */
  for(i=SUPPORT/2-BLOCKSIZE;i<SUPPORT/2+BLOCKSIZE;i++){
    for(j=SUPPORT/2-BLOCKSIZE;j<SUPPORT/2+BLOCKSIZE;j++){
      for(k=SUPPORT/2-BLOCKSIZE;k<SUPPORT/2+BLOCKSIZE;k++){
        double work[BLOCKSIZE*2];
        /* [i][j][ ][k] */
        for(l=0;l<BLOCKSIZE*2;l++)
          work[l]=in[i][j][l+SUPPORT/2-BLOCKSIZE][k];
        (*NE_PRE_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
         (&work[0],&work[0],_f);
        (*NE_PRE_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
         (&work[BLOCKSIZE],&work[BLOCKSIZE],_f);
        for(l=0;l<BLOCKSIZE*2;l++)
          in[i][j][l+SUPPORT/2-BLOCKSIZE][k]=work[l];
      }
    }
  }
  /* Filter along dimension 3 (contiguous, so no gather needed). */
  for(i=SUPPORT/2-BLOCKSIZE;i<SUPPORT/2+BLOCKSIZE;i++){
    for(j=SUPPORT/2-BLOCKSIZE;j<SUPPORT/2+BLOCKSIZE;j++){
      for(k=SUPPORT/2-BLOCKSIZE;k<SUPPORT/2+BLOCKSIZE;k++){
        /* [i][j][k][ ] */
        (*NE_PRE_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
         (&in[i][j][k][SUPPORT/2-BLOCKSIZE],&in[i][j][k][SUPPORT/2-BLOCKSIZE],_f);
        (*NE_PRE_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
         (&in[i][j][k][SUPPORT/2],&in[i][j][k][SUPPORT/2],_f);
      }
    }
  }
#else
# error "Need a prefilter implementation for this block size."
#endif
#endif
  /* Extract the central 4D block, flattened to a 2D matrix. */
  for(i=0;i<BLOCKSIZE;i++)
    for(j=0;j<BLOCKSIZE;j++)
      for(k=0;k<BLOCKSIZE;k++)
        for(l=0;l<BLOCKSIZE;l++)
          out[i*BLOCKSIZE+j][k*BLOCKSIZE+l]=in
           [i+SUPPORT/2-BLOCKSIZE/2]
           [j+SUPPORT/2-BLOCKSIZE/2]
           [k+SUPPORT/2-BLOCKSIZE/2]
           [l+SUPPORT/2-BLOCKSIZE/2];
}
  651. void gklt_1d(double klt[BLOCKSIZE][BLOCKSIZE],
  652. double cov[SUPPORT][SUPPORT],
  653. const int *_f){
  654. static double workA[SUPPORT][SUPPORT];
  655. static double workB[BLOCKSIZE][BLOCKSIZE];
  656. int i,j;
  657. for(i=0;i<SUPPORT;i++)
  658. for(j=0;j<SUPPORT;j++)
  659. workA[i][j]=cov[i][j];
  660. flap_2d(workB,workA,_f);
  661. symeigen(&klt[0][0],&workB[0][0],BLOCKSIZE);
  662. }
  663. void gklt_2d(double klt[BLOCKSIZE*BLOCKSIZE][BLOCKSIZE*BLOCKSIZE],
  664. double cov[SUPPORT][SUPPORT][SUPPORT][SUPPORT],
  665. const int *_f){
  666. static double workA[SUPPORT][SUPPORT][SUPPORT][SUPPORT];
  667. static double workB[BLOCKSIZE*BLOCKSIZE][BLOCKSIZE*BLOCKSIZE];
  668. int i,j,k,l;
  669. for(i=0;i<SUPPORT;i++)
  670. for(j=0;j<SUPPORT;j++)
  671. for(k=0;k<SUPPORT;k++)
  672. for(l=0;l<SUPPORT;l++)
  673. workA[i][j][k][l]=cov[i][j][k][l];
  674. flap_4d(workB,workA,_f);
  675. symeigen(&klt[0][0],&workB[0][0],BLOCKSIZE*BLOCKSIZE);
  676. }
  677. void gklt_1d_collapsed(double klt[BLOCKSIZE][BLOCKSIZE],
  678. double cov[SUPPORT],
  679. const int *_f){
  680. static double workA[SUPPORT][SUPPORT];
  681. static double workB[BLOCKSIZE][BLOCKSIZE];
  682. int i,j;
  683. for(i=0;i<SUPPORT;i++)
  684. for(j=0;j<SUPPORT;j++)
  685. workA[i][j]=cov[abs(i-j)];
  686. flap_2d(workB,workA,_f);
  687. symeigen(&klt[0][0],&workB[0][0],BLOCKSIZE);
  688. }
  689. void gklt_2d_collapsed(double klt[BLOCKSIZE*BLOCKSIZE][BLOCKSIZE*BLOCKSIZE],
  690. double cov[SUPPORT][SUPPORT],
  691. const int *_f){
  692. static double workA[SUPPORT][SUPPORT][SUPPORT][SUPPORT];
  693. static double workB[BLOCKSIZE*BLOCKSIZE][BLOCKSIZE*BLOCKSIZE];
  694. int i,j,k,l;
  695. for(i=0;i<SUPPORT;i++)
  696. for(j=0;j<SUPPORT;j++)
  697. for(k=0;k<SUPPORT;k++)
  698. for(l=0;l<SUPPORT;l++)
  699. workA[i][j][k][l]=cov[abs(i-k)][abs(j-l)];
  700. flap_4d(workB,workA,_f);
  701. symeigen(&klt[0][0],&workB[0][0],BLOCKSIZE*BLOCKSIZE);
  702. }
  703. void fklt(double *out,
  704. double *in,
  705. double *klt,
  706. int support){
  707. int i,j;
  708. for(i=0;i<support;i++){
  709. double acc=0.;
  710. for(j=0;j<support;j++)
  711. acc += klt[i*support+j]*in[j];
  712. out[i]=acc;
  713. }
  714. }
  715. void iklt(double *out,
  716. double *in,
  717. double *klt,
  718. int support){
  719. int i,j;
  720. for(i=0;i<support;i++){
  721. double acc=0.;
  722. for(j=0;j<support;j++)
  723. acc+=klt[j*support+i]*in[j];
  724. out[i]=acc;
  725. }
  726. }
  727. #endif
/* Forward 1D analysis of one lapped line: reads SUPPORT samples from _in
   (stride _in_stride), applies the configured transform chain (wavelet, or
   optional pre-filter followed by KLT/DCT/identity), and writes BLOCKSIZE
   coefficients to _out (stride _out_stride).  _klt is only used when
   USE_KLT is enabled; callers pass NULL otherwise. */
void b_analysis_1d(double *_out,int _out_stride,const double *_in,int _in_stride,
 const int *_f, double _klt[BLOCKSIZE][BLOCKSIZE]){
  int j;
  double t[SUPPORT];   /* gathered input over the full lapped support */
  double w[BLOCKSIZE]; /* resulting transform coefficients */
  for(j=0;j<SUPPORT;j++)
    t[j]=_in[j*_in_stride];
#if USE_WAVELET
  /* Multistage CDF(9/7) decomposition down to BLOCKSIZE coefficients. */
  fwt97(w,BLOCKSIZE,t,SUPPORT);
  for(j=0;j<BLOCKSIZE;j++){
    _out[j*_out_stride]=w[j];
  }
#else
# if USE_LAPPING
# if BLOCKSIZE_LOG>=OD_LOG_BSIZE0&&BLOCKSIZE_LOG<OD_LOG_BSIZE0+OD_NBSIZES
  /* Pre-filter the two BLOCKSIZE regions straddling the block boundary. */
  (*NE_PRE_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
   (&t[SUPPORT/2-BLOCKSIZE],&t[SUPPORT/2-BLOCKSIZE],_f);
  (*NE_PRE_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
   (&t[SUPPORT/2],&t[SUPPORT/2],_f);
# else
# error "Need a prefilter implementation for this block size."
# endif
# endif
# if USE_KLT
  fklt(&w[0],&t[SUPPORT/2-BLOCKSIZE/2],&_klt[0][0],BLOCKSIZE);
# elif USE_DCT
# if BLOCKSIZE_LOG>=OD_LOG_BSIZE0&&BLOCKSIZE_LOG<OD_LOG_BSIZE0+OD_NBSIZES
  (*OD_FDCT_1D_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
   (w,&t[SUPPORT/2-BLOCKSIZE/2],1);
# else
# error "Need an fDCT implementation for this block size."
# endif
# else
  /* No transform configured: pass the central block through unchanged. */
  for(j=0;j<BLOCKSIZE;j++)
    w[j]=t[j+SUPPORT/2-BLOCKSIZE/2];
# endif
  for(j=0;j<BLOCKSIZE;j++)
    _out[j*_out_stride]=w[j];
#endif
}
/* Forward 2D analysis of one SUPPORT x SUPPORT region into a
   BLOCKSIZE x BLOCKSIZE coefficient block.  The KLT path is a single
   non-separable 2D transform; the DCT/wavelet paths apply the 1D analysis
   separably (columns, then rows). */
void b_analysis_2d(double *_out,int _out_stride_i,int _out_stride_j,
 const double *_in,int _in_stride_i,int _in_stride_j,
 const int *_f, double _klt[BLOCKSIZE*BLOCKSIZE][BLOCKSIZE*BLOCKSIZE]){
#if USE_KLT
  /* KLT is a non-separable 2D transform */
  double lap[SUPPORT][SUPPORT];
  double work[BLOCKSIZE][BLOCKSIZE];
  double temp[BLOCKSIZE][BLOCKSIZE];
  int i,j;
  /* Gather the strided input into a contiguous 2D array. */
  for(i=0;i<SUPPORT;i++)
    for(j=0;j<SUPPORT;j++)
      lap[i][j]=*(_in+i*_in_stride_i+j*_in_stride_j);
  /* Lap, then apply the flattened (BLOCKSIZE^2)-point KLT. */
  flap_2d(work,lap,_f);
  fklt(&temp[0][0],&work[0][0],&_klt[0][0],BLOCKSIZE*BLOCKSIZE);
  for(i=0;i<BLOCKSIZE;i++)
    for(j=0;j<BLOCKSIZE;j++)
      *(_out+i*_out_stride_i+j*_out_stride_j)=temp[i][j];
#else
  double work[SUPPORT][BLOCKSIZE];
  int i;
  /* DCT and DWT are separable 1D transforms */
  /* lapping performed inside b_analysis */
  for(i=0;i<SUPPORT;i++)
    b_analysis_1d(&work[i][0],1,_in+i*_in_stride_i,_in_stride_j,_f,NULL);
  for(i=0;i<BLOCKSIZE;i++)
    b_analysis_1d(_out+_out_stride_i*i,_out_stride_j,&work[0][i],BLOCKSIZE,_f,NULL);
#endif
}
/* Inverse 1D synthesis of one lapped line: reads BLOCKSIZE coefficients
   from _in (stride _in_stride), applies the inverse transform chain
   (wavelet, or KLT/DCT/identity followed by the optional post-filter),
   and writes SUPPORT reconstructed samples to _out (stride _out_stride).
   Mirror of b_analysis_1d. */
void b_synthesis_1d(double *_out,int _out_stride,const double *_in,int _in_stride,
 const int *_f, double _klt[BLOCKSIZE][BLOCKSIZE]){
  int j;
  double w[SUPPORT]; /* coefficient vector, zero-padded to full support */
  double t[SUPPORT]; /* reconstructed samples */
  for(j=0;j<SUPPORT;j++){
    t[j]=0;
    w[j]=0;
  }
#if USE_WAVELET
  for(j=0;j<BLOCKSIZE;j++)
    w[j]=_in[j*_in_stride];
  iwt97(t,SUPPORT,w,BLOCKSIZE);
#else
  /* Place the coefficients at the center of the support. */
  for(j=0;j<BLOCKSIZE;j++){
    w[SUPPORT/2-BLOCKSIZE/2+j]=_in[j*_in_stride];
  }
# if USE_KLT
  iklt(&t[SUPPORT/2-BLOCKSIZE/2],&w[SUPPORT/2-BLOCKSIZE/2],&_klt[0][0],BLOCKSIZE);
# elif USE_DCT
# if BLOCKSIZE_LOG>=OD_LOG_BSIZE0&&BLOCKSIZE_LOG<OD_LOG_BSIZE0+OD_NBSIZES
  (*OD_IDCT_1D_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
   (&t[SUPPORT/2-BLOCKSIZE/2],1,&w[SUPPORT/2-BLOCKSIZE/2]);
# else
# error "Need an iDCT implementation for this block size."
# endif
# else
  /* No transform configured: pass coefficients through unchanged. */
  for(j=0;j<SUPPORT;j++)
    t[j]=w[j];
# endif
# if USE_LAPPING
# if BLOCKSIZE_LOG>=OD_LOG_BSIZE0&&BLOCKSIZE_LOG<OD_LOG_BSIZE0+OD_NBSIZES
  /* Post-filter the two BLOCKSIZE regions straddling the block boundary. */
  (*NE_POST_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
   (&t[SUPPORT/2-BLOCKSIZE],&t[SUPPORT/2-BLOCKSIZE],_f);
  (*NE_POST_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
   (&t[SUPPORT/2],&t[SUPPORT/2],_f);
# else
# error "Need a postfilter implementation for this block size."
# endif
# endif
#endif
  for(j=0;j<SUPPORT;j++)
    _out[j*_out_stride]=t[j];
}
  840. void b_synthesis_2d(double *_out,int _out_stride_i,int _out_stride_j,
  841. const double *_in,int _in_stride_i,int _in_stride_j,
  842. const int *_f, double _klt[BLOCKSIZE*BLOCKSIZE][BLOCKSIZE*BLOCKSIZE]){
  843. #if USE_KLT
  844. /* KLT is a non-separable 2D transform */
  845. double temp[BLOCKSIZE][BLOCKSIZE];
  846. double work[BLOCKSIZE][BLOCKSIZE];
  847. double lap[SUPPORT][SUPPORT];
  848. int i,j;
  849. for(i=0;i<BLOCKSIZE;i++)
  850. for(j=0;j<BLOCKSIZE;j++)
  851. temp[i][j]=*(_in+i*_in_stride_i+j*_in_stride_j);
  852. iklt(&work[0][0],&temp[0][0],&_klt[0][0],BLOCKSIZE*BLOCKSIZE);
  853. ilap_2d(lap,work,_f);
  854. for(i=0;i<SUPPORT;i++)
  855. for(j=0;j<SUPPORT;j++)
  856. *(_out+i*_out_stride_i+j*_out_stride_j)=lap[i][j];
  857. #else
  858. double work[SUPPORT][BLOCKSIZE];
  859. int i;
  860. /* DCT and DWT are separable 1D transforms */
  861. /* lapping performed inside b_analysis */
  862. for(i=0;i<BLOCKSIZE;i++)
  863. b_synthesis_1d(&work[0][i],BLOCKSIZE,_in+i*_in_stride_i,_in_stride_j,_f,NULL);
  864. for(i=0;i<SUPPORT;i++)
  865. b_synthesis_1d(_out+_out_stride_i*i,_out_stride_j,&work[i][0],1,_f,NULL);
  866. #endif
  867. }
  868. #if USE_2D
  869. static double cg_2d_i(double rggt[SUPPORT][SUPPORT][BLOCKSIZE][BLOCKSIZE],
  870. const int *_f,
  871. double _klt[BLOCKSIZE*BLOCKSIZE][BLOCKSIZE*BLOCKSIZE]){
  872. double r[BLOCKSIZE][BLOCKSIZE][BLOCKSIZE][BLOCKSIZE];
  873. double s[SUPPORT][BLOCKSIZE];
  874. double ggrggt[BLOCKSIZE][BLOCKSIZE][BLOCKSIZE][BLOCKSIZE];
  875. double cg=0;
  876. int i;
  877. int j;
  878. int v;
  879. int u;
  880. int k;
  881. int l;
  882. /* G1*P*G2*R*(G2*P*G1)^T */
  883. for(v=0;v<BLOCKSIZE;v++)
  884. for(j=0;j<BLOCKSIZE;j++)
  885. b_analysis_2d(&ggrggt[v][j][0][0],
  886. 1,BLOCKSIZE,
  887. &rggt[0][0][v][j],
  888. BLOCKSIZE*BLOCKSIZE*SUPPORT,
  889. BLOCKSIZE*BLOCKSIZE,
  890. f,_klt);
  891. /* H1*P*H2 */
  892. for(i=0;i<BLOCKSIZE;i++)
  893. for(j=0;j<BLOCKSIZE;j++)
  894. for(k=0;k<BLOCKSIZE;k++)
  895. for(l=0;l<BLOCKSIZE;l++)
  896. r[i][j][k][l] = (i*BLOCKSIZE+j==k*BLOCKSIZE+l)?1:0;
  897. for(i=0;i<BLOCKSIZE;i++)
  898. for(j=0;j<BLOCKSIZE;j++)
  899. b_synthesis_2d(&rggt[0][0][i][j],
  900. BLOCKSIZE*BLOCKSIZE,
  901. SUPPORT*BLOCKSIZE*BLOCKSIZE,
  902. &r[i][j][0][0],
  903. BLOCKSIZE,1,
  904. _f,_klt);
  905. /* ((H1*P*H2)^T*H1*P*H2)_ii */
  906. for(i=0;i<BLOCKSIZE;i++){
  907. for(j=0;j<BLOCKSIZE;j++){
  908. s[i][j]=0;
  909. for(u=0;u<SUPPORT;u++){
  910. for(v=0;v<SUPPORT;v++){
  911. s[i][j]+=rggt[u][v][i][j]*rggt[u][v][i][j];
  912. }
  913. }
  914. }
  915. }
  916. /* (G1*P*G2*R*(G1*P*G2)^T)_ii * ((H1*P*H2)^T*H1*P*H2)_ii */
  917. for(i=0;i<BLOCKSIZE;i++)
  918. for(j=0;j<BLOCKSIZE;j++)
  919. cg-=10*log10(ggrggt[i][j][i][j]*s[i][j]);
  920. return cg/(BLOCKSIZE*BLOCKSIZE);
  921. }
  922. double cg_2d(double _in[SUPPORT][SUPPORT][SUPPORT][SUPPORT],
  923. const int *_f){
  924. int v;
  925. int j;
  926. double ret;
  927. double (*rggt)[SUPPORT][BLOCKSIZE][BLOCKSIZE] =
  928. (double (*)[SUPPORT][BLOCKSIZE][BLOCKSIZE])
  929. malloc(SUPPORT*SUPPORT*BLOCKSIZE*BLOCKSIZE*sizeof(****rggt));
  930. double klt[BLOCKSIZE*BLOCKSIZE][BLOCKSIZE*BLOCKSIZE];
  931. #if USE_KLT
  932. gklt_2d(klt,_in,_f);
  933. #endif
  934. /* R*(G2*P*G1)^T */
  935. for(v=0;v<SUPPORT;v++)
  936. for(j=0;j<SUPPORT;j++)
  937. b_analysis_2d(&rggt[v][j][0][0],
  938. 1,BLOCKSIZE,
  939. &_in[0][0][v][j],
  940. SUPPORT*SUPPORT*SUPPORT,
  941. SUPPORT*SUPPORT,
  942. _f,klt);
  943. ret = cg_2d_i(rggt,f,klt);
  944. free(rggt);
  945. return ret;
  946. }
  947. double cg_2d_collapsed(double _in[SUPPORT][SUPPORT],const int *_f){
  948. int v;
  949. int u;
  950. int j;
  951. int i;
  952. double ret;
  953. double r[SUPPORT][SUPPORT];
  954. double (*rggt)[SUPPORT][BLOCKSIZE][BLOCKSIZE] =
  955. (double (*)[SUPPORT][BLOCKSIZE][BLOCKSIZE])
  956. malloc(SUPPORT*SUPPORT*BLOCKSIZE*BLOCKSIZE*sizeof(****rggt));
  957. double klt[BLOCKSIZE*BLOCKSIZE][BLOCKSIZE*BLOCKSIZE];
  958. #if USE_KLT
  959. gklt_2d_collapsed(klt,_in,_f);
  960. #endif
  961. /* R*(G2*P*G1)^T */
  962. for(v=0;v<SUPPORT;v++){
  963. for(j=0;j<SUPPORT;j++){
  964. for(u=0;u<SUPPORT;u++)
  965. for(i=0;i<SUPPORT;i++)
  966. r[u][i]=_in[abs(u-v)][abs(i-j)];
  967. b_analysis_2d(&rggt[v][j][0][0],
  968. 1,BLOCKSIZE,
  969. &r[0][0],SUPPORT,1,_f,klt);
  970. }
  971. }
  972. ret = cg_2d_i(rggt,f,klt);
  973. free(rggt);
  974. return ret;
  975. }
  976. #else
  977. static double cg_1d_i(double rgt[SUPPORT][BLOCKSIZE],
  978. const int *_f,
  979. double klt[BLOCKSIZE][BLOCKSIZE]){
  980. int j;
  981. int i;
  982. double r[BLOCKSIZE];
  983. double grgt[BLOCKSIZE][BLOCKSIZE];
  984. double cg=0;
  985. /* G*R*G^T */
  986. for(i=0;i<BLOCKSIZE;i++)
  987. b_analysis_1d(&grgt[0][i],BLOCKSIZE,&rgt[0][i],BLOCKSIZE,_f,klt);
  988. /* H */
  989. for(j=0;j<BLOCKSIZE;j++){
  990. for(i=0;i<BLOCKSIZE;i++){
  991. r[i]=i==j?1:0;
  992. }
  993. b_synthesis_1d(&rgt[0][j],BLOCKSIZE,r,1,_f,klt);
  994. }
  995. /* (G*R*G^T)_ii * (H^T*H)_ii */
  996. for(j=0;j<BLOCKSIZE;j++){
  997. double h=0;
  998. for(i=0;i<SUPPORT;i++){
  999. h+=rgt[i][j]*rgt[i][j];
  1000. }
  1001. cg-=10*log10(grgt[j][j]*h);
  1002. }
  1003. return cg/BLOCKSIZE;
  1004. }
  1005. double cg_1d(double in[SUPPORT][SUPPORT],const int *_f){
  1006. int j;
  1007. double rgt[SUPPORT][BLOCKSIZE];
  1008. double klt[BLOCKSIZE][BLOCKSIZE];
  1009. #if USE_KLT
  1010. gklt_1d(klt,in,_f);
  1011. #endif
  1012. /* R*G^T */
  1013. for(j=0;j<SUPPORT;j++){
  1014. b_analysis_1d(&rgt[j][0],1,in[j],1,_f,klt);
  1015. }
  1016. return cg_1d_i(rgt,f,klt);
  1017. }
  1018. double cg_1d_collapsed(double in[SUPPORT],const int *_f){
  1019. int j;
  1020. int i;
  1021. double r[SUPPORT];
  1022. double rgt[SUPPORT][BLOCKSIZE];
  1023. double klt[BLOCKSIZE][BLOCKSIZE];
  1024. #if USE_KLT
  1025. gklt_1d_collapsed(klt,in,_f);
  1026. #endif
  1027. /* R*G^T */
  1028. for(j=0;j<SUPPORT;j++){
  1029. for(i=0;i<SUPPORT;i++){
  1030. r[i]=in[abs(i-j)];
  1031. }
  1032. b_analysis_1d(&rgt[j][0],1,r,1,_f,klt);
  1033. }
  1034. return cg_1d_i(rgt,f,klt);
  1035. }
  1036. #endif
  1037. #if USE_FILES
/*Entry point when USE_FILES is enabled: accumulate covariance statistics
   from the input files named on the command line, using NUM_PROCS
   parallel accumulators, then print coding-gain estimates.*/
int main(int _argc,const char *_argv[]){
cov_state cvs[NUM_PROCS];
#if COMPUTE_NATHAN
/*Second, reference set of accumulators plus scratch for the collapsed
   covariance.*/
trans_ctx ctx[NUM_PROCS];
double r[SUPPORT*SUPPORT]; /* maximum for 2d */
#else
trans_ctx *ctx=NULL;
#endif
int i;
/*Select the lapping filter parameters for the configured block size.
  NOTE(review): 'f' is not declared in this function, so it is presumably
   a file-scope global also read by the cg_* calls below -- confirm.*/
#if BLOCKSIZE==4
f=OD_FILTER_PARAMS4;
#elif BLOCKSIZE==8
f=OD_FILTER_PARAMS8;
#elif BLOCKSIZE==16
f=OD_FILTER_PARAMS16;
#else
# error "Need filter params for this block size."
#endif
/*One covariance accumulator per worker.*/
for(i=0;i<NUM_PROCS;i++){
#if USE_2D
cov_init(&cvs[i],SUPPORT*SUPPORT);
#else
cov_init(&cvs[i],SUPPORT);
#endif
}
#if COMPUTE_NATHAN
for(i=0;i<NUM_PROCS;i++){
#if USE_2D
trans_data_init(&ctx[i].td,SUPPORT*SUPPORT);
#else
trans_data_init(&ctx[i].td,SUPPORT);
#endif
}
#endif
OD_OMP_SET_THREADS(NUM_PROCS);
process_files(ctx,cvs,_argc,_argv);
/*Reduce the per-worker accumulators into slot 0.*/
for(i=1;i<NUM_PROCS;i++)
cov_combine(&cvs[0],&cvs[i]);
cov_compute(&cvs[0]);
#if COMPUTE_NATHAN
for(i=1;i<NUM_PROCS;i++)
trans_data_combine(&ctx[0].td,&ctx[i].td);
trans_data_normalize(&ctx[0].td);
#endif
#if PRINT_COV
/*Dump the collapsed covariance, SUPPORT values per row.*/
{
int i,j;
fprintf(stdout,"collapsed_cov=\n");
for(j=0;j<cvs[0].sz/SUPPORT;j++){
for(i=0;i<SUPPORT;i++){
fprintf(stdout,"%s %- 12.6G",i>0?",":"",cvs[0].cov[j*SUPPORT+i]);
}
fprintf(stdout,"\n");
}
}
#endif
#if USE_2D
#if COMPUTE_NATHAN
/*Reference coding gain from the full covariance, then from its collapsed
   (translation-invariant) form.*/
fprintf(stdout,"original cg=%-24.16G\n",
cg_2d((double(*)[SUPPORT][SUPPORT][SUPPORT])ctx[0].td.cov,f));
trans_data_collapse(&ctx[0].td,SUPPORT,r);
fprintf(stdout,"collapse cg=%-24.16G\n",
cg_2d_collapsed((double(*)[SUPPORT])r,f));
#endif
/*Coding gain from the primary (cov_state) statistics.*/
fprintf(stdout,"monty cg=%-24.16G\n",
cg_2d_collapsed((double(*)[SUPPORT])cvs[0].cov,f));
#else
#if COMPUTE_NATHAN
fprintf(stdout,"original cg=%-24.16G\n",
cg_1d((double (*)[SUPPORT])ctx[0].td.cov,f));
trans_data_collapse(&ctx[0].td,1,r);
fprintf(stdout,"collapse cg=%-24.16G\n",
cg_1d_collapsed(r,f));
#endif
fprintf(stdout,"monty cg=%-24.16G\n",
cg_1d_collapsed(cvs[0].cov,f));
#endif
/*Release every accumulator.*/
for(i=0;i<NUM_PROCS;i++)
cov_clear(&cvs[i]);
#if COMPUTE_NATHAN
for(i=0;i<NUM_PROCS;i++)
trans_data_clear(&ctx[i].td);
#endif
return EXIT_SUCCESS;
}
  1123. #else
/*Entry point when USE_FILES is disabled: no input is read (_argc/_argv
   are unused); instead the signal is modeled with a synthetic AR process
   (rho=0.95) and its coding gain is reported.*/
int main(int _argc,const char *_argv[]){
#if USE_2D
double cov[SUPPORT][SUPPORT];
double *r=&cov[0][0];
#else
double cov[SUPPORT];
double *r=&cov[0];
#endif
/*Select the lapping filter parameters for the configured block size.
  NOTE(review): 'f' is not declared here, so it is presumably a
   file-scope global read by the cg_* calls below -- confirm.*/
#if BLOCKSIZE==4
f=OD_FILTER_PARAMS4;
#elif BLOCKSIZE==8
f=OD_FILTER_PARAMS8;
#elif BLOCKSIZE==16
f=OD_FILTER_PARAMS16;
#else
# error "Need filter params for this block size."
#endif
# if USE_2D
/*Fill cov with the collapsed AR(0.95) autocorrelation and evaluate.*/
auto_regressive_collapsed(r,SUPPORT*SUPPORT,SUPPORT,0.95);
fprintf(stdout,"AR p=.95 cg=%-24.18G\n",cg_2d_collapsed(cov,f));
# else
auto_regressive_collapsed(r,SUPPORT,1,0.95);
fprintf(stdout,"AR p=.95 cg=%-24.18G\n",cg_1d_collapsed(cov,f));
# endif
return EXIT_SUCCESS;
}
  1150. #endif