/* trans_gain.c */
  1. /* Daala video codec
  2. Copyright (c) 2013 Daala project contributors. All rights reserved.
  3. Redistribution and use in source and binary forms, with or without
  4. modification, are permitted provided that the following conditions are met:
  5. - Redistributions of source code must retain the above copyright notice, this
  6. list of conditions and the following disclaimer.
  7. - Redistributions in binary form must reproduce the above copyright notice,
  8. this list of conditions and the following disclaimer in the documentation
  9. and/or other materials provided with the distribution.
  10. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  11. AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  12. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  13. DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
  14. FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  15. DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  16. SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  17. CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  18. OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  19. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
  20. /* 1D coding gain (dB) **********************
  21. AR p=.95 4x4 8x8 16x16
  22. ------------------------------------------
  23. KLT 7.5825 8.8462 9.4781
  24. DCT 7.5701 8.8259 9.4555
  25. CDF(9/7) 8.4687 9.4592 9.7866
  26. LappedKLT 8.5633 9.4908 9.8951
  27. LappedDCT 8.5523 9.4871 9.8929
  28. Subset 1 4x4 8x8 16x16
  29. ------------------------------------------
  30. KLT original 8.7714 10.2588 11.0039
  31. collapsed 8.7714 10.2588 11.0039
  32. monty 8.7654 10.2628 11.0292
  33. DCT 8.7620 10.2427 10.9861
  34. 8.7620 10.2427 10.9861
  35. 8.7561 10.2467 11.0115
  36. CDF(9/7) 9.3794 10.5932 11.0685
  37. 9.3845 10.5957 11.0825
  38. 9.4155 10.6576 11.1965
  39. LappedKLT 9.6276 10.7860 11.3254
  40. 9.6277 10.7867 11.3296
  41. 9.6295 10.8056 11.3722
  42. LappedDCT 9.6213 10.7832 11.3230
  43. 9.6214 10.7839 11.3272
  44. 9.6232 10.8028 11.3698
  45. Subset 3 4x4 8x8 16x16
  46. ------------------------------------------
  47. KLT original 10.5669 12.3711 13.2694
  48. collapsed 10.5669 12.3711 13.2694
  49. monty 10.5495 12.3573 13.2729
  50. DCT 10.5546 12.3532 13.2535
  51. 10.5547 12.3532 13.2535
  52. 10.5373 12.3395 13.2572
  53. CDF(9/7) 11.3102 12.6838 13.1845
  54. 11.3106 12.6871 13.2009
  55. 11.3389 12.7764 13.4084
  56. LappedKLT 11.6048 13.0138 13.6488
  57. 11.6046 13.0136 13.6491
  58. 11.5922 13.0126 13.6790
  59. LappedDCT 11.5970 13.0111 13.6464
  60. 11.5968 13.0110 13.6467
  61. 11.5844 13.0099 13.6766
  62. */
  63. /* 2D coding gain (dB) **********************
  64. AR p=.95 4x4 8x8 16x16
  65. ------------------------------------------
  66. KLT 15.1649 17.6924 18.9562
  67. DCT 15.1403 17.6518 18.9109
  68. CDF(9/7) 16.9374 18.9183 19.5731
  69. LappedKLT 17.1265 18.9816 19.7902
  70. LappedDCT 17.1047 18.9741 19.7858
  71. Subset 1 4x4 8x8 16x16
  72. ------------------------------------------
  73. KLT original 12.4432 ------- -------
  74. collapsed 12.4428 ------- -------
  75. monty 12.4732 13.6167 14.1170
  76. DCT 12.3695 ------- -------
  77. 12.3698 ------- -------
  78. 12.4182 13.5473 14.0536
  79. CDF(9/7) ------- ------- -------
  80. ------- ------- -------
  81. 13.1425 13.8184 14.0110
  82. LappedKLT 13.2807 ------- -------
  83. 13.2808 ------- -------
  84. 13.3452 14.1273 14.4041
  85. LappedDCT 13.2682 ------- -------
  86. 13.2685 ------- -------
  87. 13.3330 14.1215 14.3981
  88. Subset 3 4x4 8x8 16x16
  89. ------------------------------------------
  90. KLT monty 14.9078 16.2416 16.7839
  91. DCT 14.8313 16.1578 16.7221
  92. CDF(9/7) 15.7553 16.4760 16.6656
  93. LappedKLT 15.9763 16.8549 17.1181
  94. LappedDCT 15.9627 16.8507 17.1152
  95. */
  96. #ifdef HAVE_CONFIG_H
  97. #include "config.h"
  98. #endif
  99. #include <stdlib.h>
  100. #include "od_defs.h"
  101. #include "od_filter.h"
  102. #include "stats_tools.h"
  103. #include "trans_tools.h"
  104. #define BLOCKSIZE_LOG (4)
  105. #define USE_LAPPING (1)
  106. #define USE_KLT (1)
  107. #define USE_DCT (0)
  108. #define USE_WAVELET (0)
  109. #define USE_2D (1)
  110. #define USE_FILES (1)
  111. #define USE_AR95 (0)
  112. #define COMPUTE_NATHAN (1)
  113. #define PRINT_COV (0)
  114. #define BLOCKSIZE (1<<BLOCKSIZE_LOG)
  115. #if USE_WAVELET
  116. #if BLOCKSIZE_LOG==1
  117. # define SUPPORT (20)
  118. #else
  119. # if BLOCKSIZE_LOG==2
  120. # define SUPPORT (40)
  121. # else
  122. # if BLOCKSIZE_LOG==3
  123. # define SUPPORT (80)
  124. # else
  125. # if BLOCKSIZE_LOG==4
  126. # define SUPPORT (160)
  127. # else
  128. # error "no support configuration for transform size"
  129. # endif
  130. # endif
  131. # endif
  132. #endif
  133. #else
  134. #if USE_LAPPING||COMPUTE_NATHAN
  135. /* larger than needed for 'new' covariance code, but it won't alter
  136. the answer, just produce a larger than needed covariance matrix.
  137. It is needed to make the boundary conditions of the 'old'
  138. covariance code match the trans and trans2d utils */
  139. #define SUPPORT (BLOCKSIZE*2)
  140. #else
  141. #define SUPPORT (BLOCKSIZE)
  142. #endif
  143. #endif
/* Lapping filter taps used throughout; presumably assigned by the tool's
   entry point -- not visible in this chunk. */
const int *f;
/* Function-pointer types for the double-precision 1D DCT kernels
   (implemented elsewhere), indexed by block-size log. */
typedef void (*ne_fdct_func_1d)(double *_out,const double *_in,int _in_stride);
typedef void (*ne_idct_func_1d)(double *_out,int _out_stride,const double *_in);
extern const ne_idct_func_1d OD_IDCT_1D_DOUBLE[OD_NBSIZES];
extern const ne_fdct_func_1d OD_FDCT_1D_DOUBLE[OD_NBSIZES];
  149. #if USE_FILES
/* Accumulator state for a collapsed covariance estimate.  Slot i gathers
   raw pair statistics at lag i (1D) or at 2D lag (i/sz, i%sz) with
   sz = sqrt(sz member); see cov_accumulate_1d/cov_accumulate_2d. */
typedef struct {
  int sz;            /* number of lag slots */
  /* NOTE(review): u_int64_t is the BSD spelling; uint64_t from <stdint.h>
     would be the portable choice. */
  u_int64_t *n;      /* pair count per slot */
  u_int64_t *acc_i;  /* sum of first-sample values per slot */
  u_int64_t *acc_j;  /* sum of second-sample values per slot */
  u_int64_t *acc_ij; /* sum of sample products per slot */
  double *cov;       /* normalized covariance; filled by cov_compute() */
} cov_state;
  158. static void cov_init(cov_state *_this, int _sz){
  159. _this->sz = _sz;
  160. _this->n = calloc(_sz,sizeof(*_this->n));
  161. _this->acc_i = calloc(_sz,sizeof(*_this->acc_i));
  162. _this->acc_j = calloc(_sz,sizeof(*_this->acc_j));
  163. _this->acc_ij= calloc(_sz,sizeof(*_this->acc_ij));
  164. _this->cov = calloc(_sz,sizeof(*_this->cov));
  165. }
  166. static void cov_clear(cov_state *_this){
  167. if(_this){
  168. if(_this->n) free(_this->n);
  169. if(_this->acc_i) free(_this->acc_i);
  170. if(_this->acc_j) free(_this->acc_j);
  171. if(_this->acc_ij) free(_this->acc_ij);
  172. if(_this->cov) free(_this->cov);
  173. }
  174. }
  175. #if USE_2D
  176. /* 1D and 2D could both use the same generalized code, but it would be
  177. harder to read */
  178. static void cov_accumulate_2d(cov_state *_this,
  179. const unsigned char *_data,
  180. int _stride, int _w, int _h){
  181. int x,y,i,j;
  182. int sz = sqrt(_this->sz);
  183. for(i=0;i<sz;i++){
  184. for(j=0;j<sz;j++){
  185. int ij = i*sz+j;
  186. for(y=0;y<_h-i;y++){
  187. const unsigned char *di=_data+y*_stride;
  188. const unsigned char *dj=_data+(y+i)*_stride+j;
  189. for(x=0;x<_w-j;x++){
  190. ++_this->n[ij];
  191. _this->acc_i[ij] += di[x];
  192. _this->acc_j[ij] += dj[x];
  193. _this->acc_ij[ij] += di[x]*dj[x];
  194. }
  195. }
  196. }
  197. }
  198. }
  199. #else
  200. static void cov_accumulate_1d(cov_state *_this,
  201. const unsigned char *_data,
  202. int _stride, int _n){
  203. int i,j;
  204. for(i=0;i<_this->sz;i++){
  205. const unsigned char *di=_data;
  206. const unsigned char *dj=_data+i*_stride;
  207. for(j=0;j<_n-i;j++){
  208. ++_this->n[i];
  209. _this->acc_i[i] += di[j*_stride];
  210. _this->acc_j[i] += dj[j*_stride];
  211. _this->acc_ij[i] += di[j*_stride]*dj[j*_stride];
  212. }
  213. }
  214. }
  215. #endif
  216. static void cov_combine(cov_state *_a,const cov_state *_b){
  217. int i;
  218. for(i=0;i<_a->sz;i++){
  219. _a->acc_i[i] += _b->acc_i[i];
  220. _a->acc_j[i] += _b->acc_j[i];
  221. _a->acc_ij[i] += _b->acc_ij[i];
  222. _a->n[i] += _b->n[i];
  223. }
  224. }
  225. static void cov_compute(cov_state *_this){
  226. int i;
  227. for(i=0;i<_this->sz;i++)
  228. _this->cov[i] =
  229. ((double)_this->acc_ij[i] -
  230. (double)_this->acc_i[i]*
  231. _this->acc_j[i]/_this->n[i])/_this->n[i];
  232. for(i=1;i<_this->sz;i++)
  233. _this->cov[i] /= _this->cov[0];
  234. _this->cov[0]=1.;
  235. }
  236. static void process_files(trans_ctx *_ctx,
  237. cov_state *_cov,
  238. int _argc,
  239. const char *_argv[]){
  240. int ai;
  241. #pragma omp parallel for schedule(dynamic)
  242. for(ai=1;ai<_argc;ai++){
  243. FILE *fin;
  244. video_input vid;
  245. video_input_info info;
  246. video_input_ycbcr ycbcr;
  247. int tid;
  248. cov_state *cov;
  249. int x0,y0,x1,y1;
  250. fin=fopen(_argv[ai],"rb");
  251. if(fin==NULL){
  252. fprintf(stderr,"Could not open '%s' for reading.\n",_argv[ai]);
  253. continue;
  254. }
  255. if(video_input_open(&vid,fin)<0){
  256. fprintf(stderr,"Error reading video info from '%s'.\n",_argv[ai]);
  257. continue;
  258. }
  259. video_input_get_info(&vid,&info);
  260. if(video_input_fetch_frame(&vid,ycbcr,NULL)<0){
  261. fprintf(stderr,"Error reading first frame from '%s'.\n",_argv[ai]);
  262. continue;
  263. }
  264. tid=OD_OMP_GET_THREAD;
  265. cov=_cov+tid;
  266. x0 = info.pic_x;
  267. y0 = info.pic_y;
  268. x1 = x0 + info.pic_w;
  269. y1 = y0 + info.pic_h;
  270. fprintf(stderr,"%s\n",_argv[ai]);
  271. /* map */
  272. {
  273. int stride=ycbcr[0].stride;
  274. const unsigned char *data=ycbcr[0].data;
  275. #if COMPUTE_NATHAN
  276. /* block-based full covariance computation (unlord style) */
  277. int nxblocks=info.pic_w>>BLOCKSIZE_LOG;
  278. int nyblocks=info.pic_h>>BLOCKSIZE_LOG;
  279. trans_ctx *ctx=_ctx+tid;
  280. # if USE_2D
  281. unsigned char buf[SUPPORT][SUPPORT];
  282. int x,y,i,j;
  283. image_ctx_init(&ctx->img,_argv[ai],nxblocks,nyblocks);
  284. for(y=0;y<nyblocks*BLOCKSIZE-SUPPORT+1;y++){
  285. for(x=0;x<nxblocks*BLOCKSIZE-SUPPORT+1;x++){
  286. for(j=0;j<SUPPORT;j++){
  287. for(i=0;i<SUPPORT;i++){
  288. buf[j][i]=data[(y0+y+j)*stride+(x0+x+i)];
  289. }
  290. }
  291. trans_data_add(&ctx->td,(unsigned char *)buf);
  292. }
  293. }
  294. # else
  295. unsigned char buf[SUPPORT];
  296. int x,y,z;
  297. image_ctx_init(&ctx->img,_argv[ai],nxblocks,nyblocks);
  298. /* add the rows */
  299. for(y=0;y<nyblocks*BLOCKSIZE;y++){
  300. for(x=0;x<nxblocks*BLOCKSIZE-SUPPORT+1;x++){
  301. for(z=0;z<SUPPORT;z++){
  302. buf[z]=data[(y+y0)*stride+x+x0+z];
  303. }
  304. trans_data_add(&ctx->td,buf);
  305. }
  306. }
  307. /* add the columns */
  308. for(y=0;y<nyblocks*BLOCKSIZE-SUPPORT+1;y++){
  309. for(x=0;x<nxblocks*BLOCKSIZE;x++){
  310. for(z=0;z<SUPPORT;z++){
  311. buf[z]=data[(y0+y+z)*stride+x+x0];
  312. }
  313. trans_data_add(&ctx->td,buf);
  314. }
  315. }
  316. # endif
  317. #endif
  318. /* Direct computation of collapsed covariance matrix (monty style) */
  319. #if USE_2D
  320. cov_accumulate_2d(cov,data+y0*stride+x0,stride,x1-x0,y1-y0);
  321. #else
  322. {
  323. int x,y;
  324. for(y=y0;y<y1;y++)
  325. cov_accumulate_1d(cov,data+y*stride+x0,1,x1-x0);
  326. for(x=x0;x<x1;x++)
  327. cov_accumulate_1d(cov,data+y0*stride+x,stride,y1-y0);
  328. }
  329. #endif
  330. }
  331. video_input_close(&vid);
  332. }
  333. }
  334. #endif
  335. #if USE_WAVELET
  336. /* some lifting CDF (9/7) wavelet code from Google Code's axonlib */
  337. /* http://code.google.com/p/axonlib/source/browse/trunk/extern/dwt97.c?spec=svn19&r=19 */
/* Single stage of the CDF(9/7) forward lifting transform on x[0..n-1],
   in place: four lifting passes (predict/update, twice), a scaling
   pass, then de-interleaving so x[0..n/2) holds the lowpass band and
   x[n/2..n) the highpass band.  Boundaries use a simple symmetric
   extension (the 2*a terms). */
static void fwt97_i(double* x,int n){
  /* scratch for the de-interleave; assumes n <= SUPPORT */
  double temp[SUPPORT];
  double a;
  int i;
  /* Predict 1 */
  a=-1.586134342;
  for (i=1;i<n-2;i+=2)
    x[i]+=a*(x[i-1]+x[i+1]);
  x[n-1]+=2*a*x[n-2];
  /* Update 1 */
  a=-0.05298011854;
  for (i=2;i<n;i+=2)
    x[i]+=a*(x[i-1]+x[i+1]);
  x[0]+=2*a*x[1];
  /* Predict 2 */
  a=0.8829110762;
  for (i=1;i<n-2;i+=2)
    x[i]+=a*(x[i-1]+x[i+1]);
  x[n-1]+=2*a*x[n-2];
  /* Update 2 */
  a=0.4435068522;
  for (i=2;i<n;i+=2)
    x[i]+=a*(x[i-1]+x[i+1]);
  x[0]+=2*a*x[1];
  /* Scale: odd (highpass) samples shrink by K=1.149604398, even
     (lowpass) samples grow by K */
  a=1/1.149604398;
  for (i=0;i<n;i++)
  {
    if (i%2) x[i]*=a;
    else x[i]/=a;
  }
  /* Pack: even samples to temp[0..n/2), odd samples to temp[n/2..n) */
  for (i=0;i<n;i++){
    if (i%2==0)
      temp[i/2]=x[i];
    else
      temp[n/2+i/2]=x[i];
  }
  for (i=0;i<n;i++) x[i]=temp[i];
}
/* Single stage of CDF(9/7) reconstruction: exact inverse of fwt97_i.
   Re-interleaves the two half-bands, undoes the scaling, then runs the
   four lifting passes in reverse order with negated coefficients. */
void iwt97_i(double* x,int n){
  /* scratch for the re-interleave; assumes n <= SUPPORT */
  double temp[SUPPORT];
  double a;
  int i;
  /* Unpack: lowpass from x[0..n/2), highpass from x[n/2..n) */
  for (i=0;i<n/2;i++){
    temp[i*2]=x[i];
    temp[i*2+1]=x[i+n/2];
  }
  for (i=0;i<n;i++) x[i]=temp[i];
  /* Undo scale */
  a=1.149604398;
  for (i=0;i<n;i++)
  {
    if (i%2) x[i]*=a;
    else x[i]/=a;
  }
  /* Undo update 2 */
  a=-0.4435068522;
  for (i=2;i<n;i+=2)
    x[i]+=a*(x[i-1]+x[i+1]);
  x[0]+=2*a*x[1];
  /* Undo predict 2 */
  a=-0.8829110762;
  for (i=1;i<n-2;i+=2)
    x[i]+=a*(x[i-1]+x[i+1]);
  x[n-1]+=2*a*x[n-2];
  /* Undo update 1 */
  a=0.05298011854;
  for (i=2;i<n;i+=2)
    x[i]+=a*(x[i-1]+x[i+1]);
  x[0]+=2*a*x[1];
  /* Undo predict 1 */
  a=1.586134342;
  for (i=1;i<n-2;i+=2)
    x[i]+=a*(x[i-1]+x[i+1]);
  x[n-1]+=2*a*x[n-2];
}
/* Multistage decomposition: transform 'support' samples of 'in' down
   to n output coefficients in out[0..n-1].  Stages run while the band
   count i stays even, each stage halving it; since support > n, only
   the centered coefficients of each band are kept (the index
   expressions below pick the center of the current band -- TODO
   confirm against trans_tools conventions).
   NOTE(review): 'in' is transformed in place and clobbered. */
void fwt97(double *out, int n, double *in, int support){
  int i=n,j=support,k;
  while((i&1)==0){
    fwt97_i(in,j);
    i>>=1;
    /* copy out this stage's i centered highpass coefficients */
    for(k=0;k<i;k++)
      out[i+k] = in[((((j*3)>>1)-i)>>1) + k];
    j>>=1;
  }
  /* finally, the i centered lowpass coefficients */
  for(k=0;k<i;k++)
    out[k] = in[((j-i)>>1) + k];
}
/* Multistage reconstruction: inverse of fwt97.  Scatters the n input
   coefficients back to their centered band positions in 'out' (all
   other positions start at zero), then applies inverse stages over
   progressively larger bands until full support is reached. */
void iwt97(double *out, int support, double *in, int n){
  int i=n,j=support,k;
  for(k=0;k<support;k++)
    out[k]=0;
  /* place each stage's highpass coefficients */
  while((i&1)==0){
    i>>=1;
    for(k=0;k<i;k++)
      out[((((j*3)>>1)-i)>>1) + k]=in[i+k];
    j>>=1;
  }
  /* place the final lowpass coefficients */
  for(k=0;k<i;k++)
    out[((j-i)>>1) + k]=in[k];
  i<<=1;
  j<<=1;
  /* run the inverse stages from the smallest band back up to full
     support */
  while(j<=support){
    iwt97_i(out,j);
    i<<=1;
    j<<=1;
  }
}
  452. #endif
  453. #if USE_KLT
  454. void symeigen(double *out,
  455. double *cov,
  456. int support){
  457. int i;
  458. int j;
  459. int k;
  460. for(i=0;i<support;i++)
  461. for(j=0;j<support;j++)
  462. out[i*support+j]=i==j;
  463. for(;;){
  464. double mod=0.;
  465. for(i=0,j=0,k=0;k<support;k++){
  466. int m;
  467. for(m=k+1;m<support;m++){
  468. double q;
  469. q=fabs(cov[k*support+m]);
  470. if(q>mod){
  471. mod=q;
  472. i=k;
  473. j=m;
  474. }
  475. }
  476. }
  477. if(mod<1E-11)break;
  478. {
  479. double th=0.5*atan2(2*cov[i*support+j],cov[i*support+i]-cov[j*support+j]);
  480. double c=cos(th);
  481. double s=sin(th);
  482. for(k=0;k<support;k++){
  483. double t;
  484. t=c*cov[k*support+i]+s*cov[k*support+j];
  485. cov[k*support+j]=-s*cov[k*support+i]+c*cov[k*support+j];
  486. cov[k*support+i]=t;
  487. }
  488. for(k=0;k<support;k++){
  489. double t;
  490. t=c*cov[i*support+k]+s*cov[j*support+k];
  491. cov[j*support+k]=-s*cov[i*support+k]+c*cov[j*support+k];
  492. cov[i*support+k]=t;
  493. }
  494. for(k=0;k<support;k++){
  495. double t;
  496. t=c*out[i*support+k]+s*out[j*support+k];
  497. out[j*support+k]=-s*out[i*support+k]+c*out[j*support+k];
  498. out[i*support+k]=t;
  499. }
  500. }
  501. }
  502. /* for(j=0;j<BLOCKSIZE;j++)eigenvalue[j]=cov[j][j]; don't need eigenvalues */
  503. }
/* Forward lap of a SUPPORT x SUPPORT neighborhood down to a
   BLOCKSIZE x BLOCKSIZE block: applies the prefilter in place across
   the lapped block boundaries (columns first, then rows), then copies
   out the centered block.  'in' is modified. */
void flap_2d(double out[BLOCKSIZE][BLOCKSIZE],
 double in[SUPPORT][SUPPORT],
 const int _f[]){
  int i,j;
#if USE_LAPPING
#if BLOCKSIZE_LOG>=OD_LOG_BSIZE0&&BLOCKSIZE_LOG<OD_LOG_BSIZE0+OD_NBSIZES
  /* columns */
  for(i=SUPPORT/2-BLOCKSIZE;i<SUPPORT/2+BLOCKSIZE;i++){
    double work[BLOCKSIZE*2];
    /* gather a vertical strip of 2*BLOCKSIZE samples centered on the
       block, filter both halves, and scatter back */
    for(j=0;j<BLOCKSIZE*2;j++)
      work[j]=in[j+SUPPORT/2-BLOCKSIZE][i];
    (*NE_PRE_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
     (&work[0],&work[0],_f);
    (*NE_PRE_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
     (&work[BLOCKSIZE],&work[BLOCKSIZE],_f);
    for(j=0;j<BLOCKSIZE*2;j++)
      in[j+SUPPORT/2-BLOCKSIZE][i]=work[j];
  }
  /* rows */
  for(i=SUPPORT/2-BLOCKSIZE;i<SUPPORT/2+BLOCKSIZE;i++){
    (*NE_PRE_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
     (&in[i][SUPPORT/2-BLOCKSIZE],&in[i][SUPPORT/2-BLOCKSIZE],_f);
    (*NE_PRE_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
     (&in[i][SUPPORT/2],&in[i][SUPPORT/2],_f);
  }
#else
# error "Need a prefilter implementation for this block size."
#endif
#endif
  /* copy out the (now prefiltered) BLOCKSIZE x BLOCKSIZE center */
  for(i=0;i<BLOCKSIZE;i++)
    for(j=0;j<BLOCKSIZE;j++)
      out[i][j]=in[i+SUPPORT/2-BLOCKSIZE/2][j+SUPPORT/2-BLOCKSIZE/2];
}
  537. void ilap_2d(double out[SUPPORT][SUPPORT],
  538. double in[BLOCKSIZE][BLOCKSIZE],
  539. const int _f[]){
  540. int i,j;
  541. for(i=0;i<SUPPORT;i++)
  542. for(j=0;j<SUPPORT;j++)
  543. out[i][j]=0;
  544. for(i=0;i<BLOCKSIZE;i++)
  545. for(j=0;j<BLOCKSIZE;j++)
  546. out[i+SUPPORT/2-BLOCKSIZE/2][j+SUPPORT/2-BLOCKSIZE/2]=in[i][j];
  547. #if USE_LAPPING
  548. #if BLOCKSIZE_LOG>=OD_LOG_BSIZE0&&BLOCKSIZE_LOG<OD_LOG_BSIZE0+OD_NBSIZES
  549. /* columns */
  550. for(i=SUPPORT/2-BLOCKSIZE;i<SUPPORT/2+BLOCKSIZE;i++){
  551. double work[BLOCKSIZE*2];
  552. for(j=0;j<BLOCKSIZE*2;j++)
  553. work[j]=out[j+SUPPORT/2-BLOCKSIZE][i];
  554. (*NE_POST_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
  555. (&work[0],&work[0],_f);
  556. (*NE_POST_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
  557. (&work[BLOCKSIZE],&work[BLOCKSIZE],_f);
  558. for(j=0;j<BLOCKSIZE*2;j++)
  559. out[j+SUPPORT/2-BLOCKSIZE][i]=work[j];
  560. }
  561. /* rows */
  562. for(i=SUPPORT/2-BLOCKSIZE;i<SUPPORT/2+BLOCKSIZE;i++){
  563. (*NE_POST_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
  564. (&out[i][SUPPORT/2-BLOCKSIZE],&out[i][SUPPORT/2-BLOCKSIZE],_f);
  565. (*NE_POST_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
  566. (&out[i][SUPPORT/2],&out[i][SUPPORT/2],_f);
  567. }
  568. #else
  569. # error "Need a prefilter implementation for this block size."
  570. #endif
  571. #endif
  572. }
/* Forward lap of a 4D covariance tensor down to the
   (BLOCKSIZE*BLOCKSIZE) x (BLOCKSIZE*BLOCKSIZE) matrix used by the 2D
   KLT: the prefilter is applied along each of the four tensor
   dimensions in turn, then the centered block is copied out with the
   first two and last two dimensions flattened.  'in' is modified. */
void flap_4d(double out[BLOCKSIZE*BLOCKSIZE][BLOCKSIZE*BLOCKSIZE],
 double in[SUPPORT][SUPPORT][SUPPORT][SUPPORT],
 const int _f[]){
  int i,j,k,l;
#if USE_LAPPING
#if BLOCKSIZE_LOG>=OD_LOG_BSIZE0&&BLOCKSIZE_LOG<OD_LOG_BSIZE0+OD_NBSIZES
  /* filter along dimension 0 */
  for(i=SUPPORT/2-BLOCKSIZE;i<SUPPORT/2+BLOCKSIZE;i++){
    for(j=SUPPORT/2-BLOCKSIZE;j<SUPPORT/2+BLOCKSIZE;j++){
      for(k=SUPPORT/2-BLOCKSIZE;k<SUPPORT/2+BLOCKSIZE;k++){
        double work[BLOCKSIZE*2];
        /* [ ][i][j][k] */
        for(l=0;l<BLOCKSIZE*2;l++)
          work[l]=in[l+SUPPORT/2-BLOCKSIZE][i][j][k];
        (*NE_PRE_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
         (&work[0],&work[0],_f);
        (*NE_PRE_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
         (&work[BLOCKSIZE],&work[BLOCKSIZE],_f);
        for(l=0;l<BLOCKSIZE*2;l++)
          in[l+SUPPORT/2-BLOCKSIZE][i][j][k]=work[l];
      }
    }
  }
  /* filter along dimension 1 */
  for(i=SUPPORT/2-BLOCKSIZE;i<SUPPORT/2+BLOCKSIZE;i++){
    for(j=SUPPORT/2-BLOCKSIZE;j<SUPPORT/2+BLOCKSIZE;j++){
      for(k=SUPPORT/2-BLOCKSIZE;k<SUPPORT/2+BLOCKSIZE;k++){
        double work[BLOCKSIZE*2];
        /* [i][ ][j][k] */
        for(l=0;l<BLOCKSIZE*2;l++)
          work[l]=in[i][l+SUPPORT/2-BLOCKSIZE][j][k];
        (*NE_PRE_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
         (&work[0],&work[0],_f);
        (*NE_PRE_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
         (&work[BLOCKSIZE],&work[BLOCKSIZE],_f);
        for(l=0;l<BLOCKSIZE*2;l++)
          in[i][l+SUPPORT/2-BLOCKSIZE][j][k]=work[l];
      }
    }
  }
  /* filter along dimension 2 */
  for(i=SUPPORT/2-BLOCKSIZE;i<SUPPORT/2+BLOCKSIZE;i++){
    for(j=SUPPORT/2-BLOCKSIZE;j<SUPPORT/2+BLOCKSIZE;j++){
      for(k=SUPPORT/2-BLOCKSIZE;k<SUPPORT/2+BLOCKSIZE;k++){
        double work[BLOCKSIZE*2];
        /* [i][j][ ][k] */
        for(l=0;l<BLOCKSIZE*2;l++)
          work[l]=in[i][j][l+SUPPORT/2-BLOCKSIZE][k];
        (*NE_PRE_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
         (&work[0],&work[0],_f);
        (*NE_PRE_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
         (&work[BLOCKSIZE],&work[BLOCKSIZE],_f);
        for(l=0;l<BLOCKSIZE*2;l++)
          in[i][j][l+SUPPORT/2-BLOCKSIZE][k]=work[l];
      }
    }
  }
  /* filter along dimension 3 (contiguous, so no gather/scatter) */
  for(i=SUPPORT/2-BLOCKSIZE;i<SUPPORT/2+BLOCKSIZE;i++){
    for(j=SUPPORT/2-BLOCKSIZE;j<SUPPORT/2+BLOCKSIZE;j++){
      for(k=SUPPORT/2-BLOCKSIZE;k<SUPPORT/2+BLOCKSIZE;k++){
        /* [i][j][k][ ] */
        (*NE_PRE_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
         (&in[i][j][k][SUPPORT/2-BLOCKSIZE],&in[i][j][k][SUPPORT/2-BLOCKSIZE],_f);
        (*NE_PRE_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
         (&in[i][j][k][SUPPORT/2],&in[i][j][k][SUPPORT/2],_f);
      }
    }
  }
#else
# error "Need a prefilter implementation for this block size."
#endif
#endif
  /* copy out the centered 4D block, flattening to a 2D matrix */
  for(i=0;i<BLOCKSIZE;i++)
    for(j=0;j<BLOCKSIZE;j++)
      for(k=0;k<BLOCKSIZE;k++)
        for(l=0;l<BLOCKSIZE;l++)
          out[i*BLOCKSIZE+j][k*BLOCKSIZE+l]=in
           [i+SUPPORT/2-BLOCKSIZE/2]
           [j+SUPPORT/2-BLOCKSIZE/2]
           [k+SUPPORT/2-BLOCKSIZE/2]
           [l+SUPPORT/2-BLOCKSIZE/2];
}
  652. void gklt_1d(double klt[BLOCKSIZE][BLOCKSIZE],
  653. double cov[SUPPORT][SUPPORT],
  654. const int *_f){
  655. static double workA[SUPPORT][SUPPORT];
  656. static double workB[BLOCKSIZE][BLOCKSIZE];
  657. int i,j;
  658. for(i=0;i<SUPPORT;i++)
  659. for(j=0;j<SUPPORT;j++)
  660. workA[i][j]=cov[i][j];
  661. flap_2d(workB,workA,_f);
  662. symeigen(&klt[0][0],&workB[0][0],BLOCKSIZE);
  663. }
  664. void gklt_2d(double klt[BLOCKSIZE*BLOCKSIZE][BLOCKSIZE*BLOCKSIZE],
  665. double cov[SUPPORT][SUPPORT][SUPPORT][SUPPORT],
  666. const int *_f){
  667. static double workA[SUPPORT][SUPPORT][SUPPORT][SUPPORT];
  668. static double workB[BLOCKSIZE*BLOCKSIZE][BLOCKSIZE*BLOCKSIZE];
  669. int i,j,k,l;
  670. for(i=0;i<SUPPORT;i++)
  671. for(j=0;j<SUPPORT;j++)
  672. for(k=0;k<SUPPORT;k++)
  673. for(l=0;l<SUPPORT;l++)
  674. workA[i][j][k][l]=cov[i][j][k][l];
  675. flap_4d(workB,workA,_f);
  676. symeigen(&klt[0][0],&workB[0][0],BLOCKSIZE*BLOCKSIZE);
  677. }
  678. void gklt_1d_collapsed(double klt[BLOCKSIZE][BLOCKSIZE],
  679. double cov[SUPPORT],
  680. const int *_f){
  681. static double workA[SUPPORT][SUPPORT];
  682. static double workB[BLOCKSIZE][BLOCKSIZE];
  683. int i,j;
  684. for(i=0;i<SUPPORT;i++)
  685. for(j=0;j<SUPPORT;j++)
  686. workA[i][j]=cov[abs(i-j)];
  687. flap_2d(workB,workA,_f);
  688. symeigen(&klt[0][0],&workB[0][0],BLOCKSIZE);
  689. }
  690. void gklt_2d_collapsed(double klt[BLOCKSIZE*BLOCKSIZE][BLOCKSIZE*BLOCKSIZE],
  691. double cov[SUPPORT][SUPPORT],
  692. const int *_f){
  693. static double workA[SUPPORT][SUPPORT][SUPPORT][SUPPORT];
  694. static double workB[BLOCKSIZE*BLOCKSIZE][BLOCKSIZE*BLOCKSIZE];
  695. int i,j,k,l;
  696. for(i=0;i<SUPPORT;i++)
  697. for(j=0;j<SUPPORT;j++)
  698. for(k=0;k<SUPPORT;k++)
  699. for(l=0;l<SUPPORT;l++)
  700. workA[i][j][k][l]=cov[abs(i-k)][abs(j-l)];
  701. flap_4d(workB,workA,_f);
  702. symeigen(&klt[0][0],&workB[0][0],BLOCKSIZE*BLOCKSIZE);
  703. }
  704. void fklt(double *out,
  705. double *in,
  706. double *klt,
  707. int support){
  708. int i,j;
  709. for(i=0;i<support;i++){
  710. double acc=0.;
  711. for(j=0;j<support;j++)
  712. acc += klt[i*support+j]*in[j];
  713. out[i]=acc;
  714. }
  715. }
  716. void iklt(double *out,
  717. double *in,
  718. double *klt,
  719. int support){
  720. int i,j;
  721. for(i=0;i<support;i++){
  722. double acc=0.;
  723. for(j=0;j<support;j++)
  724. acc+=klt[j*support+i]*in[j];
  725. out[i]=acc;
  726. }
  727. }
  728. #endif
/* Analyze one SUPPORT-sample line (read with stride _in_stride) into
   BLOCKSIZE transform coefficients (written with stride _out_stride).
   Depending on compile-time configuration this is a wavelet, a
   (lapped) KLT, a (lapped) DCT, or lapping alone.  _f selects the
   lapping filter; _klt is only used when USE_KLT is enabled (callers
   pass NULL otherwise). */
void b_analysis_1d(double *_out,int _out_stride,const double *_in,int _in_stride,
 const int *_f, double _klt[BLOCKSIZE][BLOCKSIZE]){
  int j;
  double t[SUPPORT];
  double w[BLOCKSIZE];
  /* gather the strided input line into contiguous scratch */
  for(j=0;j<SUPPORT;j++)
    t[j]=_in[j*_in_stride];
#if USE_WAVELET
  fwt97(w,BLOCKSIZE,t,SUPPORT);
  for(j=0;j<BLOCKSIZE;j++){
    _out[j*_out_stride]=w[j];
  }
#else
# if USE_LAPPING
# if BLOCKSIZE_LOG>=OD_LOG_BSIZE0&&BLOCKSIZE_LOG<OD_LOG_BSIZE0+OD_NBSIZES
  /* prefilter the two lapped block boundaries around the center */
  (*NE_PRE_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
   (&t[SUPPORT/2-BLOCKSIZE],&t[SUPPORT/2-BLOCKSIZE],_f);
  (*NE_PRE_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
   (&t[SUPPORT/2],&t[SUPPORT/2],_f);
# else
# error "Need a prefilter implementation for this block size."
# endif
# endif
# if USE_KLT
  /* transform the centered BLOCKSIZE samples with the KLT */
  fklt(&w[0],&t[SUPPORT/2-BLOCKSIZE/2],&_klt[0][0],BLOCKSIZE);
# elif USE_DCT
# if BLOCKSIZE_LOG>=OD_LOG_BSIZE0&&BLOCKSIZE_LOG<OD_LOG_BSIZE0+OD_NBSIZES
  (*OD_FDCT_1D_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
   (w,&t[SUPPORT/2-BLOCKSIZE/2],1);
# else
# error "Need an fDCT implementation for this block size."
# endif
# else
  /* no transform: pass the centered block through unchanged */
  for(j=0;j<BLOCKSIZE;j++)
    w[j]=t[j+SUPPORT/2-BLOCKSIZE/2];
# endif
  for(j=0;j<BLOCKSIZE;j++)
    _out[j*_out_stride]=w[j];
#endif
}
/* 2D analysis of a SUPPORT x SUPPORT neighborhood into a
   BLOCKSIZE x BLOCKSIZE coefficient block.  The KLT path is a single
   non-separable 2D transform; DCT/DWT apply the 1D analysis separably
   (all rows of the support, then the columns of the result). */
void b_analysis_2d(double *_out,int _out_stride_i,int _out_stride_j,
 const double *_in,int _in_stride_i,int _in_stride_j,
 const int *_f, double _klt[BLOCKSIZE*BLOCKSIZE][BLOCKSIZE*BLOCKSIZE]){
#if USE_KLT
  /* KLT is a non-separable 2D transform */
  double lap[SUPPORT][SUPPORT];
  double work[BLOCKSIZE][BLOCKSIZE];
  double temp[BLOCKSIZE][BLOCKSIZE];
  int i,j;
  /* gather the strided input into contiguous scratch */
  for(i=0;i<SUPPORT;i++)
    for(j=0;j<SUPPORT;j++)
      lap[i][j]=*(_in+i*_in_stride_i+j*_in_stride_j);
  flap_2d(work,lap,_f);
  fklt(&temp[0][0],&work[0][0],&_klt[0][0],BLOCKSIZE*BLOCKSIZE);
  for(i=0;i<BLOCKSIZE;i++)
    for(j=0;j<BLOCKSIZE;j++)
      *(_out+i*_out_stride_i+j*_out_stride_j)=temp[i][j];
#else
  double work[SUPPORT][BLOCKSIZE];
  int i;
  /* DCT and DWT are separable 1D transforms */
  /* lapping performed inside b_analysis_1d */
  for(i=0;i<SUPPORT;i++)
    b_analysis_1d(&work[i][0],1,_in+i*_in_stride_i,_in_stride_j,_f,NULL);
  for(i=0;i<BLOCKSIZE;i++)
    b_analysis_1d(_out+_out_stride_i*i,_out_stride_j,&work[0][i],BLOCKSIZE,_f,NULL);
#endif
}
/* Inverse of b_analysis_1d: expand BLOCKSIZE coefficients (stride
   _in_stride) back into a SUPPORT-sample line (stride _out_stride),
   applying the inverse transform and then the postfilter (unlapping)
   around the block boundaries.  _klt is only used when USE_KLT is
   enabled. */
void b_synthesis_1d(double *_out,int _out_stride,const double *_in,int _in_stride,
 const int *_f, double _klt[BLOCKSIZE][BLOCKSIZE]){
  int j;
  double w[SUPPORT];
  double t[SUPPORT];
  /* the reconstruction outside the block starts at zero */
  for(j=0;j<SUPPORT;j++){
    t[j]=0;
    w[j]=0;
  }
#if USE_WAVELET
  for(j=0;j<BLOCKSIZE;j++)
    w[j]=_in[j*_in_stride];
  iwt97(t,SUPPORT,w,BLOCKSIZE);
#else
  /* place the coefficients at the center of the support */
  for(j=0;j<BLOCKSIZE;j++){
    w[SUPPORT/2-BLOCKSIZE/2+j]=_in[j*_in_stride];
  }
# if USE_KLT
  iklt(&t[SUPPORT/2-BLOCKSIZE/2],&w[SUPPORT/2-BLOCKSIZE/2],&_klt[0][0],BLOCKSIZE);
# elif USE_DCT
# if BLOCKSIZE_LOG>=OD_LOG_BSIZE0&&BLOCKSIZE_LOG<OD_LOG_BSIZE0+OD_NBSIZES
  (*OD_IDCT_1D_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
   (&t[SUPPORT/2-BLOCKSIZE/2],1,&w[SUPPORT/2-BLOCKSIZE/2]);
# else
# error "Need an iDCT implementation for this block size."
# endif
# else
  /* no transform: copy straight through */
  for(j=0;j<SUPPORT;j++)
    t[j]=w[j];
# endif
# if USE_LAPPING
# if BLOCKSIZE_LOG>=OD_LOG_BSIZE0&&BLOCKSIZE_LOG<OD_LOG_BSIZE0+OD_NBSIZES
  /* postfilter the two lapped boundaries around the center block */
  (*NE_POST_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
   (&t[SUPPORT/2-BLOCKSIZE],&t[SUPPORT/2-BLOCKSIZE],_f);
  (*NE_POST_FILTER_DOUBLE[BLOCKSIZE_LOG-OD_LOG_BSIZE0])
   (&t[SUPPORT/2],&t[SUPPORT/2],_f);
# else
# error "Need a postfilter implementation for this block size."
# endif
# endif
#endif
  for(j=0;j<SUPPORT;j++)
    _out[j*_out_stride]=t[j];
}
/*Applies the 2D synthesis (inverse) transform: a BLOCKSIZE x BLOCKSIZE block
  of coefficients in, a SUPPORT x SUPPORT region of samples out.
  Strides are in doubles, given separately for the i (row) and j (column)
  directions of each array.
  _f: lapping post-filter parameters.
  _klt: inverse 2D KLT basis (only consulted when USE_KLT is enabled).*/
void b_synthesis_2d(double *_out,int _out_stride_i,int _out_stride_j,
const double *_in,int _in_stride_i,int _in_stride_j,
const int *_f, double _klt[BLOCKSIZE*BLOCKSIZE][BLOCKSIZE*BLOCKSIZE]){
#if USE_KLT
/* KLT is a non-separable 2D transform */
double temp[BLOCKSIZE][BLOCKSIZE];
double work[BLOCKSIZE][BLOCKSIZE];
double lap[SUPPORT][SUPPORT];
int i,j;
for(i=0;i<BLOCKSIZE;i++)
for(j=0;j<BLOCKSIZE;j++)
temp[i][j]=*(_in+i*_in_stride_i+j*_in_stride_j);
/*Inverse KLT back to a block, then inverse lapping out to the support.*/
iklt(&work[0][0],&temp[0][0],&_klt[0][0],BLOCKSIZE*BLOCKSIZE);
ilap_2d(lap,work,_f);
for(i=0;i<SUPPORT;i++)
for(j=0;j<SUPPORT;j++)
*(_out+i*_out_stride_i+j*_out_stride_j)=lap[i][j];
#else
double work[SUPPORT][BLOCKSIZE];
int i;
/* DCT and DWT are separable 1D transforms */
/* lapping (post-filtering) performed inside b_synthesis_1d */
for(i=0;i<BLOCKSIZE;i++)
b_synthesis_1d(&work[0][i],BLOCKSIZE,_in+i*_in_stride_i,_in_stride_j,_f,NULL);
for(i=0;i<SUPPORT;i++)
b_synthesis_1d(_out+_out_stride_i*i,_out_stride_j,&work[i][0],1,_f,NULL);
#endif
}
  869. #if USE_2D
  870. static double cg_2d_i(double rggt[SUPPORT][SUPPORT][BLOCKSIZE][BLOCKSIZE],
  871. const int *_f,
  872. double _klt[BLOCKSIZE*BLOCKSIZE][BLOCKSIZE*BLOCKSIZE]){
  873. double r[BLOCKSIZE][BLOCKSIZE][BLOCKSIZE][BLOCKSIZE];
  874. double s[SUPPORT][BLOCKSIZE];
  875. double ggrggt[BLOCKSIZE][BLOCKSIZE][BLOCKSIZE][BLOCKSIZE];
  876. double cg=0;
  877. int i;
  878. int j;
  879. int v;
  880. int u;
  881. int k;
  882. int l;
  883. /* G1*P*G2*R*(G2*P*G1)^T */
  884. for(v=0;v<BLOCKSIZE;v++)
  885. for(j=0;j<BLOCKSIZE;j++)
  886. b_analysis_2d(&ggrggt[v][j][0][0],
  887. 1,BLOCKSIZE,
  888. &rggt[0][0][v][j],
  889. BLOCKSIZE*BLOCKSIZE*SUPPORT,
  890. BLOCKSIZE*BLOCKSIZE,
  891. f,_klt);
  892. /* H1*P*H2 */
  893. for(i=0;i<BLOCKSIZE;i++)
  894. for(j=0;j<BLOCKSIZE;j++)
  895. for(k=0;k<BLOCKSIZE;k++)
  896. for(l=0;l<BLOCKSIZE;l++)
  897. r[i][j][k][l] = (i*BLOCKSIZE+j==k*BLOCKSIZE+l)?1:0;
  898. for(i=0;i<BLOCKSIZE;i++)
  899. for(j=0;j<BLOCKSIZE;j++)
  900. b_synthesis_2d(&rggt[0][0][i][j],
  901. BLOCKSIZE*BLOCKSIZE,
  902. SUPPORT*BLOCKSIZE*BLOCKSIZE,
  903. &r[i][j][0][0],
  904. BLOCKSIZE,1,
  905. _f,_klt);
  906. /* ((H1*P*H2)^T*H1*P*H2)_ii */
  907. for(i=0;i<BLOCKSIZE;i++){
  908. for(j=0;j<BLOCKSIZE;j++){
  909. s[i][j]=0;
  910. for(u=0;u<SUPPORT;u++){
  911. for(v=0;v<SUPPORT;v++){
  912. s[i][j]+=rggt[u][v][i][j]*rggt[u][v][i][j];
  913. }
  914. }
  915. }
  916. }
  917. /* (G1*P*G2*R*(G1*P*G2)^T)_ii * ((H1*P*H2)^T*H1*P*H2)_ii */
  918. for(i=0;i<BLOCKSIZE;i++)
  919. for(j=0;j<BLOCKSIZE;j++)
  920. cg-=10*log10(ggrggt[i][j][i][j]*s[i][j]);
  921. return cg/(BLOCKSIZE*BLOCKSIZE);
  922. }
  923. double cg_2d(double _in[SUPPORT][SUPPORT][SUPPORT][SUPPORT],
  924. const int *_f){
  925. int v;
  926. int j;
  927. double ret;
  928. double (*rggt)[SUPPORT][BLOCKSIZE][BLOCKSIZE] =
  929. malloc(SUPPORT*SUPPORT*BLOCKSIZE*BLOCKSIZE*sizeof(****rggt));
  930. double klt[BLOCKSIZE*BLOCKSIZE][BLOCKSIZE*BLOCKSIZE];
  931. #if USE_KLT
  932. gklt_2d(klt,_in,_f);
  933. #endif
  934. /* R*(G2*P*G1)^T */
  935. for(v=0;v<SUPPORT;v++)
  936. for(j=0;j<SUPPORT;j++)
  937. b_analysis_2d(&rggt[v][j][0][0],
  938. 1,BLOCKSIZE,
  939. &_in[0][0][v][j],
  940. SUPPORT*SUPPORT*SUPPORT,
  941. SUPPORT*SUPPORT,
  942. _f,klt);
  943. ret = cg_2d_i(rggt,f,klt);
  944. free(rggt);
  945. return ret;
  946. }
  947. double cg_2d_collapsed(double _in[SUPPORT][SUPPORT],const int *_f){
  948. int v;
  949. int u;
  950. int j;
  951. int i;
  952. double ret;
  953. double r[SUPPORT][SUPPORT];
  954. double (*rggt)[SUPPORT][BLOCKSIZE][BLOCKSIZE] =
  955. malloc(SUPPORT*SUPPORT*BLOCKSIZE*BLOCKSIZE*sizeof(****rggt));
  956. double klt[BLOCKSIZE*BLOCKSIZE][BLOCKSIZE*BLOCKSIZE];
  957. #if USE_KLT
  958. gklt_2d_collapsed(klt,_in,_f);
  959. #endif
  960. /* R*(G2*P*G1)^T */
  961. for(v=0;v<SUPPORT;v++){
  962. for(j=0;j<SUPPORT;j++){
  963. for(u=0;u<SUPPORT;u++)
  964. for(i=0;i<SUPPORT;i++)
  965. r[u][i]=_in[abs(u-v)][abs(i-j)];
  966. b_analysis_2d(&rggt[v][j][0][0],
  967. 1,BLOCKSIZE,
  968. &r[0][0],SUPPORT,1,_f,klt);
  969. }
  970. }
  971. ret = cg_2d_i(rggt,f,klt);
  972. free(rggt);
  973. return ret;
  974. }
  975. #else
  976. static double cg_1d_i(double rgt[SUPPORT][BLOCKSIZE],
  977. const int *_f,
  978. double klt[BLOCKSIZE][BLOCKSIZE]){
  979. int j;
  980. int i;
  981. double r[BLOCKSIZE];
  982. double grgt[BLOCKSIZE][BLOCKSIZE];
  983. double cg=0;
  984. /* G*R*G^T */
  985. for(i=0;i<BLOCKSIZE;i++)
  986. b_analysis_1d(&grgt[0][i],BLOCKSIZE,&rgt[0][i],BLOCKSIZE,_f,klt);
  987. /* H */
  988. for(j=0;j<BLOCKSIZE;j++){
  989. for(i=0;i<BLOCKSIZE;i++){
  990. r[i]=i==j?1:0;
  991. }
  992. b_synthesis_1d(&rgt[0][j],BLOCKSIZE,r,1,_f,klt);
  993. }
  994. /* (G*R*G^T)_ii * (H^T*H)_ii */
  995. for(j=0;j<BLOCKSIZE;j++){
  996. double h=0;
  997. for(i=0;i<SUPPORT;i++){
  998. h+=rgt[i][j]*rgt[i][j];
  999. }
  1000. cg-=10*log10(grgt[j][j]*h);
  1001. }
  1002. return cg/BLOCKSIZE;
  1003. }
  1004. double cg_1d(double in[SUPPORT][SUPPORT],const int *_f){
  1005. int j;
  1006. double rgt[SUPPORT][BLOCKSIZE];
  1007. double klt[BLOCKSIZE][BLOCKSIZE];
  1008. #if USE_KLT
  1009. gklt_1d(klt,in,_f);
  1010. #endif
  1011. /* R*G^T */
  1012. for(j=0;j<SUPPORT;j++){
  1013. b_analysis_1d(&rgt[j][0],1,in[j],1,_f,klt);
  1014. }
  1015. return cg_1d_i(rgt,f,klt);
  1016. }
  1017. double cg_1d_collapsed(double in[SUPPORT],const int *_f){
  1018. int j;
  1019. int i;
  1020. double r[SUPPORT];
  1021. double rgt[SUPPORT][BLOCKSIZE];
  1022. double klt[BLOCKSIZE][BLOCKSIZE];
  1023. #if USE_KLT
  1024. gklt_1d_collapsed(klt,in,_f);
  1025. #endif
  1026. /* R*G^T */
  1027. for(j=0;j<SUPPORT;j++){
  1028. for(i=0;i<SUPPORT;i++){
  1029. r[i]=in[abs(i-j)];
  1030. }
  1031. b_analysis_1d(&rgt[j][0],1,r,1,_f,klt);
  1032. }
  1033. return cg_1d_i(rgt,f,klt);
  1034. }
  1035. #endif
  1036. #if USE_FILES
/*File-driven mode: accumulates covariance statistics over the input files
  named on the command line (via process_files) and reports the resulting
  coding gain(s).*/
int main(int _argc,const char *_argv[]){
cov_state cvs[NUM_PROCS];
#if COMPUTE_NATHAN
trans_ctx ctx[NUM_PROCS];
double r[SUPPORT*SUPPORT]; /* maximum for 2d */
#else
trans_ctx *ctx=NULL;
#endif
int i;
/*Select the lapping filter parameters for the configured block size.*/
#if BLOCKSIZE==4
f=OD_FILTER_PARAMS4;
#elif BLOCKSIZE==8
f=OD_FILTER_PARAMS8;
#elif BLOCKSIZE==16
f=OD_FILTER_PARAMS16;
#else
# error "Need filter params for this block size."
#endif
/*One covariance accumulator per worker thread.*/
for(i=0;i<NUM_PROCS;i++){
#if USE_2D
cov_init(&cvs[i],SUPPORT*SUPPORT);
#else
cov_init(&cvs[i],SUPPORT);
#endif
}
#if COMPUTE_NATHAN
/*Parallel accumulators for the alternate ("nathan") statistics path.*/
for(i=0;i<NUM_PROCS;i++){
#if USE_2D
trans_data_init(&ctx[i].td,SUPPORT*SUPPORT);
#else
trans_data_init(&ctx[i].td,SUPPORT);
#endif
}
#endif
OD_OMP_SET_THREADS(NUM_PROCS);
process_files(ctx,cvs,_argc,_argv);
/*Merge all per-thread accumulators into slot 0, then finalize.*/
for(i=1;i<NUM_PROCS;i++)
cov_combine(&cvs[0],&cvs[i]);
cov_compute(&cvs[0]);
#if COMPUTE_NATHAN
for(i=1;i<NUM_PROCS;i++)
trans_data_combine(&ctx[0].td,&ctx[i].td);
trans_data_normalize(&ctx[0].td);
#endif
#if PRINT_COV
/*Optionally dump the collapsed covariance, one SUPPORT-wide row per line.*/
{
int i,j;
fprintf(stdout,"collapsed_cov=\n");
for(j=0;j<cvs[0].sz/SUPPORT;j++){
for(i=0;i<SUPPORT;i++){
fprintf(stdout,"%s %- 12.6G",i>0?",":"",cvs[0].cov[j*SUPPORT+i]);
}
fprintf(stdout,"\n");
}
}
#endif
/*Report coding gains computed from the accumulated statistics.*/
#if USE_2D
#if COMPUTE_NATHAN
fprintf(stdout,"original cg=%-24.16G\n",
cg_2d((double(*)[SUPPORT][SUPPORT][SUPPORT])ctx[0].td.cov,f));
trans_data_collapse(&ctx[0].td,SUPPORT,r);
fprintf(stdout,"collapse cg=%-24.16G\n",
cg_2d_collapsed((double(*)[SUPPORT])r,f));
#endif
fprintf(stdout,"monty cg=%-24.16G\n",
cg_2d_collapsed((double(*)[SUPPORT])cvs[0].cov,f));
#else
#if COMPUTE_NATHAN
fprintf(stdout,"original cg=%-24.16G\n",
cg_1d((double (*)[SUPPORT])ctx[0].td.cov,f));
trans_data_collapse(&ctx[0].td,1,r);
fprintf(stdout,"collapse cg=%-24.16G\n",
cg_1d_collapsed(r,f));
#endif
fprintf(stdout,"monty cg=%-24.16G\n",
cg_1d_collapsed(cvs[0].cov,f));
#endif
/*Release all per-thread state.*/
for(i=0;i<NUM_PROCS;i++)
cov_clear(&cvs[i]);
#if COMPUTE_NATHAN
for(i=0;i<NUM_PROCS;i++)
trans_data_clear(&ctx[i].td);
#endif
return EXIT_SUCCESS;
}
  1122. #else
/*Synthetic mode (no input files): builds an AR(1) model covariance with
  rho=0.95 and reports its coding gain.*/
int main(int _argc,const char *_argv[]){
#if USE_2D
double cov[SUPPORT][SUPPORT];
double *r=&cov[0][0];
#else
double cov[SUPPORT];
double *r=&cov[0];
#endif
/*Select the lapping filter parameters for the configured block size.*/
#if BLOCKSIZE==4
f=OD_FILTER_PARAMS4;
#elif BLOCKSIZE==8
f=OD_FILTER_PARAMS8;
#elif BLOCKSIZE==16
f=OD_FILTER_PARAMS16;
#else
# error "Need filter params for this block size."
#endif
# if USE_2D
auto_regressive_collapsed(r,SUPPORT*SUPPORT,SUPPORT,0.95);
fprintf(stdout,"AR p=.95 cg=%-24.18G\n",cg_2d_collapsed(cov,f));
# else
auto_regressive_collapsed(r,SUPPORT,1,0.95);
fprintf(stdout,"AR p=.95 cg=%-24.18G\n",cg_1d_collapsed(cov,f));
# endif
return EXIT_SUCCESS;
}
  1149. #endif