trans2d.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388
  1. /*Daala video codec
  2. Copyright (c) 2013 Daala project contributors. All rights reserved.
  3. Redistribution and use in source and binary forms, with or without
  4. modification, are permitted provided that the following conditions are met:
  5. - Redistributions of source code must retain the above copyright notice, this
  6. list of conditions and the following disclaimer.
  7. - Redistributions in binary form must reproduce the above copyright notice,
  8. this list of conditions and the following disclaimer in the documentation
  9. and/or other materials provided with the distribution.
  10. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
  11. AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  12. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  13. DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
  14. FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  15. DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  16. SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  17. CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  18. OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  19. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
  20. #ifdef HAVE_CONFIG_H
  21. #include "config.h"
  22. #endif
  23. #include <stdlib.h>
  24. #include "od_defs.h"
  25. #include "od_filter.h"
  26. #include "stats_tools.h"
  27. #include "trans_tools.h"
  28. #include "int_search.h"
  29. #include "kiss99.h"
  30. #define USE_FILES (0)
  31. #define USE_AR95 (1)
  32. #define USE_SUBSET1 (0)
  33. #define USE_SUBSET3 (0)
  34. #define PRINT_COV (0)
  35. #define CG_SEARCH (0)
  36. #define USE_SIMPLEX (1)
  37. #define RAMP_DYADIC (0)
  38. #if CG_SEARCH
  39. # if USE_TYPE3 && RAMP_DYADIC
  40. # error "Dyadic ramp constraint not supported for Type-III transform."
  41. # endif
  42. # if USE_SIMPLEX && RAMP_DYADIC
  43. # error "Dyadic ramp constraint not supported with simplex search."
  44. # endif
  45. # if NN_SEARCH && !USE_SIMPLEX
  46. # error "Non-negative search requires simplex search."
  47. # endif
  48. static void coding_gain_search(const double _r[2*B_SZ*2*B_SZ]){
  49. # if !USE_SIMPLEX
  50. # if B_SZ==4
  51. {
  52. int f[4];
  53. int p0;
  54. int q0;
  55. int s0;
  56. int s1;
  57. double cg;
  58. double best_cg;
  59. best_cg=0;
  60. # if RAMP_DYADIC
  61. for(q0=(1<<FILTER_BITS);q0>=-(1<<FILTER_BITS);q0--){
  62. int t0;
  63. f[3]=q0;
  64. /* S0 = 4/1*(1-q0/64)
  65. * S0 >= 1 -> 64-q0 >= 16
  66. */
  67. t0=(1<<FILTER_BITS)-q0;
  68. s0=1*t0-0;
  69. if(s0>=(1<<FILTER_BITS-2)){
  70. s0*=4;
  71. f[0]=s0;
  72. for(p0=-(1<<FILTER_BITS);p0<=(1<<FILTER_BITS);p0++){
  73. f[2]=p0;
  74. /* S1 = 4/3*(1-(1-q0/64)*p0/64)
  75. * S1 >= 1 -> 64^2-(64-q0)*p0 >= 64*48
  76. * S1 = x/64 -> 64^2-(64-q0)*p0 = 0 MOD 48
  77. */
  78. s1=(1<<2*FILTER_BITS)-t0*p0;
  79. if(s1>=(1<<FILTER_BITS)*(3<<FILTER_BITS-2)&&s1%(3<<FILTER_BITS-2)==0){
  80. s1/=(3<<FILTER_BITS-2);
  81. f[1]=s1;
  82. cg=coding_gain_2d_collapsed(_r,f);
  83. if(cg>best_cg){
  84. best_cg=cg;
  85. printf("%i %i %i %i %G\n",p0,q0,s0,s1,cg);
  86. }
  87. }
  88. }
  89. }
  90. }
  91. # else
  92. for(p0=-(1<<FILTER_BITS);p0<=(1<<FILTER_BITS);p0++){
  93. f[2]=p0;
  94. for(q0=(1<<FILTER_BITS);q0>=-(1<<FILTER_BITS);q0--){
  95. f[3]=q0;
  96. for(s0=(1<<FILTER_BITS);s0<=2*(1<<FILTER_BITS);s0++){
  97. f[0]=s0;
  98. for(s1=(1<<FILTER_BITS);s1<=2*(1<<FILTER_BITS);s1++){
  99. f[1]=s1;
  100. cg=coding_gain_2d_collapsed(_r,f);
  101. if(cg>best_cg){
  102. best_cg=cg;
  103. printf("%i %i %i %i %G\n",p0,q0,s0,s1,cg);
  104. }
  105. }
  106. }
  107. }
  108. }
  109. # endif
  110. }
  111. # elif B_SZ==8
  112. {
  113. int f[10];
  114. int p0;
  115. int p1;
  116. int p2;
  117. int q0;
  118. int q1;
  119. int q2;
  120. int s0;
  121. int s1;
  122. int s2;
  123. int s3;
  124. double cg;
  125. double best_cg;
  126. best_cg=0;
  127. # if RAMP_DYADIC
  128. for(q0=(1<<FILTER_BITS);q0>=-(1<<FILTER_BITS);q0--){
  129. int t0;
  130. f[7]=q0;
  131. /* S0 = 8/1*(1-q0/64)
  132. * S0 >= 1 -> 64-q0 >= 8
  133. */
  134. t0=(1<<FILTER_BITS)-q0;
  135. s0=1*t0-0;
  136. if(s0>=(1<<FILTER_BITS-3)){
  137. s0*=8;
  138. f[0]=s0;
  139. for(p0=-(1<<FILTER_BITS);p0<=(1<<FILTER_BITS);p0++){
  140. f[4]=p0;
  141. for(q1=(1<<FILTER_BITS);q1>=-(1<<FILTER_BITS);q1--){
  142. int t1;
  143. f[8]=q1;
  144. /* S1 = 8/3*((1-q1/64)-(1-q0/64)*p0/64)
  145. * S1 >= 1 -> 64*t1-t0*p0 >= 64*24
  146. * S1 = x/64 -> 64*t1-t0*p0 = 0 MOD 24
  147. */
  148. t1=(1<<FILTER_BITS)-q1;
  149. s1=(1<<FILTER_BITS)*t1-t0*p0;
  150. if(s1>=(1<<FILTER_BITS)*(3<<FILTER_BITS-3)&&
  151. s1%(3<<FILTER_BITS-3)==0){
  152. s1/=(3<<FILTER_BITS-3);
  153. f[1]=s1;
  154. for(p1=-(1<<FILTER_BITS);p1<=(1<<FILTER_BITS);p1++){
  155. f[5]=p1;
  156. for(q2=(1<<FILTER_BITS);q2>=-(1<<FILTER_BITS);q2--){
  157. int t2;
  158. f[9]=q2;
  159. /* S2 = 8/5*((1-q2/64)-(1-q1/64)*p1/64)
  160. * S2 >= 1 -> 64*t2-t1*p1) >= 64*40
  161. * S2 = x/64 -> 64*t2-t1*p1 = 0 MOD 40
  162. */
  163. t2=(1<<FILTER_BITS)-q2;
  164. s2=(1<<FILTER_BITS)*t2-t1*p1;
  165. if(s2>=(1<<FILTER_BITS)*(5<<FILTER_BITS-3)&&
  166. s2%(5<<FILTER_BITS-3)==0){
  167. s2/=(5<<FILTER_BITS-3);
  168. f[2]=s2;
  169. for(p2=-(1<<FILTER_BITS);p2<=(1<<FILTER_BITS);p2++){
  170. f[6]=p2;
  171. /* S3 = 8/7*(1-(1-q2/64)*p2/64)
  172. * S3 >= 1 -> 64^2-t2*p2 >= 64*56
  173. * S3 = x/64 -> 64^2-t2*p2 = 0 MOD 56
  174. */
  175. s3=(1<<2*FILTER_BITS)-t2*p2;
  176. if(s3>=(1<<FILTER_BITS)*(7<<FILTER_BITS-3)&&
  177. s3%(7<<FILTER_BITS-3)==0){
  178. s3/=(7<<FILTER_BITS-3);
  179. f[3]=s3;
  180. cg=coding_gain_2d_collapsed(_r,f);
  181. if(cg>best_cg){
  182. best_cg=cg;
  183. printf("%i %i %i %i %i %i %i %i %i %i %-24.18G\n",
  184. p0,p1,p2,q0,q1,q2,s0,s1,s2,s3,cg);
  185. }
  186. }
  187. }
  188. }
  189. }
  190. }
  191. }
  192. }
  193. }
  194. }
  195. }
  196. # else
  197. # error "Exhaustive search for B_SZ==8 only supported using RAMP_DYADIC (1)."
  198. # endif
  199. }
  200. # else
  201. # error "Exhaustive search not supported for this block size."
  202. # endif
  203. # else
  204. {
  205. int dims;
  206. int i;
  207. kiss99_ctx ks[NUM_PROCS];
  208. int lb[22];
  209. int ub[22];
  210. # if B_SZ==4
  211. dims=3;
  212. # elif B_SZ==8
  213. dims=10;
  214. # elif B_SZ==16
  215. dims=22;
  216. # else
  217. # error "Unsupported block size."
  218. # endif
  219. for(i=0;i<dims;i++){
  220. # if B_SZ==4
  221. lb[i]=-2*(1<<FILTER_BITS);
  222. ub[i]=2*(1<<FILTER_BITS);
  223. # else
  224. lb[i]=i<(B_SZ>>1)?(1<<FILTER_BITS):-(1<<FILTER_BITS);
  225. ub[i]=i<(B_SZ>>1)?2*(1<<FILTER_BITS):(1<<FILTER_BITS);
  226. # endif
  227. }
  228. for(i=0;i<NUM_PROCS;i++){
  229. uint32_t srand;
  230. srand=i*16843009; /*Broadcast char to 4xchar*/
  231. kiss99_srand(&ks[i],(unsigned char *)&srand,sizeof(srand));
  232. }
  233. #pragma omp parallel for schedule(dynamic)
  234. for(i=0;i<1024;i++){
  235. int tid;
  236. int j;
  237. # if B_SZ==4
  238. int f[3];
  239. # elif B_SZ==8
  240. int f[10];
  241. # elif B_SZ==16
  242. int f[22];
  243. # else
  244. # error "Unsupported block size."
  245. # endif
  246. double cg;
  247. tid=OD_OMP_GET_THREAD;
  248. for(j=0;j<dims;j++){
  249. int range;
  250. int mask;
  251. int rng;
  252. range=ub[j]-lb[j];
  253. mask=(1<<OD_ILOG_NZ(range))-1;
  254. do {
  255. rng=((int)kiss99_rand(&ks[tid]))&mask;
  256. }
  257. while(rng>range);
  258. f[j]=lb[j]+rng;
  259. }
  260. j=int_simplex_max(&cg,dims,coding_gain_2d_collapsed,_r,lb,ub,f);
  261. fprintf(stdout,"obj=%-24.18G steps=%4d params={",cg,j);
  262. for(j=0;j<dims;j++){
  263. fprintf(stdout,"%3d%c",f[j],j==dims-1?'}':',');
  264. }
  265. fprintf(stdout,"\n");
  266. }
  267. }
  268. # endif
  269. }
  270. #endif
  271. #if USE_FILES
  272. static int t_start(void *_ctx,const char *_name,const th_info *_ti,int _pli,
  273. int _nxblocks,int _nyblocks){
  274. trans_ctx *ctx;
  275. fprintf(stdout,"%s %i %i\n",_name,_nxblocks,_nyblocks);
  276. fflush(stdout);
  277. ctx=(trans_ctx *)_ctx;
  278. image_ctx_init(&ctx->img,_name,_nxblocks,_nyblocks);
  279. return EXIT_SUCCESS;
  280. }
  281. static void t_load_data(void *_ctx,const unsigned char *_data,int _stride,
  282. int _bi,int _bj){
  283. trans_ctx *ctx;
  284. ctx=(trans_ctx *)_ctx;
  285. if(_bi==0&&_bj==0){
  286. int y;
  287. int x;
  288. int j;
  289. int i;
  290. unsigned char buf[2*B_SZ*2*B_SZ];
  291. for(y=0;y<ctx->img.nyblocks*B_SZ-(2*B_SZ-1);y++){
  292. for(x=0;x<ctx->img.nxblocks*B_SZ-(2*B_SZ-1);x++){
  293. for(j=0;j<2*B_SZ;j++){
  294. for(i=0;i<2*B_SZ;i++){
  295. buf[j*2*B_SZ+i]=_data[(y+j)*_stride+(x+i)];
  296. }
  297. }
  298. trans_data_add(&ctx->td,buf);
  299. }
  300. }
  301. }
  302. }
  303. #define PADDING (0)
  304. const block_func BLOCKS[]={
  305. t_load_data
  306. };
  307. const int NBLOCKS=sizeof(BLOCKS)/sizeof(*BLOCKS);
  308. #endif
  309. int main(int _argc,const char *_argv[]){
  310. trans_ctx ctx[NUM_PROCS];
  311. const int *f;
  312. int i;
  313. double r[2*B_SZ*2*B_SZ];
  314. const double *cov;
  315. (void)_argc;
  316. (void)_argv;
  317. #if B_SZ==4
  318. f=OD_FILTER_PARAMS4;
  319. #elif B_SZ==8
  320. f=OD_FILTER_PARAMS8;
  321. #elif B_SZ==16
  322. f=OD_FILTER_PARAMS16;
  323. #else
  324. # error "Need filter params for this block size."
  325. #endif
  326. for(i=0;i<NUM_PROCS;i++){
  327. trans_data_init(&ctx[i].td,2*B_SZ*2*B_SZ);
  328. }
  329. cov=r;
  330. #if USE_FILES
  331. OD_OMP_SET_THREADS(NUM_PROCS);
  332. ne_apply_to_blocks(ctx,sizeof(*ctx),0x1,PADDING,t_start,NBLOCKS,BLOCKS,NULL,
  333. _argc,_argv);
  334. for(i=1;i<NUM_PROCS;i++){
  335. trans_data_combine(&ctx[0].td,&ctx[i].td);
  336. }
  337. trans_data_normalize(&ctx[0].td);
  338. # if PRINT_COV
  339. trans_data_print(&ctx[0].td,stderr);
  340. # endif
  341. fprintf(stdout,"original cg=%- 24.16G\n",coding_gain_2d(ctx[0].td.cov,f));
  342. trans_data_collapse(&ctx[0].td,2*B_SZ,r);
  343. fprintf(stdout,"collapse cg=%- 24.16G\n",coding_gain_2d_collapsed(r,f));
  344. trans_data_expand(&ctx[0].td,2*B_SZ,r);
  345. fprintf(stdout,"expanded cg=%- 24.16G\n",coding_gain_2d(ctx[0].td.cov,f));
  346. #elif USE_AR95
  347. auto_regressive_collapsed(r,2*B_SZ*2*B_SZ,2*B_SZ,0.95);
  348. #elif USE_SUBSET1
  349. # if B_SZ_LOG>=OD_LOG_BSIZE0&&B_SZ_LOG<OD_LOG_BSIZE0+OD_NBSIZES
  350. cov=SUBSET1_2D[B_SZ_LOG-OD_LOG_BSIZE0];
  351. # else
  352. # error "Need auto-correlation matrix for subset1 for this block size."
  353. # endif
  354. #elif USE_SUBSET3
  355. # if B_SZ_LOG>=OD_LOG_BSIZE0&&B_SZ_LOG<OD_LOG_BSIZE0+OD_NBSIZES
  356. cov=SUBSET3_2D[B_SZ_LOG-OD_LOG_BSIZE0];
  357. # else
  358. # error "Need auto-correlation matrix for subset3 for this block size."
  359. # endif
  360. #endif
  361. #if CG_SEARCH
  362. coding_gain_search(cov);
  363. #else
  364. fprintf(stdout,"cg=%-24.18G\n",coding_gain_2d_collapsed(cov,f));
  365. #endif
  366. for(i=0;i<NUM_PROCS;i++){
  367. trans_data_clear(&ctx[i].td);
  368. }
  369. return EXIT_SUCCESS;
  370. }