trans.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385
  1. /*Daala video codec
  2. Copyright (c) 2013 Daala project contributors. All rights reserved.
  3. Redistribution and use in source and binary forms, with or without
  4. modification, are permitted provided that the following conditions are met:
  5. - Redistributions of source code must retain the above copyright notice, this
  6. list of conditions and the following disclaimer.
  7. - Redistributions in binary form must reproduce the above copyright notice,
  8. this list of conditions and the following disclaimer in the documentation
  9. and/or other materials provided with the distribution.
  10. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
  11. AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  12. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  13. DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
  14. FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  15. DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  16. SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  17. CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  18. OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  19. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
  20. #ifdef HAVE_CONFIG_H
  21. #include "config.h"
  22. #endif
  23. #include <stdlib.h>
  24. #include "od_defs.h"
  25. #include "od_filter.h"
  26. #include "trans_tools.h"
  27. #include "int_search.h"
  28. #include "kiss99.h"
  29. #define USE_FILES (0)
  30. #define USE_AR95 (1)
  31. #define USE_SUBSET1 (0)
  32. #define USE_SUBSET3 (0)
  33. #define PRINT_COV (0)
  34. #define CG_SEARCH (0)
  35. #define USE_SIMPLEX (1)
  36. #define RAMP_DYADIC (0)
  37. #if CG_SEARCH
  38. # if USE_TYPE3 && RAMP_DYADIC
  39. # error "Dyadic ramp constraint not supported for Type-III transform."
  40. # endif
  41. # if USE_SIMPLEX && RAMP_DYADIC
  42. # error "Dyadic ramp constraint not supported with simplex search."
  43. # endif
  44. static void coding_gain_search(const double _r[2*B_SZ]){
  45. # if !USE_SIMPLEX
  46. # if B_SZ==4
  47. {
  48. int f[4];
  49. int p0;
  50. int q0;
  51. int s0;
  52. int s1;
  53. double cg;
  54. double best_cg;
  55. best_cg=0;
  56. # if RAMP_DYADIC
  57. for(q0=(1<<FILTER_BITS);q0>=-(1<<FILTER_BITS);q0--){
  58. int t0;
  59. f[3]=q0;
  60. /* S0 = 4/1*(1-q0/64)
  61. * S0 >= 1 -> 64-q0 >= 16
  62. */
  63. t0=(1<<FILTER_BITS)-q0;
  64. s0=1*t0-0;
  65. if(s0>=(1<<FILTER_BITS-2)){
  66. s0*=4;
  67. f[0]=s0;
  68. for(p0=-(1<<FILTER_BITS);p0<=(1<<FILTER_BITS);p0++){
  69. f[2]=p0;
  70. /* S1 = 4/3*(1-(1-q0/64)*p0/64)
  71. * S1 >= 1 -> 64^2-(64-q0)*p0 >= 64*48
  72. * S1 = x/64 -> 64^2-(64-q0)*p0 = 0 MOD 48
  73. */
  74. s1=(1<<2*FILTER_BITS)-t0*p0;
  75. if(s1>=(1<<FILTER_BITS)*(3<<FILTER_BITS-2)&&s1%(3<<FILTER_BITS-2)==0){
  76. s1/=(3<<FILTER_BITS-2);
  77. f[1]=s1;
  78. cg=coding_gain_1d_collapsed(_r,f);
  79. if(cg>best_cg){
  80. best_cg=cg;
  81. printf("%i %i %i %i %G\n",p0,q0,s0,s1,cg);
  82. }
  83. }
  84. }
  85. }
  86. }
  87. # else
  88. for(p0=-(1<<FILTER_BITS);p0<=(1<<FILTER_BITS);p0++){
  89. f[2]=p0;
  90. for(q0=(1<<FILTER_BITS);q0>=-(1<<FILTER_BITS);q0--){
  91. f[3]=q0;
  92. for(s0=(1<<FILTER_BITS);s0<=2*(1<<FILTER_BITS);s0++){
  93. f[0]=s0;
  94. for(s1=(1<<FILTER_BITS);s1<=2*(1<<FILTER_BITS);s1++){
  95. f[1]=s1;
  96. cg=coding_gain_1d_collapsed(_r,f);
  97. if(cg>best_cg){
  98. best_cg=cg;
  99. printf("%i %i %i %i %G\n",p0,q0,s0,s1,cg);
  100. }
  101. }
  102. }
  103. }
  104. }
  105. # endif
  106. }
  107. # elif B_SZ==8
  108. {
  109. int f[10];
  110. int p0;
  111. int p1;
  112. int p2;
  113. int q0;
  114. int q1;
  115. int q2;
  116. int s0;
  117. int s1;
  118. int s2;
  119. int s3;
  120. double cg;
  121. double best_cg;
  122. best_cg=0;
  123. # if RAMP_DYADIC
  124. for(q0=(1<<FILTER_BITS);q0>=-(1<<FILTER_BITS);q0--){
  125. int t0;
  126. f[7]=q0;
  127. /* S0 = 8/1*(1-q0/64)
  128. * S0 >= 1 -> 64-q0 >= 8
  129. */
  130. t0=(1<<FILTER_BITS)-q0;
  131. s0=1*t0-0;
  132. if(s0>=(1<<FILTER_BITS-3)){
  133. s0*=8;
  134. f[0]=s0;
  135. for(p0=-(1<<FILTER_BITS);p0<=(1<<FILTER_BITS);p0++){
  136. f[4]=p0;
  137. for(q1=(1<<FILTER_BITS);q1>=-(1<<FILTER_BITS);q1--){
  138. int t1;
  139. f[8]=q1;
  140. /* S1 = 8/3*((1-q1/64)-(1-q0/64)*p0/64)
  141. * S1 >= 1 -> 64*t1-t0*p0 >= 64*24
  142. * S1 = x/64 -> 64*t1-t0*p0 = 0 MOD 24
  143. */
  144. t1=(1<<FILTER_BITS)-q1;
  145. s1=(1<<FILTER_BITS)*t1-t0*p0;
  146. if(s1>=(1<<FILTER_BITS)*(3<<FILTER_BITS-3)&&
  147. s1%(3<<FILTER_BITS-3)==0){
  148. s1/=(3<<FILTER_BITS-3);
  149. f[1]=s1;
  150. for(p1=-(1<<FILTER_BITS);p1<=(1<<FILTER_BITS);p1++){
  151. f[5]=p1;
  152. for(q2=(1<<FILTER_BITS);q2>=-(1<<FILTER_BITS);q2--){
  153. int t2;
  154. f[9]=q2;
  155. /* S2 = 8/5*((1-q2/64)-(1-q1/64)*p1/64)
  156. * S2 >= 1 -> 64*t2-t1*p1) >= 64*40
  157. * S2 = x/64 -> 64*t2-t1*p1 = 0 MOD 40
  158. */
  159. t2=(1<<FILTER_BITS)-q2;
  160. s2=(1<<FILTER_BITS)*t2-t1*p1;
  161. if(s2>=(1<<FILTER_BITS)*(5<<FILTER_BITS-3)&&
  162. s2%(5<<FILTER_BITS-3)==0){
  163. s2/=(5<<FILTER_BITS-3);
  164. f[2]=s2;
  165. for(p2=-(1<<FILTER_BITS);p2<=(1<<FILTER_BITS);p2++){
  166. f[6]=p2;
  167. /* S3 = 8/7*(1-(1-q2/64)*p2/64)
  168. * S3 >= 1 -> 64^2-t2*p2 >= 64*56
  169. * S3 = x/64 -> 64^2-t2*p2 = 0 MOD 56
  170. */
  171. s3=(1<<2*FILTER_BITS)-t2*p2;
  172. if(s3>=(1<<FILTER_BITS)*(7<<FILTER_BITS-3)&&
  173. s3%(7<<FILTER_BITS-3)==0){
  174. s3/=(7<<FILTER_BITS-3);
  175. f[3]=s3;
  176. cg=coding_gain_1d_collapsed(_r,f);
  177. if(cg>best_cg){
  178. best_cg=cg;
  179. printf("%i %i %i %i %i %i %i %i %i %i %-24.18G\n",
  180. p0,p1,p2,q0,q1,q2,s0,s1,s2,s3,cg);
  181. }
  182. }
  183. }
  184. }
  185. }
  186. }
  187. }
  188. }
  189. }
  190. }
  191. }
  192. # else
  193. # error "Exhaustive search for B_SZ==8 only supported using RAMP_DYADIC (1)."
  194. # endif
  195. }
  196. # else
  197. # error "Exhaustive search not supported for this block size."
  198. # endif
  199. # else
  200. {
  201. int dims;
  202. int i;
  203. kiss99_ctx ks[NUM_PROCS];
  204. int lb[22];
  205. int ub[22];
  206. # if B_SZ==4
  207. dims=4;
  208. # elif B_SZ==8
  209. dims=10;
  210. # elif B_SZ==16
  211. dims=22;
  212. # else
  213. # error "Unsupported block size."
  214. # endif
  215. for(i=0;i<dims;i++){
  216. lb[i]=i<(B_SZ>>1)?(1<<FILTER_BITS):-(1<<FILTER_BITS);
  217. ub[i]=i<(B_SZ>>1)?2*(1<<FILTER_BITS):(1<<FILTER_BITS);
  218. }
  219. for(i=0;i<NUM_PROCS;i++){
  220. uint32_t srand;
  221. srand=i*16843009; /*Broadcast char to 4xchar*/
  222. kiss99_srand(&ks[i],(unsigned char *)&srand,sizeof(srand));
  223. }
  224. #pragma omp parallel for schedule(dynamic)
  225. for(i=0;i<128;i++){
  226. int tid;
  227. int j;
  228. # if B_SZ==4
  229. int f[4];
  230. # elif B_SZ==8
  231. int f[10];
  232. # elif B_SZ==16
  233. int f[22];
  234. # else
  235. # error "Unsupported block size."
  236. # endif
  237. double cg;
  238. tid=OD_OMP_GET_THREAD;
  239. for(j=0;j<dims;j++){
  240. int range;
  241. int mask;
  242. int rng;
  243. range=ub[j]-lb[j];
  244. mask=(1<<OD_ILOG_NZ(range))-1;
  245. do {
  246. rng=((int)kiss99_rand(&ks[tid]))&mask;
  247. }
  248. while(rng>range);
  249. f[j]=lb[j]+rng;
  250. }
  251. j=int_simplex_max(&cg,dims,coding_gain_1d_collapsed,_r,lb,ub,f);
  252. fprintf(stdout,"obj=%-24.18G steps=%4d params={",cg,j);
  253. for(j=0;j<dims;j++){
  254. fprintf(stdout,"%3d%c",f[j],j==dims-1?'}':',');
  255. }
  256. fprintf(stdout,"\n");
  257. }
  258. }
  259. # endif
  260. }
  261. #endif
  262. #if USE_FILES
  263. static int t_start(void *_ctx,const char *_name,const th_info *_ti,int _pli,
  264. int _nxblocks,int _nyblocks){
  265. trans_ctx *ctx;
  266. fprintf(stdout,"%s %i %i\n",_name,_nxblocks,_nyblocks);
  267. fflush(stdout);
  268. ctx=(trans_ctx *)_ctx;
  269. image_ctx_init(&ctx->img,_name,_nxblocks,_nyblocks);
  270. return EXIT_SUCCESS;
  271. }
  272. static void t_load_data(void *_ctx,const unsigned char *_data,int _stride,
  273. int _bi,int _bj){
  274. trans_ctx *ctx;
  275. ctx=(trans_ctx *)_ctx;
  276. if(_bi==0&&_bj==0){
  277. int x;
  278. int y;
  279. int z;
  280. unsigned char buf[2*B_SZ];
  281. /* add the rows */
  282. for(y=0;y<ctx->img.nyblocks*B_SZ;y++){
  283. for(x=0;x<ctx->img.nxblocks*B_SZ-(2*B_SZ-1);x++){
  284. for(z=0;z<2*B_SZ;z++){
  285. buf[z]=_data[y*_stride+(x+z)];
  286. }
  287. trans_data_add(&ctx->td,buf);
  288. }
  289. }
  290. /* add the columns */
  291. for(y=0;y<ctx->img.nyblocks*B_SZ-(2*B_SZ-1);y++){
  292. for(x=0;x<ctx->img.nxblocks*B_SZ;x++){
  293. for(z=0;z<2*B_SZ;z++){
  294. buf[z]=_data[(y+z)*_stride+x];
  295. }
  296. trans_data_add(&ctx->td,buf);
  297. }
  298. }
  299. }
  300. }
  301. #define PADDING (0)
  302. const block_func BLOCKS[]={
  303. t_load_data
  304. };
  305. const int NBLOCKS=sizeof(BLOCKS)/sizeof(*BLOCKS);
  306. #endif
  307. int main(int _argc,const char *_argv[]){
  308. trans_ctx ctx[NUM_PROCS];
  309. const int *f;
  310. int i;
  311. double r[2*B_SZ];
  312. const double *cov;
  313. (void)_argc;
  314. (void)_argv;
  315. #if B_SZ==4
  316. f=OD_FILTER_PARAMS4;
  317. #elif B_SZ==8
  318. f=OD_FILTER_PARAMS8;
  319. #elif B_SZ==16
  320. f=OD_FILTER_PARAMS16;
  321. #else
  322. # error "Need filter params for this block size."
  323. #endif
  324. for(i=0;i<NUM_PROCS;i++){
  325. trans_data_init(&ctx[i].td,2*B_SZ);
  326. }
  327. cov=r;
  328. #if USE_FILES
  329. OD_OMP_SET_THREADS(NUM_PROCS);
  330. ne_apply_to_blocks(ctx,sizeof(*ctx),0x1,PADDING,t_start,NBLOCKS,BLOCKS,NULL,
  331. _argc,_argv);
  332. for(i=1;i<NUM_PROCS;i++){
  333. trans_data_combine(&ctx[0].td,&ctx[i].td);
  334. }
  335. trans_data_normalize(&ctx[0].td);
  336. # if PRINT_COV
  337. trans_data_print(&ctx[0].td,stderr);
  338. # endif
  339. fprintf(stdout,"original cg=%- 24.16G\n",coding_gain_1d(ctx[0].td.cov,f));
  340. trans_data_collapse(&ctx[0].td,1,r);
  341. fprintf(stdout,"collapse cg=%- 24.16G\n",coding_gain_1d_collapsed(r,f));
  342. trans_data_expand(&ctx[0].td,1,r);
  343. fprintf(stdout,"expanded cg=%- 24.16G\n",coding_gain_1d(ctx[0].td.cov,f));
  344. #elif USE_AR95
  345. auto_regressive_collapsed(r,2*B_SZ,1,0.95);
  346. #elif USE_SUBSET1
  347. # if B_SZ_LOG>=OD_LOG_BSIZE0&&B_SZ_LOG<OD_LOG_BSIZE0+OD_NBSIZES
  348. cov=SUBSET1_1D[B_SZ_LOG-OD_LOG_BSIZE0];
  349. # else
  350. # error "Need auto-correlation matrix for subset1 for this block size."
  351. # endif
  352. #elif USE_SUBSET3
  353. # if B_SZ_LOG>=OD_LOG_BSIZE0&&B_SZ_LOG<OD_LOG_BSIZE0+OD_NBSIZES
  354. cov=SUBSET3_1D[B_SZ_LOG-OD_LOG_BSIZE0];
  355. # else
  356. # error "Need auto-correlation matrix for subset3 for this block size."
  357. # endif
  358. #endif
  359. #if CG_SEARCH
  360. coding_gain_search(cov);
  361. #else
  362. fprintf(stdout,"cg=%-24.18G\n",coding_gain_1d_collapsed(cov,f));
  363. #endif
  364. for(i=0;i<NUM_PROCS;i++){
  365. trans_data_clear(&ctx[i].td);
  366. }
  367. return EXIT_SUCCESS;
  368. }