trans.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386
  1. /*Daala video codec
  2. Copyright (c) 2013 Daala project contributors. All rights reserved.
  3. Redistribution and use in source and binary forms, with or without
  4. modification, are permitted provided that the following conditions are met:
  5. - Redistributions of source code must retain the above copyright notice, this
  6. list of conditions and the following disclaimer.
  7. - Redistributions in binary form must reproduce the above copyright notice,
  8. this list of conditions and the following disclaimer in the documentation
  9. and/or other materials provided with the distribution.
  10. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
  11. AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  12. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  13. DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
  14. FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  15. DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  16. SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  17. CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  18. OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  19. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
  20. #ifdef HAVE_CONFIG_H
  21. #include "config.h"
  22. #endif
  23. #include <stdlib.h>
  24. #include "od_defs.h"
  25. #include "od_filter.h"
  26. #include "stats_tools.h"
  27. #include "trans_tools.h"
  28. #include "int_search.h"
  29. #include "kiss99.h"
  30. #define USE_FILES (0)
  31. #define USE_AR95 (1)
  32. #define USE_SUBSET1 (0)
  33. #define USE_SUBSET3 (0)
  34. #define PRINT_COV (0)
  35. #define CG_SEARCH (0)
  36. #define USE_SIMPLEX (1)
  37. #define RAMP_DYADIC (0)
  38. #if CG_SEARCH
  39. # if USE_TYPE3 && RAMP_DYADIC
  40. # error "Dyadic ramp constraint not supported for Type-III transform."
  41. # endif
  42. # if USE_SIMPLEX && RAMP_DYADIC
  43. # error "Dyadic ramp constraint not supported with simplex search."
  44. # endif
  45. static void coding_gain_search(const double _r[2*B_SZ]){
  46. # if !USE_SIMPLEX
  47. # if B_SZ==4
  48. {
  49. int f[4];
  50. int p0;
  51. int q0;
  52. int s0;
  53. int s1;
  54. double cg;
  55. double best_cg;
  56. best_cg=0;
  57. # if RAMP_DYADIC
  58. for(q0=(1<<FILTER_BITS);q0>=-(1<<FILTER_BITS);q0--){
  59. int t0;
  60. f[3]=q0;
  61. /* S0 = 4/1*(1-q0/64)
  62. * S0 >= 1 -> 64-q0 >= 16
  63. */
  64. t0=(1<<FILTER_BITS)-q0;
  65. s0=1*t0-0;
  66. if(s0>=(1<<FILTER_BITS-2)){
  67. s0*=4;
  68. f[0]=s0;
  69. for(p0=-(1<<FILTER_BITS);p0<=(1<<FILTER_BITS);p0++){
  70. f[2]=p0;
  71. /* S1 = 4/3*(1-(1-q0/64)*p0/64)
  72. * S1 >= 1 -> 64^2-(64-q0)*p0 >= 64*48
  73. * S1 = x/64 -> 64^2-(64-q0)*p0 = 0 MOD 48
  74. */
  75. s1=(1<<2*FILTER_BITS)-t0*p0;
  76. if(s1>=(1<<FILTER_BITS)*(3<<FILTER_BITS-2)&&s1%(3<<FILTER_BITS-2)==0){
  77. s1/=(3<<FILTER_BITS-2);
  78. f[1]=s1;
  79. cg=coding_gain_1d_collapsed(_r,f);
  80. if(cg>best_cg){
  81. best_cg=cg;
  82. printf("%i %i %i %i %G\n",p0,q0,s0,s1,cg);
  83. }
  84. }
  85. }
  86. }
  87. }
  88. # else
  89. for(p0=-(1<<FILTER_BITS);p0<=(1<<FILTER_BITS);p0++){
  90. f[2]=p0;
  91. for(q0=(1<<FILTER_BITS);q0>=-(1<<FILTER_BITS);q0--){
  92. f[3]=q0;
  93. for(s0=(1<<FILTER_BITS);s0<=2*(1<<FILTER_BITS);s0++){
  94. f[0]=s0;
  95. for(s1=(1<<FILTER_BITS);s1<=2*(1<<FILTER_BITS);s1++){
  96. f[1]=s1;
  97. cg=coding_gain_1d_collapsed(_r,f);
  98. if(cg>best_cg){
  99. best_cg=cg;
  100. printf("%i %i %i %i %G\n",p0,q0,s0,s1,cg);
  101. }
  102. }
  103. }
  104. }
  105. }
  106. # endif
  107. }
  108. # elif B_SZ==8
  109. {
  110. int f[10];
  111. int p0;
  112. int p1;
  113. int p2;
  114. int q0;
  115. int q1;
  116. int q2;
  117. int s0;
  118. int s1;
  119. int s2;
  120. int s3;
  121. double cg;
  122. double best_cg;
  123. best_cg=0;
  124. # if RAMP_DYADIC
  125. for(q0=(1<<FILTER_BITS);q0>=-(1<<FILTER_BITS);q0--){
  126. int t0;
  127. f[7]=q0;
  128. /* S0 = 8/1*(1-q0/64)
  129. * S0 >= 1 -> 64-q0 >= 8
  130. */
  131. t0=(1<<FILTER_BITS)-q0;
  132. s0=1*t0-0;
  133. if(s0>=(1<<FILTER_BITS-3)){
  134. s0*=8;
  135. f[0]=s0;
  136. for(p0=-(1<<FILTER_BITS);p0<=(1<<FILTER_BITS);p0++){
  137. f[4]=p0;
  138. for(q1=(1<<FILTER_BITS);q1>=-(1<<FILTER_BITS);q1--){
  139. int t1;
  140. f[8]=q1;
  141. /* S1 = 8/3*((1-q1/64)-(1-q0/64)*p0/64)
  142. * S1 >= 1 -> 64*t1-t0*p0 >= 64*24
  143. * S1 = x/64 -> 64*t1-t0*p0 = 0 MOD 24
  144. */
  145. t1=(1<<FILTER_BITS)-q1;
  146. s1=(1<<FILTER_BITS)*t1-t0*p0;
  147. if(s1>=(1<<FILTER_BITS)*(3<<FILTER_BITS-3)&&
  148. s1%(3<<FILTER_BITS-3)==0){
  149. s1/=(3<<FILTER_BITS-3);
  150. f[1]=s1;
  151. for(p1=-(1<<FILTER_BITS);p1<=(1<<FILTER_BITS);p1++){
  152. f[5]=p1;
  153. for(q2=(1<<FILTER_BITS);q2>=-(1<<FILTER_BITS);q2--){
  154. int t2;
  155. f[9]=q2;
  156. /* S2 = 8/5*((1-q2/64)-(1-q1/64)*p1/64)
  157. * S2 >= 1 -> 64*t2-t1*p1) >= 64*40
  158. * S2 = x/64 -> 64*t2-t1*p1 = 0 MOD 40
  159. */
  160. t2=(1<<FILTER_BITS)-q2;
  161. s2=(1<<FILTER_BITS)*t2-t1*p1;
  162. if(s2>=(1<<FILTER_BITS)*(5<<FILTER_BITS-3)&&
  163. s2%(5<<FILTER_BITS-3)==0){
  164. s2/=(5<<FILTER_BITS-3);
  165. f[2]=s2;
  166. for(p2=-(1<<FILTER_BITS);p2<=(1<<FILTER_BITS);p2++){
  167. f[6]=p2;
  168. /* S3 = 8/7*(1-(1-q2/64)*p2/64)
  169. * S3 >= 1 -> 64^2-t2*p2 >= 64*56
  170. * S3 = x/64 -> 64^2-t2*p2 = 0 MOD 56
  171. */
  172. s3=(1<<2*FILTER_BITS)-t2*p2;
  173. if(s3>=(1<<FILTER_BITS)*(7<<FILTER_BITS-3)&&
  174. s3%(7<<FILTER_BITS-3)==0){
  175. s3/=(7<<FILTER_BITS-3);
  176. f[3]=s3;
  177. cg=coding_gain_1d_collapsed(_r,f);
  178. if(cg>best_cg){
  179. best_cg=cg;
  180. printf("%i %i %i %i %i %i %i %i %i %i %-24.18G\n",
  181. p0,p1,p2,q0,q1,q2,s0,s1,s2,s3,cg);
  182. }
  183. }
  184. }
  185. }
  186. }
  187. }
  188. }
  189. }
  190. }
  191. }
  192. }
  193. # else
  194. # error "Exhaustive search for B_SZ==8 only supported using RAMP_DYADIC (1)."
  195. # endif
  196. }
  197. # else
  198. # error "Exhaustive search not supported for this block size."
  199. # endif
  200. # else
  201. {
  202. int dims;
  203. int i;
  204. kiss99_ctx ks[NUM_PROCS];
  205. int lb[22];
  206. int ub[22];
  207. # if B_SZ==4
  208. dims=4;
  209. # elif B_SZ==8
  210. dims=10;
  211. # elif B_SZ==16
  212. dims=22;
  213. # else
  214. # error "Unsupported block size."
  215. # endif
  216. for(i=0;i<dims;i++){
  217. lb[i]=i<(B_SZ>>1)?(1<<FILTER_BITS):-(1<<FILTER_BITS);
  218. ub[i]=i<(B_SZ>>1)?2*(1<<FILTER_BITS):(1<<FILTER_BITS);
  219. }
  220. for(i=0;i<NUM_PROCS;i++){
  221. uint32_t srand;
  222. srand=i*16843009; /*Broadcast char to 4xchar*/
  223. kiss99_srand(&ks[i],(unsigned char *)&srand,sizeof(srand));
  224. }
  225. #pragma omp parallel for schedule(dynamic)
  226. for(i=0;i<128;i++){
  227. int tid;
  228. int j;
  229. # if B_SZ==4
  230. int f[4];
  231. # elif B_SZ==8
  232. int f[10];
  233. # elif B_SZ==16
  234. int f[22];
  235. # else
  236. # error "Unsupported block size."
  237. # endif
  238. double cg;
  239. tid=OD_OMP_GET_THREAD;
  240. for(j=0;j<dims;j++){
  241. int range;
  242. int mask;
  243. int rng;
  244. range=ub[j]-lb[j];
  245. mask=(1<<OD_ILOG_NZ(range))-1;
  246. do {
  247. rng=((int)kiss99_rand(&ks[tid]))&mask;
  248. }
  249. while(rng>range);
  250. f[j]=lb[j]+rng;
  251. }
  252. j=int_simplex_max(&cg,dims,coding_gain_1d_collapsed,_r,lb,ub,f);
  253. fprintf(stdout,"obj=%-24.18G steps=%4d params={",cg,j);
  254. for(j=0;j<dims;j++){
  255. fprintf(stdout,"%3d%c",f[j],j==dims-1?'}':',');
  256. }
  257. fprintf(stdout,"\n");
  258. }
  259. }
  260. # endif
  261. }
  262. #endif
  263. #if USE_FILES
  264. static int t_start(void *_ctx,const char *_name,const th_info *_ti,int _pli,
  265. int _nxblocks,int _nyblocks){
  266. trans_ctx *ctx;
  267. fprintf(stdout,"%s %i %i\n",_name,_nxblocks,_nyblocks);
  268. fflush(stdout);
  269. ctx=(trans_ctx *)_ctx;
  270. image_ctx_init(&ctx->img,_name,_nxblocks,_nyblocks);
  271. return EXIT_SUCCESS;
  272. }
  273. static void t_load_data(void *_ctx,const unsigned char *_data,int _stride,
  274. int _bi,int _bj){
  275. trans_ctx *ctx;
  276. ctx=(trans_ctx *)_ctx;
  277. if(_bi==0&&_bj==0){
  278. int x;
  279. int y;
  280. int z;
  281. unsigned char buf[2*B_SZ];
  282. /* add the rows */
  283. for(y=0;y<ctx->img.nyblocks*B_SZ;y++){
  284. for(x=0;x<ctx->img.nxblocks*B_SZ-(2*B_SZ-1);x++){
  285. for(z=0;z<2*B_SZ;z++){
  286. buf[z]=_data[y*_stride+(x+z)];
  287. }
  288. trans_data_add(&ctx->td,buf);
  289. }
  290. }
  291. /* add the columns */
  292. for(y=0;y<ctx->img.nyblocks*B_SZ-(2*B_SZ-1);y++){
  293. for(x=0;x<ctx->img.nxblocks*B_SZ;x++){
  294. for(z=0;z<2*B_SZ;z++){
  295. buf[z]=_data[(y+z)*_stride+x];
  296. }
  297. trans_data_add(&ctx->td,buf);
  298. }
  299. }
  300. }
  301. }
  302. #define PADDING (0)
  303. const block_func BLOCKS[]={
  304. t_load_data
  305. };
  306. const int NBLOCKS=sizeof(BLOCKS)/sizeof(*BLOCKS);
  307. #endif
  308. int main(int _argc,const char *_argv[]){
  309. trans_ctx ctx[NUM_PROCS];
  310. const int *f;
  311. int i;
  312. double r[2*B_SZ];
  313. const double *cov;
  314. (void)_argc;
  315. (void)_argv;
  316. #if B_SZ==4
  317. f=OD_FILTER_PARAMS4;
  318. #elif B_SZ==8
  319. f=OD_FILTER_PARAMS8;
  320. #elif B_SZ==16
  321. f=OD_FILTER_PARAMS16;
  322. #else
  323. # error "Need filter params for this block size."
  324. #endif
  325. for(i=0;i<NUM_PROCS;i++){
  326. trans_data_init(&ctx[i].td,2*B_SZ);
  327. }
  328. cov=r;
  329. #if USE_FILES
  330. OD_OMP_SET_THREADS(NUM_PROCS);
  331. ne_apply_to_blocks(ctx,sizeof(*ctx),0x1,PADDING,t_start,NBLOCKS,BLOCKS,NULL,
  332. _argc,_argv);
  333. for(i=1;i<NUM_PROCS;i++){
  334. trans_data_combine(&ctx[0].td,&ctx[i].td);
  335. }
  336. trans_data_normalize(&ctx[0].td);
  337. # if PRINT_COV
  338. trans_data_print(&ctx[0].td,stderr);
  339. # endif
  340. fprintf(stdout,"original cg=%- 24.16G\n",coding_gain_1d(ctx[0].td.cov,f));
  341. trans_data_collapse(&ctx[0].td,1,r);
  342. fprintf(stdout,"collapse cg=%- 24.16G\n",coding_gain_1d_collapsed(r,f));
  343. trans_data_expand(&ctx[0].td,1,r);
  344. fprintf(stdout,"expanded cg=%- 24.16G\n",coding_gain_1d(ctx[0].td.cov,f));
  345. #elif USE_AR95
  346. auto_regressive_collapsed(r,2*B_SZ,1,0.95);
  347. #elif USE_SUBSET1
  348. # if B_SZ_LOG>=OD_LOG_BSIZE0&&B_SZ_LOG<OD_LOG_BSIZE0+OD_NBSIZES
  349. cov=SUBSET1_1D[B_SZ_LOG-OD_LOG_BSIZE0];
  350. # else
  351. # error "Need auto-correlation matrix for subset1 for this block size."
  352. # endif
  353. #elif USE_SUBSET3
  354. # if B_SZ_LOG>=OD_LOG_BSIZE0&&B_SZ_LOG<OD_LOG_BSIZE0+OD_NBSIZES
  355. cov=SUBSET3_1D[B_SZ_LOG-OD_LOG_BSIZE0];
  356. # else
  357. # error "Need auto-correlation matrix for subset3 for this block size."
  358. # endif
  359. #endif
  360. #if CG_SEARCH
  361. coding_gain_search(cov);
  362. #else
  363. fprintf(stdout,"cg=%-24.18G\n",coding_gain_1d_collapsed(cov,f));
  364. #endif
  365. for(i=0;i<NUM_PROCS;i++){
  366. trans_data_clear(&ctx[i].td);
  367. }
  368. return EXIT_SUCCESS;
  369. }