encode.c 53 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600
  1. /*Daala video codec
  2. Copyright (c) 2006-2013 Daala project contributors. All rights reserved.
  3. Redistribution and use in source and binary forms, with or without
  4. modification, are permitted provided that the following conditions are met:
  5. - Redistributions of source code must retain the above copyright notice, this
  6. list of conditions and the following disclaimer.
  7. - Redistributions in binary form must reproduce the above copyright notice,
  8. this list of conditions and the following disclaimer in the documentation
  9. and/or other materials provided with the distribution.
  10. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  11. AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  12. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  13. DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
  14. FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  15. DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  16. SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  17. CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  18. OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  19. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
  20. #ifdef HAVE_CONFIG_H
  21. # include "config.h"
  22. #endif
  23. #include <stddef.h>
  24. #include <stdio.h>
  25. #include <stdlib.h>
  26. #include <math.h>
  27. #include <string.h>
  28. #include "encint.h"
  29. #if defined(OD_ENCODER_CHECK)
  30. # include "decint.h"
  31. #endif
  32. #include "generic_code.h"
  33. #include "filter.h"
  34. #include "dct.h"
  35. #include "intra.h"
  36. #include "logging.h"
  37. #include "partition.h"
  38. #include "pvq.h"
  39. #include "pvq_code.h"
  40. #include "block_size.h"
  41. #include "logging.h"
  42. #include "tf.h"
  43. #include "accounting.h"
  44. #include "state.h"
  45. #include "mcenc.h"
  46. #if defined(OD_X86ASM)
  47. # include "x86/x86int.h"
  48. #endif
  /*File-level statistics about intra mode coding.
    mode_bits accumulates the estimated cost, in bits, of every intra mode that
     is chosen, and mode_count counts how many modes were chosen.*/
  static double mode_bits = 0.0;
  static double mode_count = 0.0;
  51. static int od_quantizer_from_quality(int quality) {
  52. return quality == 0 ? 0 :
  53. (quality << OD_COEFF_SHIFT >> OD_QUALITY_SHIFT) +
  54. (1 << OD_COEFF_SHIFT >> 1);
  55. }
  56. void od_enc_opt_vtbl_init_c(od_enc_ctx *enc) {
  57. enc->opt_vtbl.mc_compute_sad_4x4_xstride_1 =
  58. od_mc_compute_sad_4x4_xstride_1_c;
  59. enc->opt_vtbl.mc_compute_sad_8x8_xstride_1 =
  60. od_mc_compute_sad_8x8_xstride_1_c;
  61. enc->opt_vtbl.mc_compute_sad_16x16_xstride_1 =
  62. od_mc_compute_sad_16x16_xstride_1_c;
  63. }
  64. static void od_enc_opt_vtbl_init(od_enc_ctx *enc) {
  65. #if defined(OD_X86ASM)
  66. od_enc_opt_vtbl_init_x86(enc);
  67. #else
  68. od_enc_opt_vtbl_init_c(enc);
  69. #endif
  70. }
  71. static int od_enc_init(od_enc_ctx *enc, const daala_info *info) {
  72. int i;
  73. int ret;
  74. ret = od_state_init(&enc->state, info);
  75. if (ret < 0) return ret;
  76. od_enc_opt_vtbl_init(enc);
  77. oggbyte_writeinit(&enc->obb);
  78. od_ec_enc_init(&enc->ec, 65025);
  79. enc->packet_state = OD_PACKET_INFO_HDR;
  80. for (i = 0; i < OD_NPLANES_MAX; i++){
  81. enc->quality[i] = 10;
  82. }
  83. enc->mvest = od_mv_est_alloc(enc);
  84. if (OD_UNLIKELY(!enc->mvest)) {
  85. return OD_EFAULT;
  86. }
  87. enc->params.mv_level_min = 0;
  88. enc->params.mv_level_max = 4;
  89. #if defined(OD_ACCOUNTING)
  90. od_acct_init(&enc->acct);
  91. #endif
  92. enc->bs = (od_block_size_comp *)_ogg_malloc(sizeof(*enc->bs));
  93. #if defined(OD_ENCODER_CHECK)
  94. enc->dec = daala_decode_alloc(info, NULL);
  95. #endif
  96. return 0;
  97. }
  98. static void od_enc_clear(od_enc_ctx *enc) {
  99. od_mv_est_free(enc->mvest);
  100. od_ec_enc_clear(&enc->ec);
  101. oggbyte_writeclear(&enc->obb);
  102. od_state_clear(&enc->state);
  103. #if defined(OD_ACCOUNTING)
  104. od_acct_clear(&enc->acct);
  105. #endif
  106. }
  107. daala_enc_ctx *daala_encode_create(const daala_info *info) {
  108. od_enc_ctx *enc;
  109. if (info == NULL) return NULL;
  110. enc = (od_enc_ctx *)_ogg_malloc(sizeof(*enc));
  111. if (od_enc_init(enc, info) < 0) {
  112. _ogg_free(enc);
  113. return NULL;
  114. }
  115. return enc;
  116. }
  117. void daala_encode_free(daala_enc_ctx *enc) {
  118. if (enc != NULL) {
  119. #if defined(OD_ENCODER_CHECK)
  120. if (enc->dec != NULL) {
  121. daala_decode_free(enc->dec);
  122. }
  123. #endif
  124. _ogg_free(enc->bs);
  125. od_enc_clear(enc);
  126. _ogg_free(enc);
  127. }
  128. }
  129. int daala_encode_ctl(daala_enc_ctx *enc, int req, void *buf, size_t buf_sz) {
  130. (void)buf;
  131. (void)buf_sz;
  132. switch (req) {
  133. case OD_SET_QUANT:
  134. {
  135. int i;
  136. OD_ASSERT(enc);
  137. OD_ASSERT(buf);
  138. OD_ASSERT(buf_sz == sizeof(*enc->quality));
  139. for (i = 0; i < OD_NPLANES_MAX; i++){
  140. int tmp = *(int *)buf;
  141. enc->quality[i] = tmp > 0 ? (tmp << OD_QUALITY_SHIFT) - 8 : 0;
  142. }
  143. return OD_SUCCESS;
  144. }
  145. case OD_SET_MC_USE_CHROMA:
  146. {
  147. int mc_use_chroma;
  148. OD_ASSERT(enc);
  149. OD_ASSERT(buf);
  150. OD_ASSERT(buf_sz == sizeof(mc_use_chroma));
  151. mc_use_chroma = *(int *)buf;
  152. if (mc_use_chroma) {
  153. enc->mvest->flags |= OD_MC_USE_CHROMA;
  154. }
  155. else {
  156. enc->mvest->flags &= ~OD_MC_USE_CHROMA;
  157. }
  158. return OD_SUCCESS;
  159. }
  160. case OD_SET_MV_RES_MIN:
  161. {
  162. int mv_res_min;
  163. OD_ASSERT(enc);
  164. OD_ASSERT(buf);
  165. OD_ASSERT(buf_sz == sizeof(mv_res_min));
  166. mv_res_min = *(int *)buf;
  167. if (mv_res_min < 0 || mv_res_min > 2) {
  168. return OD_EINVAL;
  169. }
  170. enc->mvest->mv_res_min = mv_res_min;
  171. return OD_SUCCESS;
  172. }
  173. case OD_SET_MV_LEVEL_MIN:
  174. {
  175. int mv_level_min;
  176. OD_ASSERT(enc);
  177. OD_ASSERT(buf);
  178. OD_ASSERT(buf_sz == sizeof(mv_level_min));
  179. mv_level_min = *(int *)buf;
  180. if (mv_level_min < 0 || mv_level_min > 4) {
  181. return OD_EINVAL;
  182. }
  183. enc->params.mv_level_min = mv_level_min;
  184. return OD_SUCCESS;
  185. }
  186. case OD_SET_MV_LEVEL_MAX:
  187. {
  188. int mv_level_max;
  189. OD_ASSERT(enc);
  190. OD_ASSERT(buf);
  191. OD_ASSERT(buf_sz == sizeof(mv_level_max));
  192. mv_level_max = *(int *)buf;
  193. if (mv_level_max < 0 || mv_level_max > 4) {
  194. return OD_EINVAL;
  195. }
  196. enc->params.mv_level_max = mv_level_max;
  197. return OD_SUCCESS;
  198. }
  199. default: return OD_EIMPL;
  200. }
  201. }
  202. void od_encode_checkpoint(const daala_enc_ctx *enc, od_rollback_buffer *rbuf) {
  203. od_ec_enc_checkpoint(&rbuf->ec, &enc->ec);
  204. OD_COPY(&rbuf->adapt, &enc->state.adapt, 1);
  205. }
  206. void od_encode_rollback(daala_enc_ctx *enc, const od_rollback_buffer *rbuf) {
  207. od_ec_enc_rollback(&enc->ec, &rbuf->ec);
  208. OD_COPY(&enc->state.adapt, &rbuf->adapt, 1);
  209. }
  210. static void od_img_plane_copy_pad8(od_img_plane *dst_p,
  211. int plane_width, int plane_height, od_img_plane *src_p,
  212. int pic_width, int pic_height) {
  213. unsigned char *dst_data;
  214. ptrdiff_t dstride;
  215. int y;
  216. dstride = dst_p->ystride;
  217. /*If we have _no_ data, just encode a dull green.*/
  218. if (pic_width == 0 || pic_height == 0) {
  219. dst_data = dst_p->data;
  220. for (y = 0; y < plane_height; y++) {
  221. OD_CLEAR(dst_data, plane_width);
  222. dst_data += dstride;
  223. }
  224. }
  225. /*Otherwise, copy what we do have, and add our own padding.*/
  226. else {
  227. unsigned char *src_data;
  228. unsigned char *dst;
  229. ptrdiff_t sxstride;
  230. ptrdiff_t systride;
  231. int x;
  232. /*Step 1: Copy the data we do have.*/
  233. sxstride = src_p->xstride;
  234. systride = src_p->ystride;
  235. dst_data = dst_p->data;
  236. src_data = src_p->data;
  237. dst = dst_data;
  238. for (y = 0; y < pic_height; y++) {
  239. if (sxstride == 1) OD_COPY(dst, src_data, pic_width);
  240. else for (x = 0; x < pic_width; x++) dst[x] = *(src_data + sxstride*x);
  241. dst += dstride;
  242. src_data += systride;
  243. }
  244. /*Step 2: Perform a low-pass extension into the padding region.*/
  245. /*Right side.*/
  246. for (x = pic_width; x < plane_width; x++) {
  247. dst = dst_data + x - 1;
  248. for (y = 0; y < pic_height; y++) {
  249. dst[1] = (2*dst[0] + (dst - (dstride & -(y > 0)))[0]
  250. + (dst + (dstride & -(y + 1 < pic_height)))[0] + 2) >> 2;
  251. dst += dstride;
  252. }
  253. }
  254. /*Bottom.*/
  255. dst = dst_data + dstride*pic_height;
  256. for (y = pic_height; y < plane_height; y++) {
  257. for (x = 0; x < plane_width; x++) {
  258. dst[x] = (2*(dst - dstride)[x] + (dst - dstride)[x - (x > 0)]
  259. + (dst - dstride)[x + (x + 1 < plane_width)] + 2) >> 2;
  260. }
  261. dst += dstride;
  262. }
  263. }
  264. }
  265. /*Extend the edge into the padding.*/
  266. static void od_img_plane_edge_ext8(od_img_plane *dst_p,
  267. int plane_width, int plane_height, int horz_padding, int vert_padding) {
  268. ptrdiff_t dstride;
  269. unsigned char *dst_data;
  270. unsigned char *dst;
  271. int x;
  272. int y;
  273. dstride = dst_p->ystride;
  274. dst_data = dst_p->data;
  275. /*Left side.*/
  276. for (y = 0; y < plane_height; y++) {
  277. dst = dst_data + dstride * y;
  278. for (x = 1; x <= horz_padding; x++) {
  279. (dst-x)[0] = dst[0];
  280. }
  281. }
  282. /*Right side.*/
  283. for (y = 0; y < plane_height; y++) {
  284. dst = dst_data + plane_width - 1 + dstride * y;
  285. for (x = 1; x <= horz_padding; x++) {
  286. dst[x] = dst[0];
  287. }
  288. }
  289. /*Top.*/
  290. dst = dst_data - horz_padding;
  291. for (y = 0; y < vert_padding; y++) {
  292. for (x = 0; x < plane_width + 2 * horz_padding; x++) {
  293. (dst - dstride)[x] = dst[x];
  294. }
  295. dst -= dstride;
  296. }
  297. /*Bottom.*/
  298. dst = dst_data - horz_padding + plane_height * dstride;
  299. for (y = 0; y < vert_padding; y++) {
  300. for (x = 0; x < plane_width + 2 * horz_padding; x++) {
  301. dst[x] = (dst - dstride)[x];
  302. }
  303. dst += dstride;
  304. }
  305. }
  306. struct od_mb_enc_ctx {
  307. signed char *modes[OD_NPLANES_MAX];
  308. od_coeff *c;
  309. od_coeff **d;
  310. od_coeff *tf[OD_NPLANES_MAX];
  311. od_coeff *md;
  312. od_coeff *mc;
  313. od_coeff *l;
  314. int is_keyframe;
  315. int nk;
  316. int k_total;
  317. int sum_ex_total_q8;
  318. int ncount;
  319. int count_total_q8;
  320. int count_ex_total_q8;
  321. };
  322. typedef struct od_mb_enc_ctx od_mb_enc_ctx;
  /*Computes the predictor for one transform block and, on keyframes, chooses
     and codes the intra mode.
    enc: The encoder context (entropy coder, adaptation state, quantizers).
    ctx: The block coding buffers.
    pred: Receives the n*n predictor, where n = 4 << ln.
    ln: Log2 of the block size minus 2 (0, 1 or 2).
    pli: The plane index.
    bx: Horizontal block position in units of 4x4 blocks.
    by: Vertical block position in units of 4x4 blocks.
    has_ur: Non-zero if the up-right neighbor may be used for intra prediction.
    On non-keyframes the predictor is copied from ctx->md.
    On keyframes, interior blocks use intra prediction (or ctx->l when the
     plane is predicted from luma), while blocks in the first row or column
     only get a DC predictor from a neighboring block.*/
  323. static void od_encode_compute_pred(daala_enc_ctx *enc, od_mb_enc_ctx *ctx, od_coeff *pred,
  324. int ln, int pli, int bx, int by, int has_ur) {
  325. int n;
  326. int n2;
  327. int xdec;
  328. int ydec;
  329. int w;
  330. int frame_width;
  331. signed char *modes;
  332. od_coeff *d;
  333. od_coeff *tf;
  334. od_coeff *md;
  335. od_coeff *l;
  336. int x;
  337. int y;
  338. int zzi;
  339. OD_ASSERT(ln >= 0 && ln <= 2);
  /*The block is n x n coefficients, n2 in total.*/
  340. n = 1 << (ln + 2);
  341. n2 = n*n;
  342. xdec = enc->state.io_imgs[OD_FRAME_INPUT].planes[pli].xdec;
  343. ydec = enc->state.io_imgs[OD_FRAME_INPUT].planes[pli].ydec;
  344. frame_width = enc->state.frame_width;
  /*Row stride of this plane's coefficient buffers.*/
  345. w = frame_width >> xdec;
  /*Unless CfL is disabled, every plane shares the mode map of plane 0.*/
  346. modes = ctx->modes[OD_DISABLE_CFL ? pli : 0];
  347. d = ctx->d[pli];
  348. /*We never use tf on the chroma planes, but if we do it will blow up, which
  349. is better than always using luma's tf.*/
  350. tf = ctx->tf[pli];
  351. md = ctx->md;
  352. l = ctx->l;
  353. if (ctx->is_keyframe) {
  /*Interior blocks have both a left and an upper neighbor.*/
  354. if (bx > 0 && by > 0) {
  355. if (pli == 0 || OD_DISABLE_CFL) {
  356. ogg_uint16_t mode_cdf[OD_INTRA_NMODES];
  357. ogg_uint32_t mode_dist[OD_INTRA_NMODES];
  358. int m_l;
  359. int m_ul;
  360. int m_u;
  361. int mode;
  362. od_coeff *coeffs[4];
  363. int strides[4];
  364. /*Search predictors from the surrounding blocks.*/
  /*coeffs[0..3] are the up-left, up, up-right and left neighbors in tf, each
     one block size away from the current block.*/
  365. coeffs[0] = tf + ((by - (1 << ln)) << 2)*w + ((bx - (1 << ln)) << 2);
  366. coeffs[1] = tf + ((by - (1 << ln)) << 2)*w + ((bx - (0 << ln)) << 2);
  367. coeffs[2] = tf + ((by - (1 << ln)) << 2)*w + ((bx + (1 << ln)) << 2);
  368. coeffs[3] = tf + ((by - (0 << ln)) << 2)*w + ((bx - (1 << ln)) << 2);
  /*Without an up-right neighbor, the upper neighbor is used in its place.*/
  369. if (!has_ur) {
  370. coeffs[2] = coeffs[1];
  371. }
  372. strides[0] = w;
  373. strides[1] = w;
  374. strides[2] = w;
  375. strides[3] = w;
  /*Modes of the left, up-left and upper 4x4 blocks; the mode map has w >> 2
     entries per row.*/
  376. m_l = modes[by*(w >> 2) + bx - 1];
  377. m_ul = modes[(by - 1)*(w >> 2) + bx - 1];
  378. m_u = modes[(by - 1)*(w >> 2) + bx];
  379. od_intra_pred_cdf(mode_cdf, enc->state.adapt.mode_probs[pli],
  380. OD_INTRA_NMODES, m_l, m_ul, m_u);
  381. (*OD_INTRA_DIST[ln])(mode_dist, d + (by << 2)*w + (bx << 2), w,
  382. coeffs, strides);
  383. /*Lambda = 1*/
  384. #if OD_DISABLE_INTRA
  385. mode = 0;
  386. #else
  387. /* Make lambda proportional to quantization step size, with exact
  388. factor based on quick experiments with subset1 (can be improved). */
  389. mode = od_intra_pred_search(mode_cdf, mode_dist, OD_INTRA_NMODES,
  390. OD_MINI(32767, enc->quantizer[pli] << 4));
  391. #endif
  392. (*OD_INTRA_GET[ln])(pred, coeffs, strides, mode);
  /*With intra prediction disabled, every predictor entry except the first is
     cleared.*/
  393. #if OD_DISABLE_INTRA
  394. OD_CLEAR(pred+1, n2-1);
  395. #endif
  396. OD_ACCT_UPDATE(&enc->acct, od_ec_enc_tell_frac(&enc->ec),
  397. OD_ACCT_CAT_TECHNIQUE, OD_ACCT_TECH_INTRA_MODE);
  398. #if !OD_DISABLE_INTRA
  399. od_ec_encode_cdf_unscaled(&enc->ec, mode, mode_cdf, OD_INTRA_NMODES);
  400. #endif
  401. OD_ACCT_UPDATE(&enc->acct, od_ec_enc_tell_frac(&enc->ec),
  402. OD_ACCT_CAT_TECHNIQUE, OD_ACCT_TECH_UNKNOWN);
  /*Add -log2 of the chosen mode's probability under mode_cdf to the
     file-level statistics.*/
  403. mode_bits -= M_LOG2E*log(
  404. (mode_cdf[mode] - (mode == 0 ? 0 : mode_cdf[mode - 1]))/
  405. (float)mode_cdf[OD_INTRA_NMODES - 1]);
  406. mode_count++;
  /*Record the chosen mode for every 4x4 block covered by this block.*/
  407. for (y = 0; y < (1 << ln); y++) {
  408. for (x = 0; x < (1 << ln); x++) {
  409. modes[(by + y)*(w >> 2) + bx + x] = mode;
  410. }
  411. }
  412. od_intra_pred_update(enc->state.adapt.mode_probs[pli], OD_INTRA_NMODES,
  413. mode, m_l, m_ul, m_u);
  414. }
  415. else {
  /*Other planes with CfL enabled: the predictor is the co-located block of
     ctx->l.*/
  416. int i;
  417. int j;
  418. for (i = 0; i < n; i++) {
  419. for (j = 0; j < n; j++) {
  420. pred[i*n + j] = l[((by << 2) + i)*w + (bx << 2) + j];
  421. }
  422. }
  423. }
  424. }
  425. else {
  /*First row or column: start from an all-zero predictor and predict only the
     DC from the left neighbor if there is one, else from the upper one.*/
  426. int nsize;
  427. for (zzi = 0; zzi < n2; zzi++) pred[zzi] = 0;
  428. nsize = ln;
  429. /*444/420 only right now.*/
  430. OD_ASSERT(xdec == ydec);
  431. if (bx > 0) {
  432. int noff;
  433. nsize = OD_BLOCK_SIZE4x4(enc->state.bsize, enc->state.bstride,
  434. (bx - 1) << xdec, by << ydec);
  435. nsize = OD_MAXI(nsize - xdec, 0);
  436. noff = 1 << nsize;
  437. /*Because of the quad-tree structure we can always find our neighbors
  438. starting offset by rounding to a multiple of his size.*/
  439. OD_ASSERT(!(bx & (noff - 1)));
  440. pred[0] = d[((by & ~(noff - 1)) << 2)*w + ((bx - noff) << 2)];
  441. }
  442. else if (by > 0) {
  443. int noff;
  444. nsize = OD_BLOCK_SIZE4x4(enc->state.bsize, enc->state.bstride,
  445. bx << xdec, (by - 1) << ydec);
  446. nsize = OD_MAXI(nsize - xdec, 0);
  447. noff = 1 << nsize;
  448. OD_ASSERT(!(by & (noff - 1)));
  449. pred[0] = d[((by - noff) << 2)*w + ((bx & ~(noff - 1)) << 2)];
  450. }
  451. /*Rescale DC for correct transform size.*/
  452. if (nsize > ln) pred[0] >>= (nsize - ln);
  453. else if (nsize < ln) pred[0] <<= (ln - nsize);
  /*These blocks carry no coded mode; plane 0 records mode 0 for them.*/
  454. if (pli == 0) {
  455. for (y = 0; y < (1 << ln); y++) {
  456. for (x = 0; x < (1 << ln); x++) {
  457. modes[(by + y)*(w >> 2) + bx + x] = 0;
  458. }
  459. }
  460. }
  461. }
  462. }
  463. else {
  /*Non-keyframes: the predictor is the co-located block of ctx->md, copied in
     raster order.*/
  464. int ci;
  465. ci = 0;
  466. for (y = 0; y < n; y++) {
  467. for (x = 0; x < n; x++) {
  468. pred[ci++] = md[(y + (by << 2))*w + (x + (bx << 2))];
  469. }
  470. }
  471. }
  472. }
  473. static void od_single_band_lossless_encode(daala_enc_ctx *enc, int ln,
  474. od_coeff *scalar_out, const od_coeff *cblock, const od_coeff *predt,
  475. int pli) {
  476. int *adapt;
  477. int vk;
  478. int zzi;
  479. int n2;
  480. ogg_int32_t adapt_curr[OD_NSB_ADAPT_CTXS];
  481. adapt = enc->state.adapt.pvq_adapt;
  482. vk = 0;
  483. n2 = 1 << (2*ln + 4);
  484. for (zzi = 1; zzi < n2; zzi++) {
  485. scalar_out[zzi] = cblock[zzi] - predt[zzi];
  486. vk += abs(scalar_out[zzi]);
  487. }
  488. generic_encode(&enc->ec, &enc->state.adapt.model_g[pli], vk, -1,
  489. &enc->state.adapt.ex_g[pli][ln], 0);
  490. laplace_encode_vector(&enc->ec, scalar_out + 1, n2 - 1, vk, adapt_curr,
  491. adapt);
  492. for (zzi = 1; zzi < n2; zzi++) {
  493. scalar_out[zzi] = scalar_out[zzi] + predt[zzi];
  494. }
  495. if (adapt_curr[OD_ADAPT_K_Q8] > 0) {
  496. adapt[OD_ADAPT_K_Q8] += 256*adapt_curr[OD_ADAPT_K_Q8] -
  497. adapt[OD_ADAPT_K_Q8] >> OD_SCALAR_ADAPT_SPEED;
  498. adapt[OD_ADAPT_SUM_EX_Q8] += adapt_curr[OD_ADAPT_SUM_EX_Q8] -
  499. adapt[OD_ADAPT_SUM_EX_Q8] >> OD_SCALAR_ADAPT_SPEED;
  500. }
  501. if (adapt_curr[OD_ADAPT_COUNT_Q8] > 0) {
  502. adapt[OD_ADAPT_COUNT_Q8] += adapt_curr[OD_ADAPT_COUNT_Q8]-
  503. adapt[OD_ADAPT_COUNT_Q8] >> OD_SCALAR_ADAPT_SPEED;
  504. adapt[OD_ADAPT_COUNT_EX_Q8] += adapt_curr[OD_ADAPT_COUNT_EX_Q8]-
  505. adapt[OD_ADAPT_COUNT_EX_Q8] >> OD_SCALAR_ADAPT_SPEED;
  506. }
  507. }
  /*Codes one transform block and reconstructs it.
    enc: The encoder context.
    ctx: The block coding buffers.
    ln: Log2 of the block size minus 2 (0, 1 or 2).
    pli: The plane index.
    bx: Horizontal block position in units of the block size.
    by: Vertical block position in units of the block size.
    has_ur: Forwarded to od_encode_compute_pred().
    The steps are: forward transform of the reference (non-keyframes only),
     prediction, reordering into coding order, DC and AC coding,
     reconstruction into ctx->d, and the inverse transform into ctx->c.*/
  508. static void od_block_encode(daala_enc_ctx *enc, od_mb_enc_ctx *ctx, int ln,
  509. int pli, int bx, int by, int has_ur) {
  510. int n;
  511. int xdec;
  512. int w;
  513. int frame_width;
  514. od_coeff *c;
  515. od_coeff *d;
  516. od_coeff *tf;
  517. od_coeff *md;
  518. od_coeff *mc;
  519. od_coeff pred[16*16];
  520. od_coeff predt[16*16];
  521. od_coeff cblock[16*16];
  522. od_coeff scalar_out[16*16];
  523. int quant;
  524. int dc_quant;
  525. int lossless;
  526. #if defined(OD_OUTPUT_PRED)
  527. od_coeff preds[16*16];
  528. int zzi;
  529. #endif
  530. OD_ASSERT(ln >= 0 && ln <= 2);
  531. n = 1 << (ln + 2);
  /*Convert the block position to units of 4x4 blocks.*/
  532. bx <<= ln;
  533. by <<= ln;
  534. xdec = enc->state.io_imgs[OD_FRAME_INPUT].planes[pli].xdec;
  535. frame_width = enc->state.frame_width;
  536. w = frame_width >> xdec;
  537. c = ctx->c;
  538. d = ctx->d[pli];
  539. /*We never use tf on the chroma planes, but if we do it will blow up, which
  540. is better than always using luma's tf.*/
  541. tf = ctx->tf[pli];
  542. md = ctx->md;
  543. mc = ctx->mc;
  544. /* Apply forward transform. */
  /*Only non-keyframes transform mc into md; od_encode_compute_pred() then
     reads md as the predictor.*/
  545. if (!ctx->is_keyframe) {
  546. (*enc->state.opt_vtbl.fdct_2d[ln])(md + (by << 2)*w + (bx << 2), w,
  547. mc + (by << 2)*w + (bx << 2), w);
  548. }
  549. od_encode_compute_pred(enc, ctx, pred, ln, pli, bx, by, has_ur);
  550. lossless = (enc->quantizer[pli] == 0);
  551. #if defined(OD_OUTPUT_PRED)
  552. for (zzi = 0; zzi < (n*n); zzi++) preds[zzi] = pred[zzi];
  553. #endif
  554. /* Change ordering for encoding. */
  555. od_raster_to_coding_order(cblock, n, &d[((by << 2))*w + (bx << 2)], w,
  556. lossless);
  557. od_raster_to_coding_order(predt, n, &pred[0], n, lossless);
  558. /* Lossless encoding uses an actual quantizer of 1, but is signalled
  559. with a 'quantizer' of 0. */
  560. quant = OD_MAXI(1, enc->quantizer[pli]);
  561. if (lossless) dc_quant = quant;
  562. else dc_quant = OD_MAXI(1, quant*OD_PVQ_QM_Q4[pli][ln][0] >> 4);
  563. /* This quantization may be overridden in the PVQ code for full RDO. */
  /*The DC is quantized here unless it is a keyframe with Haar DC enabled, in
     which case scalar_out[0] is taken from cblock[0] further down.*/
  564. if (OD_DISABLE_HAAR_DC || !ctx->is_keyframe) {
  565. if (abs(cblock[0] - predt[0]) < dc_quant * 141 / 256) { /* 0.55 */
  566. scalar_out[0] = 0;
  567. }
  568. else {
  569. scalar_out[0] = OD_DIV_R0(cblock[0] - predt[0], dc_quant);
  570. }
  571. }
  572. OD_ACCT_UPDATE(&enc->acct, od_ec_enc_tell_frac(&enc->ec),
  573. OD_ACCT_CAT_TECHNIQUE, OD_ACCT_TECH_AC_COEFFS);
  574. if (lossless) {
  575. od_single_band_lossless_encode(enc, ln, scalar_out, cblock, predt, pli);
  576. }
  577. else {
  578. pvq_encode(enc, predt, cblock, scalar_out, quant, pli, ln,
  579. OD_PVQ_QM_Q4[pli][ln], OD_PVQ_BETA[pli][ln],
  580. OD_ROBUST_STREAM, ctx->is_keyframe);
  581. }
  582. OD_ACCT_UPDATE(&enc->acct, od_ec_enc_tell_frac(&enc->ec),
  583. OD_ACCT_CAT_TECHNIQUE, OD_ACCT_TECH_UNKNOWN);
  584. if (OD_DISABLE_HAAR_DC || !ctx->is_keyframe) {
  585. int has_dc_skip;
  /*When has_dc_skip is set, a zero DC is not coded here and non-zero
     magnitudes are coded minus one.*/
  586. has_dc_skip = !ctx->is_keyframe && !lossless;
  587. OD_ACCT_UPDATE(&enc->acct, od_ec_enc_tell_frac(&enc->ec),
  588. OD_ACCT_CAT_TECHNIQUE, OD_ACCT_TECH_DC_COEFF);
  589. if (!has_dc_skip || scalar_out[0]) {
  590. generic_encode(&enc->ec, &enc->state.adapt.model_dc[pli],
  591. abs(scalar_out[0]) - has_dc_skip, -1, &enc->state.adapt.ex_dc[pli][ln][0], 2);
  592. }
  593. if (scalar_out[0]) od_ec_enc_bits(&enc->ec, scalar_out[0] < 0, 1);
  594. OD_ACCT_UPDATE(&enc->acct, od_ec_enc_tell_frac(&enc->ec),
  595. OD_ACCT_CAT_TECHNIQUE, OD_ACCT_TECH_UNKNOWN);
  /*Reconstruct the DC: dequantize and add the predictor back.*/
  596. scalar_out[0] = scalar_out[0]*dc_quant;
  597. scalar_out[0] += predt[0];
  598. }
  599. else {
  /*The DC is passed through unchanged from the input block.*/
  600. scalar_out[0] = cblock[0];
  601. }
  /*Write the reconstructed coefficients back over the input in d.*/
  602. od_coding_order_to_raster(&d[((by << 2))*w + (bx << 2)], w, scalar_out, n,
  603. lossless);
  604. /*Update the TF'd luma plane with CfL, or all the planes without CfL.*/
  605. if (ctx->is_keyframe && (pli == 0 || OD_DISABLE_CFL)) {
  606. od_convert_block_down(tf + (by << 2)*w + (bx << 2), w,
  607. d + (by << 2)*w + (bx << 2), w, ln, 0, 0);
  608. }
  609. /*Apply the inverse transform.*/
  /*With OD_OUTPUT_PRED defined, the saved predictor is inverse transformed
     instead of the reconstruction.*/
  610. #if !defined(OD_OUTPUT_PRED)
  611. (*enc->state.opt_vtbl.idct_2d[ln])(c + (by << 2)*w + (bx << 2), w,
  612. d + (by << 2)*w + (bx << 2), w);
  613. #else
  614. # if 0
  615. /*Output the resampled luma plane.*/
  616. if (pli != 0) {
  617. for (y = 0; y < n; y++) {
  618. for (x = 0; x < n; x++) {
  619. preds[y*n + x] = l[((by << 2) + y)*w + (bx << 2) + x] >> xdec;
  620. }
  621. }
  622. }
  623. # endif
  624. (*enc->state.opt_vtbl.idct_2d[ln])(c + (by << 2)*w + (bx << 2), w, preds, n);
  625. #endif
  626. }
  627. static void od_compute_dcts(daala_enc_ctx *enc, od_mb_enc_ctx *ctx, int pli,
  628. int bx, int by, int l, int xdec, int ydec) {
  629. int od;
  630. int d;
  631. int w;
  632. od_coeff *c;
  633. c = ctx->d[pli];
  634. w = enc->state.frame_width >> xdec;
  635. /*This code assumes 4:4:4 or 4:2:0 input.*/
  636. OD_ASSERT(xdec == ydec);
  637. od = OD_BLOCK_SIZE4x4(enc->state.bsize,
  638. enc->state.bstride, bx << l, by << l);
  639. d = OD_MAXI(od, xdec);
  640. OD_ASSERT(d <= l);
  641. if (d == l) {
  642. d -= xdec;
  643. (*enc->state.opt_vtbl.fdct_2d[d])(c + (by << (2 + d))*w + (bx << (2 + d)), w,
  644. ctx->c + (by << (2 + d))*w + (bx << (2 + d)), w);
  645. }
  646. else {
  647. l--;
  648. bx <<= 1;
  649. by <<= 1;
  650. od_compute_dcts(enc, ctx, pli, bx + 0, by + 0, l, xdec, ydec);
  651. od_compute_dcts(enc, ctx, pli, bx + 1, by + 0, l, xdec, ydec);
  652. od_compute_dcts(enc, ctx, pli, bx + 0, by + 1, l, xdec, ydec);
  653. od_compute_dcts(enc, ctx, pli, bx + 1, by + 1, l, xdec, ydec);
  654. if (!OD_DISABLE_HAAR_DC && ctx->is_keyframe) {
  655. od_coeff x[4];
  656. int l2;
  657. l2 = l - xdec + 2;
  658. x[0] = c[(by << l2)*w + (bx << l2)];
  659. x[1] = c[(by << l2)*w + ((bx + 1) << l2)];
  660. x[2] = c[((by + 1) << l2)*w + (bx << l2)];
  661. x[3] = c[((by + 1) << l2)*w + ((bx + 1) << l2)];
  662. OD_HAAR_KERNEL(x[0], x[2], x[1], x[3]);
  663. c[(by << l2)*w + (bx << l2)] = x[0];
  664. c[(by << l2)*w + ((bx + 1) << l2)] = x[1];
  665. c[((by + 1) << l2)*w + (bx << l2)] = x[2];
  666. c[((by + 1) << l2)*w + ((bx + 1) << l2)] = x[3];
  667. }
  668. }
  669. }
  670. #if !OD_DISABLE_HAAR_DC
  /*Recursively quantizes, codes and reconstructs the DC coefficients that
     od_compute_dcts() combined with OD_HAAR_KERNEL.
    enc: The encoder context.
    ctx: The block coding buffers; ctx->d[pli] is updated in place.
    pli: The plane index.
    bx: Horizontal position in units of the current level's block size.
    by: Vertical position in units of the current level's block size.
    l: The current level; the top-level call uses l == 3.
    xdec: The horizontal decimation of the plane.
    ydec: The vertical decimation of the plane (must equal xdec).
    hgrad: Horizontal gradient used to predict x[1] at the next level.
    vgrad: Vertical gradient used to predict x[2] at the next level.
    has_ur: Non-zero if the up-right neighbor in sb_dc_mem may be used; only
             examined when l == 3.*/
  671. static void od_quantize_haar_dc(daala_enc_ctx *enc, od_mb_enc_ctx *ctx,
  672. int pli, int bx, int by, int l, int xdec, int ydec, od_coeff hgrad,
  673. od_coeff vgrad, int has_ur) {
  674. int od;
  675. int d;
  676. int w;
  677. int i;
  678. int dc_quant;
  679. od_coeff *c;
  680. c = ctx->d[pli];
  681. w = enc->state.frame_width >> xdec;
  682. /*This code assumes 4:4:4 or 4:2:0 input.*/
  683. OD_ASSERT(xdec == ydec);
  684. od = OD_BLOCK_SIZE4x4(enc->state.bsize,
  685. enc->state.bstride, bx << l, by << l);
  686. d = OD_MAXI(od, xdec);
  687. OD_ASSERT(d <= l);
  /*A quantizer of 0 selects a DC step size of 1.*/
  688. if (enc->quantizer[pli] == 0) dc_quant = 1;
  689. else {
  690. dc_quant = OD_MAXI(1, enc->quantizer[pli]*OD_DC_RES[pli] >> 4);
  691. }
  692. OD_ACCT_UPDATE(&enc->acct, od_ec_enc_tell_frac(&enc->ec),
  693. OD_ACCT_CAT_TECHNIQUE, OD_ACCT_TECH_DC_COEFF);
  /*Top level: predict this DC from the reconstructed DCs of neighboring
     positions in sb_dc_mem, then code the quantized difference.*/
  694. if (l == 3) {
  695. int nhsb;
  696. int quant;
  697. int dc0;
  698. int l2;
  699. od_coeff sb_dc_pred;
  700. od_coeff sb_dc_curr;
  701. od_coeff *sb_dc_mem;
  702. nhsb = enc->state.nhsb;
  703. sb_dc_mem = enc->state.sb_dc_mem[pli];
  704. l2 = l - xdec + 2;
  705. if (by > 0 && bx > 0) {
  706. /* These coeffs were LS-optimized on subset 1. */
  707. if (has_ur) {
  708. sb_dc_pred = (22*sb_dc_mem[by*nhsb + bx - 1]
  709. - 9*sb_dc_mem[(by - 1)*nhsb + bx - 1]
  710. + 15*sb_dc_mem[(by - 1)*nhsb + bx]
  711. + 4*sb_dc_mem[(by - 1)*nhsb + bx + 1] + 16) >> 5;
  712. }
  713. else {
  714. sb_dc_pred = (23*sb_dc_mem[by*nhsb + bx - 1]
  715. - 10*sb_dc_mem[(by - 1)*nhsb + bx - 1]
  716. + 19*sb_dc_mem[(by - 1)*nhsb + bx] + 16) >> 5;
  717. }
  718. }
  /*With a single neighbor, its DC is the prediction; with none, use 0.*/
  719. else if (by > 0) sb_dc_pred = sb_dc_mem[(by - 1)*nhsb + bx];
  720. else if (bx > 0) sb_dc_pred = sb_dc_mem[by*nhsb + bx - 1];
  721. else sb_dc_pred = 0;
  722. dc0 = c[(by << l2)*w + (bx << l2)] - sb_dc_pred;
  723. quant = OD_DIV_R0(dc0, dc_quant);
  724. generic_encode(&enc->ec, &enc->state.adapt.model_dc[pli], abs(quant), -1,
  725. &enc->state.adapt.ex_sb_dc[pli], 2);
  726. if (quant) od_ec_enc_bits(&enc->ec, quant < 0, 1);
  /*Store the reconstructed DC both in the coefficient buffer and in
     sb_dc_mem, where later positions read it for prediction.*/
  727. sb_dc_curr = quant*dc_quant + sb_dc_pred;
  728. c[(by << l2)*w + (bx << l2)] = sb_dc_curr;
  729. sb_dc_mem[by*nhsb + bx] = sb_dc_curr;
  /*Replace the incoming gradients with differences against the upper and left
     neighbors when those exist.*/
  730. if (by > 0) vgrad = sb_dc_mem[(by - 1)*nhsb + bx] - sb_dc_curr;
  731. if (bx > 0) hgrad = sb_dc_mem[by*nhsb + bx - 1]- sb_dc_curr;
  732. }
  /*The block is split further: code x[1..3] of the four children's
     coefficients and recurse.*/
  733. if (l > d) {
  734. od_coeff x[4];
  735. int l2;
  736. l--;
  737. bx <<= 1;
  738. by <<= 1;
  739. l2 = l - xdec + 2;
  740. x[0] = c[(by << l2)*w + (bx << l2)];
  741. x[1] = c[(by << l2)*w + ((bx + 1) << l2)];
  742. x[2] = c[((by + 1) << l2)*w + (bx << l2)];
  743. x[3] = c[((by + 1) << l2)*w + ((bx + 1) << l2)];
  /*Subtract the gradient-based prediction before quantizing; it is added back
     after reconstruction below.*/
  744. x[1] -= hgrad/5;
  745. x[2] -= vgrad/5;
  /*x[0] was already reconstructed at the level above, so only x[1..3] are
     coded.*/
  746. for (i = 1; i < 4; i++) {
  747. int quant;
  748. quant = OD_DIV_R0(x[i], dc_quant);
  749. generic_encode(&enc->ec, &enc->state.adapt.model_dc[pli], abs(quant), -1,
  750. &enc->state.adapt.ex_dc[pli][l][i-1], 2);
  751. if (quant) od_ec_enc_bits(&enc->ec, quant < 0, 1);
  752. x[i] = quant*dc_quant;
  753. }
  754. /* Gives best results for subset1, more conservative than the
  755. theoretical /4 of a pure gradient. */
  756. x[1] += hgrad/5;
  757. x[2] += vgrad/5;
  /*The reconstructed x[1] and x[2] become the gradients passed to the
     children.*/
  758. hgrad = x[1];
  759. vgrad = x[2];
  760. OD_HAAR_KERNEL(x[0], x[1], x[2], x[3]);
  761. c[(by << l2)*w + (bx << l2)] = x[0];
  762. c[(by << l2)*w + ((bx + 1) << l2)] = x[1];
  763. c[((by + 1) << l2)*w + (bx << l2)] = x[2];
  764. c[((by + 1) << l2)*w + ((bx + 1) << l2)] = x[3];
  765. od_quantize_haar_dc(enc, ctx, pli, bx + 0, by + 0, l, xdec, ydec, hgrad,
  766. vgrad, 0);
  767. od_quantize_haar_dc(enc, ctx, pli, bx + 1, by + 0, l, xdec, ydec, hgrad,
  768. vgrad, 0);
  769. od_quantize_haar_dc(enc, ctx, pli, bx + 0, by + 1, l, xdec, ydec, hgrad,
  770. vgrad, 0);
  771. od_quantize_haar_dc(enc, ctx, pli, bx + 1, by + 1, l, xdec, ydec, hgrad,
  772. vgrad, 0);
  773. }
  774. OD_ACCT_UPDATE(&enc->acct, od_ec_enc_tell_frac(&enc->ec),
  775. OD_ACCT_CAT_TECHNIQUE, OD_ACCT_TECH_UNKNOWN);
  776. }
  777. #endif
  778. static void od_encode_recursive(daala_enc_ctx *enc, od_mb_enc_ctx *ctx,
  779. int pli, int bx, int by, int l, int xdec, int ydec, int has_ur) {
  780. int od;
  781. int d;
  782. /*This code assumes 4:4:4 or 4:2:0 input.*/
  783. OD_ASSERT(xdec == ydec);
  784. od = OD_BLOCK_SIZE4x4(enc->state.bsize,
  785. enc->state.bstride, bx << l, by << l);
  786. d = OD_MAXI(od, xdec);
  787. OD_ASSERT(d <= l);
  788. if (d == l) {
  789. d -= xdec;
  790. /*Construct the luma predictors for chroma planes.*/
  791. if (ctx->l != NULL) {
  792. int w;
  793. int frame_width;
  794. OD_ASSERT(pli > 0);
  795. frame_width = enc->state.frame_width;
  796. w = frame_width >> xdec;
  797. od_resample_luma_coeffs(ctx->l + (by << (2 + d))*w + (bx << (2 + d)), w,
  798. ctx->d[0] + (by << (2 + l))*frame_width + (bx << (2 + l)),
  799. frame_width, xdec, ydec, d, od);
  800. }
  801. od_block_encode(enc, ctx, d, pli, bx, by, has_ur);
  802. }
  803. else {
  804. l--;
  805. bx <<= 1;
  806. by <<= 1;
  807. od_encode_recursive(enc, ctx, pli, bx + 0, by + 0, l, xdec, ydec, 1);
  808. od_encode_recursive(enc, ctx, pli, bx + 1, by + 0, l, xdec, ydec, has_ur);
  809. od_encode_recursive(enc, ctx, pli, bx + 0, by + 1, l, xdec, ydec, 1);
  810. od_encode_recursive(enc, ctx, pli, bx + 1, by + 1, l, xdec, ydec, 0);
  811. }
  812. }
  813. static void od_encode_mv(daala_enc_ctx *enc, od_mv_grid_pt *mvg, int vx,
  814. int vy, int level, int mv_res, int width, int height) {
  815. generic_encoder *model;
  816. int pred[2];
  817. int ox;
  818. int oy;
  819. int id;
  820. od_state_get_predictor(&enc->state, pred, vx, vy, level, mv_res);
  821. ox = (mvg->mv[0] >> mv_res) - pred[0];
  822. oy = (mvg->mv[1] >> mv_res) - pred[1];
  823. /*Interleave positive and negative values.*/
  824. model = &enc->state.adapt.mv_model;
  825. id = OD_MINI(abs(oy), 3)*4 + OD_MINI(abs(ox), 3);
  826. od_encode_cdf_adapt(&enc->ec, id, enc->state.adapt.mv_small_cdf, 16,
  827. enc->state.adapt.mv_small_increment);
  828. if (abs(ox) >= 3) {
  829. generic_encode(&enc->ec, model, abs(ox) - 3, width << (3 - mv_res),
  830. &enc->state.adapt.mv_ex[level], 6);
  831. }
  832. if (abs(oy) >= 3) {
  833. generic_encode(&enc->ec, model, abs(oy) - 3, height << (3 - mv_res),
  834. &enc->state.adapt.mv_ey[level], 6);
  835. }
  836. if (abs(ox)) od_ec_enc_bits(&enc->ec, ox < 0, 1);
  837. if (abs(oy)) od_ec_enc_bits(&enc->ec, oy < 0, 1);
  838. }
  839. static void od_img_copy_pad(od_state *state, od_img *img) {
  840. int pli;
  841. int nplanes;
  842. nplanes = img->nplanes;
  843. /* Copy and pad the image. */
  844. for (pli = 0; pli < nplanes; pli++) {
  845. od_img_plane plane;
  846. int plane_width;
  847. int plane_height;
  848. int xdec;
  849. int ydec;
  850. *&plane = *(img->planes + pli);
  851. xdec = plane.xdec;
  852. ydec = plane.ydec;
  853. plane_width = ((state->info.pic_width + (1 << xdec) - 1) >> xdec);
  854. plane_height = ((state->info.pic_height + (1 << ydec) - 1) >> ydec);
  855. od_img_plane_copy_pad8(&state->io_imgs[OD_FRAME_INPUT].planes[pli],
  856. state->frame_width >> xdec, state->frame_height >> ydec,
  857. &plane, plane_width, plane_height);
  858. od_img_plane_edge_ext8(&state->io_imgs[OD_FRAME_INPUT].planes[pli],
  859. state->frame_width >> xdec, state->frame_height >> ydec,
  860. OD_UMV_PADDING >> xdec, OD_UMV_PADDING >> ydec);
  861. }
  862. }
  863. #if defined(OD_DUMP_IMAGES)
  864. static void od_img_dump_padded(od_state *state) {
  865. daala_info *info;
  866. od_img img;
  867. int nplanes;
  868. int pli;
  869. info = &state->info;
  870. nplanes = info->nplanes;
  871. /*Modify the image offsets to include the padding.*/
  872. *&img = *(state->io_imgs+OD_FRAME_INPUT);
  873. for (pli = 0; pli < nplanes; pli++) {
  874. img.planes[pli].data -= (OD_UMV_PADDING>>info->plane_info[pli].xdec)
  875. +img.planes[pli].ystride*(OD_UMV_PADDING>>info->plane_info[pli].ydec);
  876. }
  877. img.width += OD_UMV_PADDING<<1;
  878. img.height += OD_UMV_PADDING<<1;
  879. od_state_dump_img(state, &img, "pad");
  880. }
  881. #endif
  882. static void od_predict_frame(daala_enc_ctx *enc) {
  883. int nplanes;
  884. int pli;
  885. int frame_width;
  886. int frame_height;
  887. nplanes = enc->state.info.nplanes;
  888. frame_width = enc->state.frame_width;
  889. frame_height = enc->state.frame_height;
  890. #if defined(OD_DUMP_IMAGES) && defined(OD_ANIMATE)
  891. enc->state.ani_iter = 0;
  892. #endif
  893. OD_LOG((OD_LOG_ENCODER, OD_LOG_INFO, "Predicting frame %i:",
  894. (int)daala_granule_basetime(enc, enc->state.cur_time)));
  895. /*2851196 ~= sqrt(ln(2)/6) in Q23.
  896. The lower bound of 56 is there because we do not yet consider PVQ noref
  897. flags during the motion search, so we waste far too many bits trying to
  898. predict unpredictable areas when lamba is too small.
  899. Hopefully when we fix that, we can remove the limit.*/
  900. od_mv_est(enc->mvest, OD_FRAME_PREV,
  901. OD_MAXI((2851196 + (((1 << OD_COEFF_SHIFT) - 1) >> 1) >> OD_COEFF_SHIFT)*
  902. enc->quantizer[0] >> (23 - OD_LAMBDA_SCALE), 56));
  903. od_state_mc_predict(&enc->state, OD_FRAME_PREV);
  904. /*Do edge extension here because the block-size analysis needs to read
  905. outside the frame, but otherwise isn't read from.*/
  906. for (pli = 0; pli < nplanes; pli++) {
  907. od_img_plane plane;
  908. *&plane = *(enc->state.io_imgs[OD_FRAME_REC].planes + pli);
  909. od_img_plane_edge_ext8(&plane, frame_width >> plane.xdec,
  910. frame_height >> plane.ydec, OD_UMV_PADDING >> plane.xdec,
  911. OD_UMV_PADDING >> plane.ydec);
  912. }
  913. #if defined(OD_DUMP_IMAGES)
  914. /*Dump reconstructed frame.*/
  915. /*od_state_dump_img(&enc->state,enc->state.io_imgs + OD_FRAME_REC,"rec");*/
  916. od_state_fill_vis(&enc->state);
  917. od_state_dump_img(&enc->state, &enc->state.vis_img, "vis");
  918. #endif
  919. }
  920. static void od_split_superblocks(daala_enc_ctx *enc, int is_keyframe) {
  921. int nhsb;
  922. int nvsb;
  923. int i;
  924. int j;
  925. int k;
  926. int m;
  927. od_state *state;
  928. state = &enc->state;
  929. nhsb = state->nhsb;
  930. nvsb = state->nvsb;
  931. od_state_init_border(state);
  932. /* Allocate a blockSizeComp for scratch space and then calculate the block
  933. sizes eventually store them in bsize. */
  934. od_log_matrix_uchar(OD_LOG_GENERIC, OD_LOG_INFO, "bimg ",
  935. state->io_imgs[OD_FRAME_INPUT].planes[0].data -
  936. 16*state->io_imgs[OD_FRAME_INPUT].planes[0].ystride - 16,
  937. state->io_imgs[OD_FRAME_INPUT].planes[0].ystride, (nvsb + 1)*32);
  938. OD_ACCT_UPDATE(&enc->acct, od_ec_enc_tell_frac(&enc->ec),
  939. OD_ACCT_CAT_TECHNIQUE, OD_ACCT_TECH_BLOCK_SIZE);
  940. OD_ACCT_UPDATE(&enc->acct, od_ec_enc_tell_frac(&enc->ec),
  941. OD_ACCT_CAT_PLANE, OD_ACCT_PLANE_FRAME);
  942. for (i = 0; i < nvsb; i++) {
  943. unsigned char *bimg;
  944. unsigned char *rimg;
  945. int istride;
  946. int rstride;
  947. int bstride;
  948. bstride = state->bstride;
  949. istride = state->io_imgs[OD_FRAME_INPUT].planes[0].ystride;
  950. rstride = is_keyframe ? 0 :
  951. state->io_imgs[OD_FRAME_REC].planes[0].ystride;
  952. bimg = state->io_imgs[OD_FRAME_INPUT].planes[0].data + i*istride*32;
  953. rimg = state->io_imgs[OD_FRAME_REC].planes[0].data + i*rstride*32;
  954. for (j = 0; j < nhsb; j++) {
  955. int bsize[4][4];
  956. unsigned char *state_bsize;
  957. state_bsize = &state->bsize[i*4*state->bstride + j*4];
  958. od_split_superblock(enc->bs, bimg + j*32, istride,
  959. is_keyframe ? NULL : rimg + j*32, rstride, bsize, enc->quantizer[0]);
  960. /* Grab the 4x4 information returned from `od_split_superblock` in bsize
  961. and store it in the od_state bsize. */
  962. for (k = 0; k < 4; k++) {
  963. for (m = 0; m < 4; m++) {
  964. if (OD_LIMIT_BSIZE_MIN != OD_LIMIT_BSIZE_MAX) {
  965. state_bsize[k*bstride + m] =
  966. OD_MAXI(OD_MINI(bsize[k][m], OD_LIMIT_BSIZE_MAX),
  967. OD_LIMIT_BSIZE_MIN);
  968. }
  969. else {
  970. state_bsize[k*bstride + m] = OD_LIMIT_BSIZE_MIN;
  971. }
  972. }
  973. }
  974. if (OD_LIMIT_BSIZE_MIN != OD_LIMIT_BSIZE_MAX) {
  975. od_block_size_encode(&enc->ec, &enc->state.adapt, &state_bsize[0],
  976. bstride);
  977. }
  978. }
  979. }
  980. OD_ACCT_UPDATE(&enc->acct, od_ec_enc_tell_frac(&enc->ec),
  981. OD_ACCT_CAT_TECHNIQUE, OD_ACCT_TECH_UNKNOWN);
  982. od_log_matrix_uchar(OD_LOG_GENERIC, OD_LOG_INFO, "bsize ", state->bsize,
  983. state->bstride, (nvsb + 1)*4);
  984. for (i = 0; i < nvsb*4; i++) {
  985. for (j = 0; j < nhsb*4; j++) {
  986. OD_LOG_PARTIAL((OD_LOG_GENERIC, OD_LOG_INFO, "%d ",
  987. state->bsize[i*state->bstride + j]));
  988. }
  989. OD_LOG_PARTIAL((OD_LOG_GENERIC, OD_LOG_INFO, "\n"));
  990. }
  991. }
  992. static void od_encode_mvs(daala_enc_ctx *enc) {
  993. int nhmvbs;
  994. int nvmvbs;
  995. int vx;
  996. int vy;
  997. od_img *mvimg;
  998. int width;
  999. int height;
  1000. int mv_res;
  1001. od_mv_grid_pt *mvp;
  1002. od_mv_grid_pt *other;
  1003. od_mv_grid_pt **grid;
  1004. OD_ACCT_UPDATE(&enc->acct, od_ec_enc_tell_frac(&enc->ec),
  1005. OD_ACCT_CAT_TECHNIQUE, OD_ACCT_TECH_MOTION_VECTORS);
  1006. nhmvbs = (enc->state.nhmbs + 1) << 2;
  1007. nvmvbs = (enc->state.nvmbs + 1) << 2;
  1008. mvimg = enc->state.io_imgs + OD_FRAME_REC;
  1009. mv_res = enc->state.mv_res;
  1010. OD_ASSERT(0 <= mv_res && mv_res < 3);
  1011. od_ec_enc_uint(&enc->ec, mv_res, 3);
  1012. width = (mvimg->width + 32) << (3 - mv_res);
  1013. height = (mvimg->height + 32) << (3 - mv_res);
  1014. grid = enc->state.mv_grid;
  1015. /*Code the motion vectors and flags. At each level, the MVs are zero
  1016. outside of the frame, so don't code them.*/
  1017. /*Level 0.*/
  1018. for (vy = 4; vy < nvmvbs; vy += 4) {
  1019. for (vx = 4; vx < nhmvbs; vx += 4) {
  1020. mvp = &grid[vy][vx];
  1021. od_encode_mv(enc, mvp, vx, vy, 0, mv_res, width, height);
  1022. }
  1023. }
  1024. /*od_ec_acct_add_label(&enc->ec.acct, "mvf-l1");
  1025. od_ec_acct_add_label(&enc->ec.acct, "mvf-l2");
  1026. od_ec_acct_add_label(&enc->ec.acct, "mvf-l3");
  1027. od_ec_acct_add_label(&enc->ec.acct, "mvf-l4");*/
  1028. /*Level 1.*/
  1029. for (vy = 2; vy <= nvmvbs; vy += 4) {
  1030. for (vx = 2; vx <= nhmvbs; vx += 4) {
  1031. int p_invalid;
  1032. p_invalid = od_mv_level1_probz(grid, vx, vy);
  1033. mvp = &(grid[vy][vx]);
  1034. /*od_ec_acct_record(&enc->ec.acct, "mvf-l1", mvp->valid, 2,
  1035. od_mv_level1_ctx(grid, vx, vy));*/
  1036. if (p_invalid >= 16384) {
  1037. od_ec_encode_bool_q15(&enc->ec, mvp->valid, p_invalid);
  1038. }
  1039. else {
  1040. od_ec_encode_bool_q15(&enc->ec, !mvp->valid, 32768 - p_invalid);
  1041. }
  1042. if (mvp->valid) {
  1043. od_encode_mv(enc, mvp, vx, vy, 1, mv_res, width, height);
  1044. }
  1045. }
  1046. }
  1047. /*Level 2.*/
  1048. for (vy = 0; vy <= nvmvbs; vy += 2) {
  1049. for (vx = 2*((vy & 3) == 0); vx <= nhmvbs; vx += 4) {
  1050. mvp = &grid[vy][vx];
  1051. if ((vy-2 < 0 || grid[vy-2][vx].valid)
  1052. && (vx-2 < 0 || grid[vy][vx-2].valid)
  1053. && (vy+2 > nvmvbs || grid[vy+2][vx].valid)
  1054. && (vx+2 > nhmvbs || grid[vy][vx+2].valid)) {
  1055. int p_invalid;
  1056. p_invalid = od_mv_level2_probz(grid, vx, vy);
  1057. /*od_ec_acct_record(&enc->ec.acct, "mvf-l2", mvp->valid, 2,
  1058. od_mv_level2_ctx(grid, vx, vy));*/
  1059. if (p_invalid >= 16384) {
  1060. od_ec_encode_bool_q15(&enc->ec, mvp->valid, p_invalid);
  1061. }
  1062. else {
  1063. od_ec_encode_bool_q15(&enc->ec, !mvp->valid, 32768 - p_invalid);
  1064. }
  1065. if (mvp->valid && vx >= 2 && vy >= 2 && vx <= nhmvbs - 2 &&
  1066. vy <= nvmvbs - 2) {
  1067. od_encode_mv(enc, mvp, vx, vy, 2, mv_res, width, height);
  1068. }
  1069. }
  1070. }
  1071. }
  1072. /*Level 3.*/
  1073. /*Level 3 motion vector flags outside the frame are specially coded
  1074. since more information is known. On the grid edge, an L2 MV will only be
  1075. valid if a L3 MV is needed outside of the frame. In the middle of the
  1076. edge, this implies a tristate of the two possible child L3 MVs; they
  1077. can't both be invalid. At the corner, one of the child L3 vectors will
  1078. never appear, so an L2 MV directly implies the remaining L3 child.*/
  1079. for (vy = 1; vy <= nvmvbs; vy += 2) {
  1080. for (vx = 1; vx <= nhmvbs; vx += 2) {
  1081. mvp = &grid[vy][vx];
  1082. if (vy < 2 || vy > nvmvbs - 2) {
  1083. if ((vx == 3 && grid[vy == 1 ? vy - 1 : vy + 1][vx - 1].valid)
  1084. || (vx == nhmvbs - 3
  1085. && grid[vy == 1 ? vy - 1 : vy + 1][vx + 1].valid)) {
  1086. other = &grid[vy][vx == 3 ? vx - 2 : vx + 2];
  1087. /*MVs are valid but will be zero.*/
  1088. OD_ASSERT(mvp->valid && !mvp->mv[0] && !mvp->mv[1]
  1089. && !other->valid);
  1090. }
  1091. else if (vx > 3 && vx < nhmvbs - 3) {
  1092. other = &grid[vy][vx + 2];
  1093. if (!(vx & 2) && grid[vy == 1 ? vy - 1 : vy + 1][vx + 1].valid) {
  1094. /*0 = both valid, 1 = only this one, 2 = other one valid*/
  1095. int s;
  1096. s = mvp->valid && other->valid ? 0 : mvp->valid
  1097. + (other->valid << 1);
  1098. od_ec_encode_cdf_q15(&enc->ec, s, OD_UNIFORM_CDF_Q15(3), 3);
  1099. /*MVs are valid but will be zero.*/
  1100. OD_ASSERT((mvp->valid && !mvp->mv[0] && !mvp->mv[1])
  1101. || (other->valid && !other->mv[0] && !other->mv[1]));
  1102. }
  1103. else if (!(vx & 2)) {
  1104. OD_ASSERT(!mvp->valid && !other->valid);
  1105. }
  1106. }
  1107. else {
  1108. OD_ASSERT(!mvp->valid);
  1109. }
  1110. }
  1111. else if (vx < 2 || vx > nhmvbs - 2) {
  1112. od_mv_grid_pt *other;
  1113. if ((vy == 3 && grid[vy - 1][vx == 1 ? vx - 1 : vx + 1].valid)
  1114. || (vy == nvmvbs - 3
  1115. && grid[vy + 1][vx == 1 ? vx - 1 : vx + 1].valid)) {
  1116. other = &grid[vy == 3 ? vy - 2 : vy + 2][vx];
  1117. /*MVs are valid but will be zero.*/
  1118. OD_ASSERT(mvp->valid && !mvp->mv[0] && !mvp->mv[1]
  1119. && !other->valid);
  1120. }
  1121. else if (!(vy & 2) && grid[vy + 1][vx == 1 ? vx - 1 : vx + 1].valid) {
  1122. int s;
  1123. other = &grid[vy + 2][vx];
  1124. s = mvp->valid && other->valid ? 0 : mvp->valid
  1125. + (other->valid << 1);
  1126. od_ec_encode_cdf_q15(&enc->ec, s, OD_UNIFORM_CDF_Q15(3), 3);
  1127. /*MVs are valid but will be zero.*/
  1128. OD_ASSERT((mvp->valid && !mvp->mv[0] && !mvp->mv[1])
  1129. || (other->valid && !other->mv[0] && !other->mv[1]));
  1130. }
  1131. else if (!(vy & 2)) {
  1132. other = &grid[vy == 3 ? vy - 2 : vy + 2][vx];
  1133. OD_ASSERT(!mvp->valid && !other->valid);
  1134. }
  1135. }
  1136. else if (grid[vy - 1][vx - 1].valid && grid[vy - 1][vx + 1].valid
  1137. && grid[vy + 1][vx + 1].valid && grid[vy + 1][vx - 1].valid) {
  1138. int p_invalid;
  1139. p_invalid = od_mv_level3_probz(grid, vx, vy);
  1140. /*od_ec_acct_record(&enc->ec.acct, "mvf-l3", mvp->valid, 2,
  1141. od_mv_level3_ctx(grid, vx, vy));*/
  1142. if (p_invalid >= 16384) {
  1143. od_ec_encode_bool_q15(&enc->ec, mvp->valid, p_invalid);
  1144. }
  1145. else {
  1146. od_ec_encode_bool_q15(&enc->ec, !mvp->valid, 32768 - p_invalid);
  1147. }
  1148. if (mvp->valid) {
  1149. od_encode_mv(enc, mvp, vx, vy, 3, mv_res, width, height);
  1150. }
  1151. }
  1152. else {
  1153. OD_ASSERT(!mvp->valid);
  1154. }
  1155. }
  1156. }
  1157. /*Level 4.*/
  1158. for (vy = 2; vy <= nvmvbs - 2; vy += 1) {
  1159. for (vx = 3 - (vy & 1); vx <= nhmvbs - 2; vx += 2) {
  1160. mvp = &grid[vy][vx];
  1161. if (grid[vy-1][vx].valid && grid[vy][vx-1].valid
  1162. && grid[vy+1][vx].valid && grid[vy][vx+1].valid) {
  1163. int p_invalid;
  1164. p_invalid = od_mv_level4_probz(grid, vx, vy);
  1165. /*od_ec_acct_record(&enc->ec.acct, "mvf-l4", mvp->valid, 2,
  1166. od_mv_level4_ctx(grid, vx, vy));*/
  1167. if (p_invalid >= 16384) {
  1168. od_ec_encode_bool_q15(&enc->ec, mvp->valid, p_invalid);
  1169. }
  1170. else {
  1171. od_ec_encode_bool_q15(&enc->ec, !mvp->valid, 32768 - p_invalid);
  1172. }
  1173. if (mvp->valid) {
  1174. od_encode_mv(enc, mvp, vx, vy, 4, mv_res, width, height);
  1175. }
  1176. else {
  1177. OD_ASSERT(!mvp->valid);
  1178. }
  1179. }
  1180. }
  1181. }
  1182. OD_ACCT_UPDATE(&enc->acct, od_ec_enc_tell_frac(&enc->ec),
  1183. OD_ACCT_CAT_TECHNIQUE, OD_ACCT_TECH_UNKNOWN);
  1184. }
  1185. static void od_encode_residual(daala_enc_ctx *enc, od_mb_enc_ctx *mbctx) {
  1186. int xdec;
  1187. int ydec;
  1188. int sby;
  1189. int sbx;
  1190. int h;
  1191. int w;
  1192. int y;
  1193. int x;
  1194. int pli;
  1195. int nplanes;
  1196. int frame_width;
  1197. int frame_height;
  1198. int nhsb;
  1199. int nvsb;
  1200. od_state *state = &enc->state;
  1201. nplanes = state->info.nplanes;
  1202. frame_width = state->frame_width;
  1203. frame_height = state->frame_height;
  1204. nhsb = state->nhsb;
  1205. nvsb = state->nvsb;
  1206. for (pli = 0; pli < nplanes; pli++) {
  1207. xdec = state->io_imgs[OD_FRAME_INPUT].planes[pli].xdec;
  1208. ydec = state->io_imgs[OD_FRAME_INPUT].planes[pli].ydec;
  1209. w = frame_width >> xdec;
  1210. h = frame_height >> ydec;
  1211. OD_ACCT_UPDATE(&enc->acct, od_ec_enc_tell_frac(&enc->ec),
  1212. OD_ACCT_CAT_TECHNIQUE, OD_ACCT_TECH_FRAME);
  1213. OD_ACCT_UPDATE(&enc->acct, od_ec_enc_tell_frac(&enc->ec),
  1214. OD_ACCT_CAT_PLANE, OD_ACCT_PLANE_FRAME);
  1215. /* TODO: We shouldn't be encoding the full, linear quantizer range. */
  1216. od_ec_enc_uint(&enc->ec, enc->quantizer[pli], 512<<OD_COEFF_SHIFT);
  1217. /*If the quantizer is zero (lossless), force scalar.*/
  1218. OD_ACCT_UPDATE(&enc->acct, od_ec_enc_tell_frac(&enc->ec),
  1219. OD_ACCT_CAT_TECHNIQUE, OD_ACCT_TECH_UNKNOWN);
  1220. OD_ACCT_UPDATE(&enc->acct, od_ec_enc_tell_frac(&enc->ec),
  1221. OD_ACCT_CAT_PLANE, OD_ACCT_PLANE_UNKNOWN);
  1222. }
  1223. for (pli = 0; pli < nplanes; pli++) {
  1224. xdec = state->io_imgs[OD_FRAME_INPUT].planes[pli].xdec;
  1225. ydec = state->io_imgs[OD_FRAME_INPUT].planes[pli].ydec;
  1226. w = frame_width >> xdec;
  1227. h = frame_height >> ydec;
  1228. /*Collect the image data needed for this plane.*/
  1229. {
  1230. unsigned char *data;
  1231. unsigned char *mdata;
  1232. int ystride;
  1233. int coeff_shift;
  1234. coeff_shift = enc->quantizer[pli] == 0 ? 0 : OD_COEFF_SHIFT;
  1235. data = state->io_imgs[OD_FRAME_INPUT].planes[pli].data;
  1236. mdata = state->io_imgs[OD_FRAME_REC].planes[pli].data;
  1237. ystride = state->io_imgs[OD_FRAME_INPUT].planes[pli].ystride;
  1238. for (y = 0; y < h; y++) {
  1239. for (x = 0; x < w; x++) {
  1240. state->ctmp[pli][y*w + x] = (data[ystride*y + x] - 128) <<
  1241. coeff_shift;
  1242. if (!mbctx->is_keyframe) {
  1243. state->mctmp[pli][y*w + x] = (mdata[ystride*y + x] - 128)
  1244. << coeff_shift;
  1245. }
  1246. }
  1247. }
  1248. }
  1249. /*Apply the prefilter across the entire image.*/
  1250. for (sby = 0; sby < nvsb; sby++) {
  1251. for (sbx = 0; sbx < nhsb; sbx++) {
  1252. od_apply_prefilter(state->ctmp[pli], w, sbx, sby, 3,
  1253. state->bsize, state->bstride, xdec, ydec,
  1254. (sbx > 0 ? OD_LEFT_EDGE : 0) |
  1255. (sby < nvsb - 1 ? OD_BOTTOM_EDGE : 0));
  1256. if (!mbctx->is_keyframe) {
  1257. od_apply_prefilter(state->mctmp[pli], w, sbx, sby, 3, state->bsize,
  1258. state->bstride, xdec, ydec, (sbx > 0 ? OD_LEFT_EDGE : 0) |
  1259. (sby < nvsb - 1 ? OD_BOTTOM_EDGE : 0));
  1260. }
  1261. }
  1262. }
  1263. }
  1264. for (sby = 0; sby < nvsb; sby++) {
  1265. for (sbx = 0; sbx < nhsb; sbx++) {
  1266. for (pli = 0; pli < nplanes; pli++) {
  1267. OD_ACCT_UPDATE(&enc->acct, od_ec_enc_tell_frac(&enc->ec),
  1268. OD_ACCT_CAT_PLANE, OD_ACCT_PLANE_LUMA + pli);
  1269. mbctx->c = state->ctmp[pli];
  1270. mbctx->d = state->dtmp;
  1271. mbctx->mc = state->mctmp[pli];
  1272. mbctx->md = state->mdtmp[pli];
  1273. mbctx->l = state->lbuf[pli];
  1274. xdec = state->io_imgs[OD_FRAME_INPUT].planes[pli].xdec;
  1275. ydec = state->io_imgs[OD_FRAME_INPUT].planes[pli].ydec;
  1276. mbctx->nk = mbctx->k_total = mbctx->sum_ex_total_q8 = 0;
  1277. mbctx->ncount = mbctx->count_total_q8 = mbctx->count_ex_total_q8 = 0;
  1278. /*Need to update this to decay based on superblocks width.*/
  1279. od_compute_dcts(enc, mbctx, pli, sbx, sby, 3, xdec, ydec);
  1280. if (!OD_DISABLE_HAAR_DC && mbctx->is_keyframe) {
  1281. od_quantize_haar_dc(enc, mbctx, pli, sbx, sby, 3, xdec, ydec, 0,
  1282. 0, sby > 0 && sbx < nhsb - 1);
  1283. }
  1284. od_encode_recursive(enc, mbctx, pli, sbx, sby, 3, xdec, ydec,
  1285. sby > 0 && sbx < nhsb - 1);
  1286. }
  1287. OD_ACCT_UPDATE(&enc->acct, od_ec_enc_tell_frac(&enc->ec),
  1288. OD_ACCT_CAT_PLANE, OD_ACCT_PLANE_UNKNOWN);
  1289. }
  1290. }
  1291. for (pli = 0; pli < nplanes; pli++) {
  1292. xdec = state->io_imgs[OD_FRAME_INPUT].planes[pli].xdec;
  1293. ydec = state->io_imgs[OD_FRAME_INPUT].planes[pli].ydec;
  1294. w = frame_width >> xdec;
  1295. h = frame_height >> ydec;
  1296. /*Apply the postfilter across the entire image.*/
  1297. for (sby = 0; sby < nvsb; sby++) {
  1298. for (sbx = 0; sbx < nhsb; sbx++) {
  1299. od_apply_postfilter(state->ctmp[pli], w, sbx, sby, 3, state->bsize,
  1300. state->bstride, xdec, ydec,
  1301. (sby > 0 ? OD_TOP_EDGE : 0) | (sbx < nhsb - 1 ? OD_RIGHT_EDGE : 0));
  1302. }
  1303. }
  1304. {
  1305. unsigned char *data;
  1306. int ystride;
  1307. int coeff_shift;
  1308. coeff_shift = enc->quantizer[pli] == 0 ? 0 : OD_COEFF_SHIFT;
  1309. data = state->io_imgs[OD_FRAME_REC].planes[pli].data;
  1310. ystride = state->io_imgs[OD_FRAME_INPUT].planes[pli].ystride;
  1311. for (y = 0; y < h; y++) {
  1312. for (x = 0; x < w; x++) {
  1313. data[ystride*y + x] = OD_CLAMP255(((state->ctmp[pli][y*w + x]
  1314. + (1 << coeff_shift >> 1)) >> coeff_shift) + 128);
  1315. }
  1316. }
  1317. }
  1318. }
  1319. }
  1320. #if defined(OD_LOGGING_ENABLED)
  1321. static void od_dump_frame_metrics(od_state *state) {
  1322. int pli;
  1323. int nplanes;
  1324. int frame_width;
  1325. int frame_height;
  1326. nplanes = state->info.nplanes;
  1327. frame_width = state->frame_width;
  1328. frame_height = state->frame_height;
  1329. for (pli = 0; pli < nplanes; pli++) {
  1330. unsigned char *data;
  1331. ogg_int64_t enc_sqerr;
  1332. ogg_uint32_t npixels;
  1333. int ystride;
  1334. int xdec;
  1335. int ydec;
  1336. int w;
  1337. int h;
  1338. int x;
  1339. int y;
  1340. enc_sqerr = 0;
  1341. data = state->io_imgs[OD_FRAME_INPUT].planes[pli].data;
  1342. ystride = state->io_imgs[OD_FRAME_INPUT].planes[pli].ystride;
  1343. xdec = state->io_imgs[OD_FRAME_INPUT].planes[pli].xdec;
  1344. ydec = state->io_imgs[OD_FRAME_INPUT].planes[pli].ydec;
  1345. w = frame_width >> xdec;
  1346. h = frame_height >> ydec;
  1347. npixels = w*h;
  1348. for (y = 0; y < h; y++) {
  1349. unsigned char *rec_row;
  1350. unsigned char *inp_row;
  1351. rec_row = state->io_imgs[OD_FRAME_REC].planes[pli].data +
  1352. state->io_imgs[OD_FRAME_REC].planes[pli].ystride*y;
  1353. inp_row = data + ystride*y;
  1354. for (x = 0; x < w; x++) {
  1355. int inp_val;
  1356. int diff;
  1357. inp_val = inp_row[x];
  1358. diff = inp_val - rec_row[x];
  1359. enc_sqerr += diff*diff;
  1360. }
  1361. }
  1362. OD_LOG((OD_LOG_ENCODER, OD_LOG_DEBUG,
  1363. "Encoded Plane %i, Squared Error: %12lli Pixels: %6u PSNR: %5.4f",
  1364. pli, (long long)enc_sqerr, npixels,
  1365. 10*log10(255*255.0*npixels/enc_sqerr)));
  1366. }
  1367. }
  1368. #endif
  1369. int daala_encode_img_in(daala_enc_ctx *enc, od_img *img, int duration) {
  1370. int refi;
  1371. int nplanes;
  1372. int pli;
  1373. int frame_width;
  1374. int frame_height;
  1375. int pic_width;
  1376. int pic_height;
  1377. od_mb_enc_ctx mbctx;
  1378. #if defined(OD_ACCOUNTING)
  1379. od_acct_reset(&enc->acct);
  1380. #endif
  1381. #if defined(OD_EC_ACCOUNTING)
  1382. od_ec_acct_reset(&enc->ec.acct);
  1383. #endif
  1384. if (enc == NULL || img == NULL) return OD_EFAULT;
  1385. if (enc->packet_state == OD_PACKET_DONE) return OD_EINVAL;
  1386. /*Check the input image dimensions to make sure they're compatible with the
  1387. declared video size.*/
  1388. nplanes = enc->state.info.nplanes;
  1389. for (pli = 0; pli < OD_NPLANES_MAX; pli++) {
  1390. mbctx.tf[pli] = enc->state.tf[pli];
  1391. mbctx.modes[pli] = enc->state.modes[pli];
  1392. }
  1393. if (img->nplanes != nplanes) return OD_EINVAL;
  1394. for (pli = 0; pli < nplanes; pli++) {
  1395. if (img->planes[pli].xdec != enc->state.info.plane_info[pli].xdec
  1396. || img->planes[pli].ydec != enc->state.info.plane_info[pli].ydec) {
  1397. return OD_EINVAL;
  1398. }
  1399. }
  1400. frame_width = enc->state.frame_width;
  1401. frame_height = enc->state.frame_height;
  1402. pic_width = enc->state.info.pic_width;
  1403. pic_height = enc->state.info.pic_height;
  1404. if (img->width != frame_width || img->height != frame_height) {
  1405. /*The buffer does not match the frame size.
  1406. Check to see if it matches the picture size.*/
  1407. if (img->width != pic_width || img->height != pic_height) {
  1408. /*It doesn't; we don't know how to handle it yet.*/
  1409. return OD_EINVAL;
  1410. }
  1411. }
  1412. od_img_copy_pad(&enc->state, img);
  1413. #if defined(OD_DUMP_IMAGES)
  1414. if (od_logging_active(OD_LOG_GENERIC, OD_LOG_DEBUG)) {
  1415. od_img_dump_padded(&enc->state);
  1416. }
  1417. #endif
  1418. /* Check if the frame should be a keyframe. */
  1419. mbctx.is_keyframe = (enc->state.cur_time %
  1420. (enc->state.info.keyframe_rate) == 0) ? 1 : 0;
  1421. /*Update the buffer state.*/
  1422. if (enc->state.ref_imgi[OD_FRAME_SELF] >= 0) {
  1423. enc->state.ref_imgi[OD_FRAME_PREV] =
  1424. enc->state.ref_imgi[OD_FRAME_SELF];
  1425. /*TODO: Update golden frame.*/
  1426. if (enc->state.ref_imgi[OD_FRAME_GOLD] < 0) {
  1427. enc->state.ref_imgi[OD_FRAME_GOLD] =
  1428. enc->state.ref_imgi[OD_FRAME_SELF];
  1429. /*TODO: Mark keyframe timebase.*/
  1430. }
  1431. }
  1432. for (pli = 0; pli < nplanes; pli++) {
  1433. enc->quantizer[pli] = od_quantizer_from_quality(enc->quality[pli]);
  1434. /* At low rate, boost the keyframe quality by multiplying the quantizer
  1435. by 29/32 (~0.9). */
  1436. if (mbctx.is_keyframe && enc->quantizer[pli] > 20 << OD_COEFF_SHIFT) {
  1437. enc->quantizer[pli] = (16+29*enc->quantizer[pli]) >> 5;
  1438. }
  1439. }
  1440. /*Select a free buffer to use for this reference frame.*/
  1441. for (refi = 0; refi == enc->state.ref_imgi[OD_FRAME_GOLD]
  1442. || refi == enc->state.ref_imgi[OD_FRAME_PREV]
  1443. || refi == enc->state.ref_imgi[OD_FRAME_NEXT]; refi++);
  1444. enc->state.ref_imgi[OD_FRAME_SELF] = refi;
  1445. /*We must be a keyframe if we don't have a reference.*/
  1446. mbctx.is_keyframe |= !(enc->state.ref_imgi[OD_FRAME_PREV] >= 0);
  1447. /*Initialize the entropy coder.*/
  1448. od_ec_enc_reset(&enc->ec);
  1449. OD_ACCT_UPDATE(&enc->acct, od_ec_enc_tell_frac(&enc->ec),
  1450. OD_ACCT_CAT_TECHNIQUE, OD_ACCT_TECH_FRAME);
  1451. OD_ACCT_UPDATE(&enc->acct, od_ec_enc_tell_frac(&enc->ec),
  1452. OD_ACCT_CAT_PLANE, OD_ACCT_PLANE_FRAME);
  1453. /*Write a bit to mark this as a data packet.*/
  1454. od_ec_encode_bool_q15(&enc->ec, 0, 16384);
  1455. /*Code the keyframe bit.*/
  1456. od_ec_encode_bool_q15(&enc->ec, mbctx.is_keyframe, 16384);
  1457. OD_LOG((OD_LOG_ENCODER, OD_LOG_INFO, "is_keyframe=%d", mbctx.is_keyframe));
  1458. OD_ACCT_UPDATE(&enc->acct, od_ec_enc_tell_frac(&enc->ec),
  1459. OD_ACCT_CAT_TECHNIQUE, OD_ACCT_TECH_UNKNOWN);
  1460. OD_ACCT_UPDATE(&enc->acct, od_ec_enc_tell_frac(&enc->ec),
  1461. OD_ACCT_CAT_PLANE, OD_ACCT_TECH_UNKNOWN);
  1462. /*TODO: Increment frame count.*/
  1463. od_adapt_ctx_reset(&enc->state.adapt, mbctx.is_keyframe);
  1464. if (!mbctx.is_keyframe) {
  1465. od_predict_frame(enc);
  1466. od_split_superblocks(enc, 0);
  1467. od_encode_mvs(enc);
  1468. }
  1469. else {
  1470. od_split_superblocks(enc, 1);
  1471. }
  1472. od_encode_residual(enc, &mbctx);
  1473. #if defined(OD_DUMP_IMAGES) || defined(OD_DUMP_RECONS)
  1474. /*Dump YUV*/
  1475. od_state_dump_yuv(&enc->state, enc->state.io_imgs + OD_FRAME_REC, "out");
  1476. #endif
  1477. #if defined(OD_LOGGING_ENABLED)
  1478. od_dump_frame_metrics(&enc->state);
  1479. #endif
  1480. OD_LOG((OD_LOG_ENCODER, OD_LOG_INFO,
  1481. "mode bits: %f/%f=%f", mode_bits, mode_count, mode_bits/mode_count));
  1482. enc->packet_state = OD_PACKET_READY;
  1483. od_state_upsample8(&enc->state,
  1484. enc->state.ref_imgs + enc->state.ref_imgi[OD_FRAME_SELF],
  1485. enc->state.io_imgs + OD_FRAME_REC);
  1486. #if defined(OD_DUMP_IMAGES)
  1487. /*Dump reference frame.*/
  1488. /*od_state_dump_img(&enc->state,
  1489. enc->state.ref_img + enc->state.ref_imigi[OD_FRAME_SELF], "ref");*/
  1490. #endif
  1491. #if defined(OD_ACCOUNTING)
  1492. OD_ASSERT(enc->acct.last_frac_bits == od_ec_enc_tell_frac(&enc->ec));
  1493. od_acct_write(&enc->acct, enc->state.cur_time);
  1494. #endif
  1495. #if defined(OD_EC_ACCOUNTING)
  1496. od_ec_acct_write(&enc->ec.acct);
  1497. #endif
  1498. if (enc->state.info.frame_duration == 0) enc->state.cur_time += duration;
  1499. else enc->state.cur_time += enc->state.info.frame_duration;
  1500. return 0;
  1501. }
  1502. #if defined(OD_ENCODER_CHECK)
  1503. static void daala_encoder_check(daala_enc_ctx *ctx, od_img *img,
  1504. ogg_packet *op) {
  1505. int pli;
  1506. od_img dec_img;
  1507. OD_ASSERT(ctx->dec);
  1508. if (daala_decode_packet_in(ctx->dec, &dec_img, op) < 0) {
  1509. fprintf(stderr,"decode failed!\n");
  1510. return;
  1511. }
  1512. OD_ASSERT(img->nplanes == dec_img.nplanes);
  1513. for (pli = 0; pli < img->nplanes; pli++) {
  1514. int plane_width;
  1515. int plane_height;
  1516. int xdec;
  1517. int ydec;
  1518. int i;
  1519. OD_ASSERT(img->planes[pli].xdec == dec_img.planes[pli].xdec);
  1520. OD_ASSERT(img->planes[pli].ydec == dec_img.planes[pli].ydec);
  1521. OD_ASSERT(img->planes[pli].ystride == dec_img.planes[pli].ystride);
  1522. xdec = dec_img.planes[pli].xdec;
  1523. ydec = dec_img.planes[pli].ydec;
  1524. plane_width = ctx->dec->state.frame_width >> xdec;
  1525. plane_height = ctx->dec->state.frame_height >> ydec;
  1526. for (i = 0; i < plane_height; i++) {
  1527. if (memcmp(img->planes[pli].data + img->planes[pli].ystride * i,
  1528. dec_img.planes[pli].data + dec_img.planes[pli].ystride * i,
  1529. plane_width)) {
  1530. fprintf(stderr,"pixel mismatch in row %d\n", i);
  1531. }
  1532. }
  1533. }
  1534. }
  1535. #endif
  1536. int daala_encode_packet_out(daala_enc_ctx *enc, int last, ogg_packet *op) {
  1537. ogg_uint32_t nbytes;
  1538. if (enc == NULL || op == NULL) return OD_EFAULT;
  1539. else if (enc->packet_state <= 0 || enc->packet_state == OD_PACKET_DONE) {
  1540. return 0;
  1541. }
  1542. op->packet = od_ec_enc_done(&enc->ec, &nbytes);
  1543. op->bytes = nbytes;
  1544. OD_LOG((OD_LOG_ENCODER, OD_LOG_INFO, "Output Bytes: %ld", op->bytes));
  1545. op->b_o_s = 0;
  1546. op->e_o_s = last;
  1547. op->packetno = 0;
  1548. op->granulepos = enc->state.cur_time;
  1549. if (last) enc->packet_state = OD_PACKET_DONE;
  1550. else enc->packet_state = OD_PACKET_EMPTY;
  1551. #if defined(OD_ENCODER_CHECK)
  1552. /*Compare reconstructed frame against decoded frame.*/
  1553. daala_encoder_check(enc, enc->state.io_imgs + OD_FRAME_REC, op);
  1554. #endif
  1555. return 1;
  1556. }