rate.c 45 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138
  1. /********************************************************************
  2. * *
  3. * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
  4. * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
  5. * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  6. * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
  7. * *
  8. * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2009 *
  9. * by the Xiph.Org Foundation http://www.xiph.org/ *
  10. * *
  11. ********************************************************************
  12. function:
  13. last mod: $Id: rate.c 16503 2009-08-22 18:14:02Z giles $
  14. ********************************************************************/
  15. #include <stdlib.h>
  16. #include <string.h>
  17. #include "encint.h"
  /*A rough lookup table for tan(x), 0<=x<pi/2.
    The values are Q12 fixed-point and spaced at 5 degree intervals.
    These decisions are somewhat arbitrary, but sufficient for the 2nd order
     Bessel follower below.
    Values of x larger than 85 degrees are extrapolated from the last interval,
     which is way off, but "good enough".
    The table is only ever read, so it is declared const.*/
  static const unsigned short OC_ROUGH_TAN_LOOKUP[18]={
        0,  358,  722, 1098, 1491, 1910,
     2365, 2868, 3437, 4096, 4881, 5850,
     7094, 8784,11254,15286,23230,46817
  };
  29. /*_alpha is Q24 in the range [0,0.5).
  30. The return values is 5.12.*/
  31. static int oc_warp_alpha(int _alpha){
  32. int i;
  33. int d;
  34. int t0;
  35. int t1;
  36. i=_alpha*36>>24;
  37. if(i>=17)i=16;
  38. t0=OC_ROUGH_TAN_LOOKUP[i];
  39. t1=OC_ROUGH_TAN_LOOKUP[i+1];
  40. d=_alpha*36-(i<<24);
  41. return (int)(((ogg_int64_t)t0<<32)+(t1-t0<<8)*(ogg_int64_t)d>>32);
  42. }
  43. /*Re-initialize the Bessel filter coefficients with the specified delay.
  44. This does not alter the x/y state, but changes the reaction time of the
  45. filter.
  46. Altering the time constant of a reactive filter without alterning internal
  47. state is something that has to be done carefuly, but our design operates at
  48. high enough delays and with small enough time constant changes to make it
  49. safe.*/
  50. static void oc_iir_filter_reinit(oc_iir_filter *_f,int _delay){
  51. int alpha;
  52. ogg_int64_t one48;
  53. ogg_int64_t warp;
  54. ogg_int64_t k1;
  55. ogg_int64_t k2;
  56. ogg_int64_t d;
  57. ogg_int64_t a;
  58. ogg_int64_t ik2;
  59. ogg_int64_t b1;
  60. ogg_int64_t b2;
  61. /*This borrows some code from an unreleased version of Postfish.
  62. See the recipe at http://unicorn.us.com/alex/2polefilters.html for details
  63. on deriving the filter coefficients.*/
  64. /*alpha is Q24*/
  65. alpha=(1<<24)/_delay;
  66. one48=(ogg_int64_t)1<<48;
  67. /*warp is 7.12*/
  68. warp=OC_MAXI(oc_warp_alpha(alpha),1);
  69. /*k1 is 9.12*/
  70. k1=3*warp;
  71. /*k2 is 16.24.*/
  72. k2=k1*warp;
  73. /*d is 16.15.*/
  74. d=((1<<12)+k1<<12)+k2+256>>9;
  75. /*a is 0.32, since d is larger than both 1.0 and k2.*/
  76. a=(k2<<23)/d;
  77. /*ik2 is 25.24.*/
  78. ik2=one48/k2;
  79. /*b1 is Q56; in practice, the integer ranges between -2 and 2.*/
  80. b1=2*a*(ik2-(1<<24));
  81. /*b2 is Q56; in practice, the integer ranges between -2 and 2.*/
  82. b2=(one48<<8)-(4*a<<24)-b1;
  83. /*All of the filter parameters are Q24.*/
  84. _f->c[0]=(ogg_int32_t)(b1+((ogg_int64_t)1<<31)>>32);
  85. _f->c[1]=(ogg_int32_t)(b2+((ogg_int64_t)1<<31)>>32);
  86. _f->g=(ogg_int32_t)(a+128>>8);
  87. }
  88. /*Initialize a 2nd order low-pass Bessel filter with the corresponding delay
  89. and initial value.
  90. _value is Q24.*/
  91. static void oc_iir_filter_init(oc_iir_filter *_f,int _delay,ogg_int32_t _value){
  92. oc_iir_filter_reinit(_f,_delay);
  93. _f->y[1]=_f->y[0]=_f->x[1]=_f->x[0]=_value;
  94. }
  95. static ogg_int64_t oc_iir_filter_update(oc_iir_filter *_f,ogg_int32_t _x){
  96. ogg_int64_t c0;
  97. ogg_int64_t c1;
  98. ogg_int64_t g;
  99. ogg_int64_t x0;
  100. ogg_int64_t x1;
  101. ogg_int64_t y0;
  102. ogg_int64_t y1;
  103. ogg_int64_t ya;
  104. c0=_f->c[0];
  105. c1=_f->c[1];
  106. g=_f->g;
  107. x0=_f->x[0];
  108. x1=_f->x[1];
  109. y0=_f->y[0];
  110. y1=_f->y[1];
  111. ya=(_x+x0*2+x1)*g+y0*c0+y1*c1+(1<<23)>>24;
  112. _f->x[1]=(ogg_int32_t)x0;
  113. _f->x[0]=_x;
  114. _f->y[1]=(ogg_int32_t)y0;
  115. _f->y[0]=(ogg_int32_t)ya;
  116. return ya;
  117. }
  118. /*Search for the quantizer that matches the target most closely.
  119. We don't assume a linear ordering, but when there are ties we pick the
  120. quantizer closest to the old one.*/
  121. static int oc_enc_find_qi_for_target(oc_enc_ctx *_enc,int _qti,int _qi_old,
  122. int _qi_min,ogg_int64_t _log_qtarget){
  123. ogg_int64_t best_qdiff;
  124. int best_qi;
  125. int qi;
  126. best_qi=_qi_min;
  127. best_qdiff=_enc->log_qavg[_qti][best_qi]-_log_qtarget;
  128. best_qdiff=best_qdiff+OC_SIGNMASK(best_qdiff)^OC_SIGNMASK(best_qdiff);
  129. for(qi=_qi_min+1;qi<64;qi++){
  130. ogg_int64_t qdiff;
  131. qdiff=_enc->log_qavg[_qti][qi]-_log_qtarget;
  132. qdiff=qdiff+OC_SIGNMASK(qdiff)^OC_SIGNMASK(qdiff);
  133. if(qdiff<best_qdiff||
  134. qdiff==best_qdiff&&abs(qi-_qi_old)<abs(best_qi-_qi_old)){
  135. best_qi=qi;
  136. best_qdiff=qdiff;
  137. }
  138. }
  139. return best_qi;
  140. }
  141. void oc_enc_calc_lambda(oc_enc_ctx *_enc,int _qti){
  142. ogg_int64_t lq;
  143. int qi;
  144. int qi1;
  145. int nqis;
  146. /*For now, lambda is fixed depending on the qi value and frame type:
  147. lambda=qscale*(qavg[qti][qi]**2),
  148. where qscale=0.2125.
  149. This was derived by exhaustively searching for the optimal quantizer for
  150. the AC coefficients in each block from a number of test sequences for a
  151. number of fixed lambda values and fitting the peaks of the resulting
  152. histograms (on the log(qavg) scale).
  153. The same model applies to both inter and intra frames.
  154. A more adaptive scheme might perform better.*/
  155. qi=_enc->state.qis[0];
  156. /*If rate control is active, use the lambda for the _target_ quantizer.
  157. This allows us to scale to rates slightly lower than we'd normally be able
  158. to reach, and give the rate control a semblance of "fractional qi"
  159. precision.
  160. TODO: Add API for changing QI, and allow extra precision.*/
  161. if(_enc->state.info.target_bitrate>0)lq=_enc->rc.log_qtarget;
  162. else lq=_enc->log_qavg[_qti][qi];
  163. /*The resulting lambda value is less than 0x500000.*/
  164. _enc->lambda=(int)oc_bexp64(2*lq-0x4780BD468D6B62BLL);
  165. /*Select additional quantizers.
  166. The R-D optimal block AC quantizer statistics suggest that the distribution
  167. is roughly Gaussian-like with a slight positive skew.
  168. K-means clustering on log_qavg to select 3 quantizers produces cluster
  169. centers of {log_qavg-0.6,log_qavg,log_qavg+0.7}.
  170. Experiments confirm these are relatively good choices.
  171. Although we do greedy R-D optimization of the qii flags to avoid switching
  172. too frequently, this becomes ineffective at low rates, either because we
  173. do a poor job of predicting the actual R-D cost, or the greedy
  174. optimization is not sufficient.
  175. Therefore adaptive quantization is disabled above an (experimentally
  176. suggested) threshold of log_qavg=7.00 (e.g., below INTRA qi=12 or
  177. INTER qi=20 with current matrices).
  178. This may need to be revised if the R-D cost estimation or qii flag
  179. optimization strategies change.*/
  180. nqis=1;
  181. if(lq<(OC_Q57(56)>>3)&&!_enc->vp3_compatible){
  182. qi1=oc_enc_find_qi_for_target(_enc,_qti,OC_MAXI(qi-1,0),0,
  183. lq+(OC_Q57(7)+5)/10);
  184. if(qi1!=qi)_enc->state.qis[nqis++]=qi1;
  185. qi1=oc_enc_find_qi_for_target(_enc,_qti,OC_MINI(qi+1,63),0,
  186. lq-(OC_Q57(6)+5)/10);
  187. if(qi1!=qi&&qi1!=_enc->state.qis[nqis-1])_enc->state.qis[nqis++]=qi1;
  188. }
  189. _enc->state.nqis=nqis;
  190. }
  191. /*Binary exponential of _log_scale with 24-bit fractional precision and
  192. saturation.
  193. _log_scale: A binary logarithm in Q24 format.
  194. Return: The binary exponential in Q24 format, saturated to 2**47-1 if
  195. _log_scale was too large.*/
  196. static ogg_int64_t oc_bexp_q24(ogg_int32_t _log_scale){
  197. if(_log_scale<(ogg_int32_t)23<<24){
  198. ogg_int64_t ret;
  199. ret=oc_bexp64(((ogg_int64_t)_log_scale<<33)+OC_Q57(24));
  200. return ret<0x7FFFFFFFFFFFLL?ret:0x7FFFFFFFFFFFLL;
  201. }
  202. return 0x7FFFFFFFFFFFLL;
  203. }
  204. /*Convenience function converts Q57 value to a clamped 32-bit Q24 value
  205. _in: input in Q57 format.
  206. Return: same number in Q24 */
  207. static ogg_int32_t oc_q57_to_q24(ogg_int64_t _in){
  208. ogg_int64_t ret;
  209. ret=_in+((ogg_int64_t)1<<32)>>33;
  210. /*0x80000000 is automatically converted to unsigned on 32-bit systems.
  211. -0x7FFFFFFF-1 is needed to avoid "promoting" the whole expression to
  212. unsigned.*/
  213. return (ogg_int32_t)OC_CLAMPI(-0x7FFFFFFF-1,ret,0x7FFFFFFF);
  214. }
  215. /*Binary exponential of _log_scale with 24-bit fractional precision and
  216. saturation.
  217. _log_scale: A binary logarithm in Q57 format.
  218. Return: The binary exponential in Q24 format, saturated to 2**31-1 if
  219. _log_scale was too large.*/
  220. static ogg_int32_t oc_bexp64_q24(ogg_int64_t _log_scale){
  221. if(_log_scale<OC_Q57(8)){
  222. ogg_int64_t ret;
  223. ret=oc_bexp64(_log_scale+OC_Q57(24));
  224. return ret<0x7FFFFFFF?(ogg_int32_t)ret:0x7FFFFFFF;
  225. }
  226. return 0x7FFFFFFF;
  227. }
  228. static void oc_enc_rc_reset(oc_enc_ctx *_enc){
  229. ogg_int64_t npixels;
  230. ogg_int64_t ibpp;
  231. int inter_delay;
  232. /*TODO: These parameters should be exposed in a th_encode_ctl() API.*/
  233. _enc->rc.bits_per_frame=(_enc->state.info.target_bitrate*
  234. (ogg_int64_t)_enc->state.info.fps_denominator)/
  235. _enc->state.info.fps_numerator;
  236. /*Insane framerates or frame sizes mean insane bitrates.
  237. Let's not get carried away.*/
  238. if(_enc->rc.bits_per_frame>0x400000000000LL){
  239. _enc->rc.bits_per_frame=(ogg_int64_t)0x400000000000LL;
  240. }
  241. else if(_enc->rc.bits_per_frame<32)_enc->rc.bits_per_frame=32;
  242. _enc->rc.buf_delay=OC_MAXI(_enc->rc.buf_delay,12);
  243. _enc->rc.max=_enc->rc.bits_per_frame*_enc->rc.buf_delay;
  244. /*Start with a buffer fullness of 50% plus 25% of the amount we plan to spend
  245. on a single keyframe interval.
  246. We can require fully half the bits in an interval for a keyframe, so this
  247. initial level gives us maximum flexibility for over/under-shooting in
  248. subsequent frames.*/
  249. _enc->rc.target=(_enc->rc.max+1>>1)+(_enc->rc.bits_per_frame+2>>2)*
  250. OC_MINI(_enc->keyframe_frequency_force,_enc->rc.buf_delay);
  251. _enc->rc.fullness=_enc->rc.target;
  252. /*Pick exponents and initial scales for quantizer selection.*/
  253. npixels=_enc->state.info.frame_width*
  254. (ogg_int64_t)_enc->state.info.frame_height;
  255. _enc->rc.log_npixels=oc_blog64(npixels);
  256. ibpp=npixels/_enc->rc.bits_per_frame;
  257. if(ibpp<1){
  258. _enc->rc.exp[0]=59;
  259. _enc->rc.log_scale[0]=oc_blog64(1997)-OC_Q57(8);
  260. }
  261. else if(ibpp<2){
  262. _enc->rc.exp[0]=55;
  263. _enc->rc.log_scale[0]=oc_blog64(1604)-OC_Q57(8);
  264. }
  265. else{
  266. _enc->rc.exp[0]=48;
  267. _enc->rc.log_scale[0]=oc_blog64(834)-OC_Q57(8);
  268. }
  269. if(ibpp<4){
  270. _enc->rc.exp[1]=100;
  271. _enc->rc.log_scale[1]=oc_blog64(2249)-OC_Q57(8);
  272. }
  273. else if(ibpp<8){
  274. _enc->rc.exp[1]=95;
  275. _enc->rc.log_scale[1]=oc_blog64(1751)-OC_Q57(8);
  276. }
  277. else{
  278. _enc->rc.exp[1]=73;
  279. _enc->rc.log_scale[1]=oc_blog64(1260)-OC_Q57(8);
  280. }
  281. _enc->rc.prev_drop_count=0;
  282. _enc->rc.log_drop_scale=OC_Q57(0);
  283. /*Set up second order followers, initialized according to corresponding
  284. time constants.*/
  285. oc_iir_filter_init(&_enc->rc.scalefilter[0],4,
  286. oc_q57_to_q24(_enc->rc.log_scale[0]));
  287. inter_delay=(_enc->rc.twopass?
  288. OC_MAXI(_enc->keyframe_frequency_force,12):_enc->rc.buf_delay)>>1;
  289. _enc->rc.inter_count=0;
  290. /*We clamp the actual inter_delay to a minimum of 10 to work within the range
  291. of values where later incrementing the delay works as designed.
  292. 10 is not an exact choice, but rather a good working trade-off.*/
  293. _enc->rc.inter_delay=10;
  294. _enc->rc.inter_delay_target=inter_delay;
  295. oc_iir_filter_init(&_enc->rc.scalefilter[1],_enc->rc.inter_delay,
  296. oc_q57_to_q24(_enc->rc.log_scale[1]));
  297. oc_iir_filter_init(&_enc->rc.vfrfilter,4,
  298. oc_bexp64_q24(_enc->rc.log_drop_scale));
  299. }
  300. void oc_rc_state_init(oc_rc_state *_rc,oc_enc_ctx *_enc){
  301. _rc->twopass=0;
  302. _rc->twopass_buffer_bytes=0;
  303. _rc->twopass_force_kf=0;
  304. _rc->frame_metrics=NULL;
  305. _rc->rate_bias=0;
  306. if(_enc->state.info.target_bitrate>0){
  307. /*The buffer size is set equal to the keyframe interval, clamped to the
  308. range [12,256] frames.
  309. The 12 frame minimum gives us some chance to distribute bit estimation
  310. errors.
  311. The 256 frame maximum means we'll require 8-10 seconds of pre-buffering
  312. at 24-30 fps, which is not unreasonable.*/
  313. _rc->buf_delay=_enc->keyframe_frequency_force>256?
  314. 256:_enc->keyframe_frequency_force;
  315. /*By default, enforce all buffer constraints.*/
  316. _rc->drop_frames=1;
  317. _rc->cap_overflow=1;
  318. _rc->cap_underflow=0;
  319. oc_enc_rc_reset(_enc);
  320. }
  321. }
  322. void oc_rc_state_clear(oc_rc_state *_rc){
  323. _ogg_free(_rc->frame_metrics);
  324. }
  325. void oc_enc_rc_resize(oc_enc_ctx *_enc){
  326. /*If encoding has not yet begun, reset the buffer state.*/
  327. if(_enc->state.curframe_num<0)oc_enc_rc_reset(_enc);
  328. else{
  329. int idt;
  330. /*Otherwise, update the bounds on the buffer, but not the current
  331. fullness.*/
  332. _enc->rc.bits_per_frame=(_enc->state.info.target_bitrate*
  333. (ogg_int64_t)_enc->state.info.fps_denominator)/
  334. _enc->state.info.fps_numerator;
  335. /*Insane framerates or frame sizes mean insane bitrates.
  336. Let's not get carried away.*/
  337. if(_enc->rc.bits_per_frame>0x400000000000LL){
  338. _enc->rc.bits_per_frame=(ogg_int64_t)0x400000000000LL;
  339. }
  340. else if(_enc->rc.bits_per_frame<32)_enc->rc.bits_per_frame=32;
  341. _enc->rc.buf_delay=OC_MAXI(_enc->rc.buf_delay,12);
  342. _enc->rc.max=_enc->rc.bits_per_frame*_enc->rc.buf_delay;
  343. _enc->rc.target=(_enc->rc.max+1>>1)+(_enc->rc.bits_per_frame+2>>2)*
  344. OC_MINI(_enc->keyframe_frequency_force,_enc->rc.buf_delay);
  345. /*Update the INTER-frame scale filter delay.
  346. We jump to it immediately if we've already seen enough frames; otherwise
  347. it is simply set as the new target.*/
  348. _enc->rc.inter_delay_target=idt=OC_MAXI(_enc->rc.buf_delay>>1,10);
  349. if(idt<OC_MINI(_enc->rc.inter_delay,_enc->rc.inter_count)){
  350. oc_iir_filter_init(&_enc->rc.scalefilter[1],idt,
  351. _enc->rc.scalefilter[1].y[0]);
  352. _enc->rc.inter_delay=idt;
  353. }
  354. }
  355. /*If we're in pass-2 mode, make sure the frame metrics array is big enough
  356. to hold frame statistics for the full buffer.*/
  357. if(_enc->rc.twopass==2){
  358. int cfm;
  359. int buf_delay;
  360. int reset_window;
  361. buf_delay=_enc->rc.buf_delay;
  362. reset_window=_enc->rc.frame_metrics==NULL&&(_enc->rc.frames_total[0]==0||
  363. buf_delay<_enc->rc.frames_total[0]+_enc->rc.frames_total[1]
  364. +_enc->rc.frames_total[2]);
  365. cfm=_enc->rc.cframe_metrics;
  366. /*Only try to resize the frame metrics buffer if a) it's too small and
  367. b) we were using a finite buffer, or are about to start.*/
  368. if(cfm<buf_delay&&(_enc->rc.frame_metrics!=NULL||reset_window)){
  369. oc_frame_metrics *fm;
  370. int nfm;
  371. int fmh;
  372. fm=(oc_frame_metrics *)_ogg_realloc(_enc->rc.frame_metrics,
  373. buf_delay*sizeof(*_enc->rc.frame_metrics));
  374. if(fm==NULL){
  375. /*We failed to allocate a finite buffer.*/
  376. /*If we don't have a valid 2-pass header yet, just return; we'll reset
  377. the buffer size when we read the header.*/
  378. if(_enc->rc.frames_total[0]==0)return;
  379. /*Otherwise revert to the largest finite buffer previously set, or to
  380. whole-file buffering if we were still using that.*/
  381. _enc->rc.buf_delay=_enc->rc.frame_metrics!=NULL?
  382. cfm:_enc->rc.frames_total[0]+_enc->rc.frames_total[1]
  383. +_enc->rc.frames_total[2];
  384. oc_enc_rc_resize(_enc);
  385. return;
  386. }
  387. _enc->rc.frame_metrics=fm;
  388. _enc->rc.cframe_metrics=buf_delay;
  389. /*Re-organize the circular buffer.*/
  390. fmh=_enc->rc.frame_metrics_head;
  391. nfm=_enc->rc.nframe_metrics;
  392. if(fmh+nfm>cfm){
  393. int shift;
  394. shift=OC_MINI(fmh+nfm-cfm,buf_delay-cfm);
  395. memcpy(fm+cfm,fm,OC_MINI(fmh+nfm-cfm,buf_delay-cfm)*sizeof(*fm));
  396. if(fmh+nfm>buf_delay)memmove(fm,fm+shift,fmh+nfm-buf_delay);
  397. }
  398. }
  399. /*We were using whole-file buffering; now we're not.*/
  400. if(reset_window){
  401. _enc->rc.nframes[0]=_enc->rc.nframes[1]=_enc->rc.nframes[2]=0;
  402. _enc->rc.scale_sum[0]=_enc->rc.scale_sum[1]=0;
  403. _enc->rc.scale_window_end=_enc->rc.scale_window0=
  404. _enc->state.curframe_num+_enc->prev_dup_count+1;
  405. if(_enc->rc.twopass_buffer_bytes){
  406. int qti;
  407. /*We already read the metrics for the first frame in the window.*/
  408. *(_enc->rc.frame_metrics)=*&_enc->rc.cur_metrics;
  409. _enc->rc.nframe_metrics++;
  410. qti=_enc->rc.cur_metrics.frame_type;
  411. _enc->rc.nframes[qti]++;
  412. _enc->rc.nframes[2]+=_enc->rc.cur_metrics.dup_count;
  413. _enc->rc.scale_sum[qti]+=oc_bexp_q24(_enc->rc.cur_metrics.log_scale);
  414. _enc->rc.scale_window_end+=_enc->rc.cur_metrics.dup_count+1;
  415. if(_enc->rc.scale_window_end-_enc->rc.scale_window0<buf_delay){
  416. /*We need more frame data.*/
  417. _enc->rc.twopass_buffer_bytes=0;
  418. }
  419. }
  420. }
  421. /*Otherwise, we could shrink the size of the current window, if necessary,
  422. but leaving it like it is lets us adapt to the new buffer size more
  423. gracefully.*/
  424. }
  425. }
  426. /*Scale the number of frames by the number of expected drops/duplicates.*/
  427. static int oc_rc_scale_drop(oc_rc_state *_rc,int _nframes){
  428. if(_rc->prev_drop_count>0||_rc->log_drop_scale>OC_Q57(0)){
  429. ogg_int64_t dup_scale;
  430. dup_scale=oc_bexp64((_rc->log_drop_scale
  431. +oc_blog64(_rc->prev_drop_count+1)>>1)+OC_Q57(8));
  432. if(dup_scale<_nframes<<8){
  433. int dup_scalei;
  434. dup_scalei=(int)dup_scale;
  435. if(dup_scalei>0)_nframes=((_nframes<<8)+dup_scalei-1)/dup_scalei;
  436. }
  437. else _nframes=!!_nframes;
  438. }
  439. return _nframes;
  440. }
  441. int oc_enc_select_qi(oc_enc_ctx *_enc,int _qti,int _clamp){
  442. ogg_int64_t rate_total;
  443. ogg_int64_t rate_bias;
  444. int nframes[2];
  445. int buf_delay;
  446. int buf_pad;
  447. ogg_int64_t log_qtarget;
  448. ogg_int64_t log_scale0;
  449. ogg_int64_t log_cur_scale;
  450. ogg_int64_t log_qexp;
  451. int exp0;
  452. int old_qi;
  453. int qi;
  454. /*Figure out how to re-distribute bits so that we hit our fullness target
  455. before the last keyframe in our current buffer window (after the current
  456. frame), or the end of the buffer window, whichever comes first.*/
  457. log_cur_scale=(ogg_int64_t)_enc->rc.scalefilter[_qti].y[0]<<33;
  458. buf_pad=0;
  459. switch(_enc->rc.twopass){
  460. default:{
  461. ogg_uint32_t next_key_frame;
  462. /*Single pass mode: assume only forced keyframes and attempt to estimate
  463. the drop count for VFR content.*/
  464. next_key_frame=_qti?_enc->keyframe_frequency_force
  465. -(_enc->state.curframe_num-_enc->state.keyframe_num):0;
  466. nframes[0]=(_enc->rc.buf_delay-OC_MINI(next_key_frame,_enc->rc.buf_delay)
  467. +_enc->keyframe_frequency_force-1)/_enc->keyframe_frequency_force;
  468. if(nframes[0]+_qti>1){
  469. nframes[0]--;
  470. buf_delay=next_key_frame+nframes[0]*_enc->keyframe_frequency_force;
  471. }
  472. else buf_delay=_enc->rc.buf_delay;
  473. nframes[1]=buf_delay-nframes[0];
  474. /*Downgrade the delta frame rate to correspond to the recent drop count
  475. history.*/
  476. nframes[1]=oc_rc_scale_drop(&_enc->rc,nframes[1]);
  477. }break;
  478. case 1:{
  479. /*Pass 1 mode: use a fixed qi value.*/
  480. qi=_enc->state.qis[0];
  481. _enc->rc.log_qtarget=_enc->log_qavg[_qti][qi];
  482. return qi;
  483. }break;
  484. case 2:{
  485. ogg_int64_t scale_sum[2];
  486. int qti;
  487. /*Pass 2 mode: we know exactly how much of each frame type there is in
  488. the current buffer window, and have estimates for the scales.*/
  489. nframes[0]=_enc->rc.nframes[0];
  490. nframes[1]=_enc->rc.nframes[1];
  491. scale_sum[0]=_enc->rc.scale_sum[0];
  492. scale_sum[1]=_enc->rc.scale_sum[1];
  493. /*The window size can be slightly larger than the buffer window for VFR
  494. content; clamp it down, if appropriate (the excess will all be dup
  495. frames).*/
  496. buf_delay=OC_MINI(_enc->rc.scale_window_end-_enc->rc.scale_window0,
  497. _enc->rc.buf_delay);
  498. /*If we're approaching the end of the file, add some slack to keep us
  499. from slamming into a rail.
  500. Our rate accuracy goes down, but it keeps the result sensible.
  501. We position the target where the first forced keyframe beyond the end
  502. of the file would be (for consistency with 1-pass mode).*/
  503. buf_pad=OC_MINI(_enc->rc.buf_delay,_enc->state.keyframe_num
  504. +_enc->keyframe_frequency_force-_enc->rc.scale_window0);
  505. if(buf_delay<buf_pad)buf_pad-=buf_delay;
  506. else{
  507. /*Otherwise, search for the last keyframe in the buffer window and
  508. target that.*/
  509. buf_pad=0;
  510. /*TODO: Currently we only do this when using a finite buffer; we could
  511. save the position of the last keyframe in the summary data and do it
  512. with a whole-file buffer as well, but it isn't likely to make a
  513. difference.*/
  514. if(_enc->rc.frame_metrics!=NULL){
  515. int fmi;
  516. int fm_tail;
  517. fm_tail=_enc->rc.frame_metrics_head+_enc->rc.nframe_metrics;
  518. if(fm_tail>=_enc->rc.cframe_metrics)fm_tail-=_enc->rc.cframe_metrics;
  519. for(fmi=fm_tail;;){
  520. oc_frame_metrics *m;
  521. fmi--;
  522. if(fmi<0)fmi+=_enc->rc.cframe_metrics;
  523. /*Stop before we remove the first frame.*/
  524. if(fmi==_enc->rc.frame_metrics_head)break;
  525. m=_enc->rc.frame_metrics+fmi;
  526. /*If we find a keyframe, remove it and everything past it.*/
  527. if(m->frame_type==OC_INTRA_FRAME){
  528. do{
  529. qti=m->frame_type;
  530. nframes[qti]--;
  531. scale_sum[qti]-=oc_bexp_q24(m->log_scale);
  532. buf_delay-=m->dup_count+1;
  533. fmi++;
  534. if(fmi>=_enc->rc.cframe_metrics)fmi=0;
  535. m=_enc->rc.frame_metrics+fmi;
  536. }
  537. while(fmi!=fm_tail);
  538. /*And stop scanning backwards.*/
  539. break;
  540. }
  541. }
  542. }
  543. }
  544. /*If we're not using the same frame type as in pass 1 (because someone
  545. changed the keyframe interval), remove that scale estimate.
  546. We'll add in a replacement for the correct frame type below.*/
  547. qti=_enc->rc.cur_metrics.frame_type;
  548. if(qti!=_qti){
  549. nframes[qti]--;
  550. scale_sum[qti]-=oc_bexp_q24(_enc->rc.cur_metrics.log_scale);
  551. }
  552. /*Compute log_scale estimates for each frame type from the pass-1 scales
  553. we measured in the current window.*/
  554. for(qti=0;qti<2;qti++){
  555. _enc->rc.log_scale[qti]=nframes[qti]>0?
  556. oc_blog64(scale_sum[qti])-oc_blog64(nframes[qti])-OC_Q57(24):
  557. -_enc->rc.log_npixels;
  558. }
  559. /*If we're not using the same frame type as in pass 1, add a scale
  560. estimate for the corresponding frame using the current low-pass
  561. filter value.
  562. This is mostly to ensure we have a valid estimate even when pass 1 had
  563. no frames of this type in the buffer window.
  564. TODO: We could also plan ahead and figure out how many keyframes we'll
  565. be forced to add in the current buffer window.*/
  566. qti=_enc->rc.cur_metrics.frame_type;
  567. if(qti!=_qti){
  568. ogg_int64_t scale;
  569. scale=_enc->rc.log_scale[_qti]<OC_Q57(23)?
  570. oc_bexp64(_enc->rc.log_scale[_qti]+OC_Q57(24)):0x7FFFFFFFFFFFLL;
  571. scale*=nframes[_qti];
  572. nframes[_qti]++;
  573. scale+=oc_bexp_q24(log_cur_scale>>33);
  574. _enc->rc.log_scale[_qti]=oc_blog64(scale)
  575. -oc_blog64(nframes[qti])-OC_Q57(24);
  576. }
  577. else log_cur_scale=(ogg_int64_t)_enc->rc.cur_metrics.log_scale<<33;
  578. /*Add the padding from above.
  579. This basically reverts to 1-pass estimations in the last keyframe
  580. interval.*/
  581. if(buf_pad>0){
  582. ogg_int64_t scale;
  583. int nextra_frames;
  584. /*Extend the buffer.*/
  585. buf_delay+=buf_pad;
  586. /*Add virtual delta frames according to the estimated drop count.*/
  587. nextra_frames=oc_rc_scale_drop(&_enc->rc,buf_pad);
  588. /*And blend in the low-pass filtered scale according to how many frames
  589. we added.*/
  590. scale=
  591. oc_bexp64(_enc->rc.log_scale[1]+OC_Q57(24))*(ogg_int64_t)nframes[1]
  592. +oc_bexp_q24(_enc->rc.scalefilter[1].y[0])*(ogg_int64_t)nextra_frames;
  593. nframes[1]+=nextra_frames;
  594. _enc->rc.log_scale[1]=oc_blog64(scale)-oc_blog64(nframes[1])-OC_Q57(24);
  595. }
  596. }break;
  597. }
  598. /*If we've been missing our target, add a penalty term.*/
  599. rate_bias=(_enc->rc.rate_bias/(_enc->state.curframe_num+1000))*
  600. (buf_delay-buf_pad);
  601. /*rate_total is the total bits available over the next buf_delay frames.*/
  602. rate_total=_enc->rc.fullness-_enc->rc.target+rate_bias
  603. +buf_delay*_enc->rc.bits_per_frame;
  604. log_scale0=_enc->rc.log_scale[_qti]+_enc->rc.log_npixels;
  605. /*If there aren't enough bits to achieve our desired fullness level, use the
  606. minimum quality permitted.*/
  607. if(rate_total<=buf_delay)log_qtarget=OC_QUANT_MAX_LOG;
  608. else{
  609. static const ogg_int64_t LOG_KEY_RATIO=0x0137222BB70747BALL;
  610. ogg_int64_t log_scale1;
  611. ogg_int64_t rlo;
  612. ogg_int64_t rhi;
  613. log_scale1=_enc->rc.log_scale[1-_qti]+_enc->rc.log_npixels;
  614. rlo=0;
  615. rhi=(rate_total+nframes[_qti]-1)/nframes[_qti];
  616. while(rlo<rhi){
  617. ogg_int64_t curr;
  618. ogg_int64_t rdiff;
  619. ogg_int64_t log_rpow;
  620. ogg_int64_t rscale;
  621. curr=rlo+rhi>>1;
  622. log_rpow=oc_blog64(curr)-log_scale0;
  623. log_rpow=(log_rpow+(_enc->rc.exp[_qti]>>1))/_enc->rc.exp[_qti];
  624. if(_qti)log_rpow+=LOG_KEY_RATIO>>6;
  625. else log_rpow-=LOG_KEY_RATIO>>6;
  626. log_rpow*=_enc->rc.exp[1-_qti];
  627. rscale=nframes[1-_qti]*oc_bexp64(log_scale1+log_rpow);
  628. rdiff=nframes[_qti]*curr+rscale-rate_total;
  629. if(rdiff<0)rlo=curr+1;
  630. else if(rdiff>0)rhi=curr-1;
  631. else break;
  632. }
  633. log_qtarget=OC_Q57(2)-((oc_blog64(rlo)-log_scale0+(_enc->rc.exp[_qti]>>1))/
  634. _enc->rc.exp[_qti]<<6);
  635. log_qtarget=OC_MINI(log_qtarget,OC_QUANT_MAX_LOG);
  636. }
  637. /*The above allocation looks only at the total rate we'll accumulate in the
  638. next buf_delay frames.
  639. However, we could overflow the buffer on the very next frame, so check for
  640. that here, if we're not using a soft target.*/
  641. exp0=_enc->rc.exp[_qti];
  642. if(_enc->rc.cap_overflow){
  643. ogg_int64_t margin;
  644. ogg_int64_t soft_limit;
  645. ogg_int64_t log_soft_limit;
  646. /*Allow 3% of the buffer for prediction error.
  647. This should be plenty, and we don't mind if we go a bit over; we only
  648. want to keep these bits from being completely wasted.*/
  649. margin=_enc->rc.max+31>>5;
  650. /*We want to use at least this many bits next frame.*/
  651. soft_limit=_enc->rc.fullness+_enc->rc.bits_per_frame-(_enc->rc.max-margin);
  652. log_soft_limit=oc_blog64(soft_limit);
  653. /*If we're predicting we won't use that many...*/
  654. log_qexp=(log_qtarget-OC_Q57(2)>>6)*exp0;
  655. if(log_scale0-log_qexp<log_soft_limit){
  656. /*Scale the adjustment based on how far into the margin we are.*/
  657. log_qexp+=(log_scale0-log_soft_limit-log_qexp>>32)*
  658. ((OC_MINI(margin,soft_limit)<<32)/margin);
  659. log_qtarget=((log_qexp+(exp0>>1))/exp0<<6)+OC_Q57(2);
  660. }
  661. }
  662. /*If this was not one of the initial frames, limit the change in quality.*/
  663. old_qi=_enc->state.qis[0];
  664. if(_clamp){
  665. ogg_int64_t log_qmin;
  666. ogg_int64_t log_qmax;
  667. /*Clamp the target quantizer to within [0.8*Q,1.2*Q], where Q is the
  668. current quantizer.
  669. TODO: With user-specified quant matrices, we need to enlarge these limits
  670. if they don't actually let us change qi values.*/
  671. log_qmin=_enc->log_qavg[_qti][old_qi]-0x00A4D3C25E68DC58LL;
  672. log_qmax=_enc->log_qavg[_qti][old_qi]+0x00A4D3C25E68DC58LL;
  673. log_qtarget=OC_CLAMPI(log_qmin,log_qtarget,log_qmax);
  674. }
  675. /*The above allocation looks only at the total rate we'll accumulate in the
  676. next buf_delay frames.
  677. However, we could bust the budget on the very next frame, so check for that
  678. here, if we're not using a soft target.*/
  679. /* Disabled when our minimum qi > 0; if we saturate log_qtarget to
  680. to the maximum possible size when we have a minimum qi, the
  681. resulting lambda will interact very strangely with SKIP. The
  682. resulting artifacts look like waterfalls. */
  683. if(_enc->state.info.quality==0){
  684. ogg_int64_t log_hard_limit;
  685. /*Compute the maximum number of bits we can use in the next frame.
  686. Allow 50% of the rate for a single frame for prediction error.
  687. This may not be enough for keyframes or sudden changes in complexity.*/
  688. log_hard_limit=oc_blog64(_enc->rc.fullness+(_enc->rc.bits_per_frame>>1));
  689. /*If we're predicting we'll use more than this...*/
  690. log_qexp=(log_qtarget-OC_Q57(2)>>6)*exp0;
  691. if(log_scale0-log_qexp>log_hard_limit){
  692. /*Force the target to hit our limit exactly.*/
  693. log_qexp=log_scale0-log_hard_limit;
  694. log_qtarget=((log_qexp+(exp0>>1))/exp0<<6)+OC_Q57(2);
  695. /*If that target is unreasonable, oh well; we'll have to drop.*/
  696. log_qtarget=OC_MINI(log_qtarget,OC_QUANT_MAX_LOG);
  697. }
  698. }
  699. /*Compute a final estimate of the number of bits we plan to use.*/
  700. log_qexp=(log_qtarget-OC_Q57(2)>>6)*_enc->rc.exp[_qti];
  701. _enc->rc.rate_bias+=oc_bexp64(log_cur_scale+_enc->rc.log_npixels-log_qexp);
  702. qi=oc_enc_find_qi_for_target(_enc,_qti,old_qi,
  703. _enc->state.info.quality,log_qtarget);
  704. /*Save the quantizer target for lambda calculations.*/
  705. _enc->rc.log_qtarget=log_qtarget;
  706. return qi;
  707. }
  708. int oc_enc_update_rc_state(oc_enc_ctx *_enc,
  709. long _bits,int _qti,int _qi,int _trial,int _droppable){
  710. ogg_int64_t buf_delta;
  711. ogg_int64_t log_scale;
  712. int dropped;
  713. dropped=0;
  714. /* Drop frames also disabled for now in the case of infinite-buffer
  715. two-pass mode */
  716. if(!_enc->rc.drop_frames||_enc->rc.twopass&&_enc->rc.frame_metrics==NULL){
  717. _droppable=0;
  718. }
  719. buf_delta=_enc->rc.bits_per_frame*(1+_enc->dup_count);
  720. if(_bits<=0){
  721. /*We didn't code any blocks in this frame.*/
  722. log_scale=OC_Q57(-64);
  723. _bits=0;
  724. }
  725. else{
  726. ogg_int64_t log_bits;
  727. ogg_int64_t log_qexp;
  728. /*Compute the estimated scale factor for this frame type.*/
  729. log_bits=oc_blog64(_bits);
  730. log_qexp=_enc->rc.log_qtarget-OC_Q57(2);
  731. log_qexp=(log_qexp>>6)*(_enc->rc.exp[_qti]);
  732. log_scale=OC_MINI(log_bits-_enc->rc.log_npixels+log_qexp,OC_Q57(16));
  733. }
  734. /*Special two-pass processing.*/
  735. switch(_enc->rc.twopass){
  736. case 1:{
  737. /*Pass 1 mode: save the metrics for this frame.*/
  738. _enc->rc.cur_metrics.log_scale=oc_q57_to_q24(log_scale);
  739. _enc->rc.cur_metrics.dup_count=_enc->dup_count;
  740. _enc->rc.cur_metrics.frame_type=_enc->state.frame_type;
  741. _enc->rc.twopass_buffer_bytes=0;
  742. }break;
  743. case 2:{
  744. /*Pass 2 mode:*/
  745. if(!_trial){
  746. ogg_int64_t next_frame_num;
  747. int qti;
  748. /*Move the current metrics back one frame.*/
  749. *&_enc->rc.prev_metrics=*&_enc->rc.cur_metrics;
  750. next_frame_num=_enc->state.curframe_num+_enc->dup_count+1;
  751. /*Back out the last frame's statistics from the sliding window.*/
  752. qti=_enc->rc.prev_metrics.frame_type;
  753. _enc->rc.frames_left[qti]--;
  754. _enc->rc.frames_left[2]-=_enc->rc.prev_metrics.dup_count;
  755. _enc->rc.nframes[qti]--;
  756. _enc->rc.nframes[2]-=_enc->rc.prev_metrics.dup_count;
  757. _enc->rc.scale_sum[qti]-=oc_bexp_q24(_enc->rc.prev_metrics.log_scale);
  758. _enc->rc.scale_window0=(int)next_frame_num;
  759. /*Free the corresponding entry in the circular buffer.*/
  760. if(_enc->rc.frame_metrics!=NULL){
  761. _enc->rc.nframe_metrics--;
  762. _enc->rc.frame_metrics_head++;
  763. if(_enc->rc.frame_metrics_head>=_enc->rc.cframe_metrics){
  764. _enc->rc.frame_metrics_head=0;
  765. }
  766. }
  767. /*Mark us ready for the next 2-pass packet.*/
  768. _enc->rc.twopass_buffer_bytes=0;
  769. /*Update state, so the user doesn't have to keep calling 2pass_in after
  770. they've fed in all the data when we're using a finite buffer.*/
  771. _enc->prev_dup_count=_enc->dup_count;
  772. oc_enc_rc_2pass_in(_enc,NULL,0);
  773. }
  774. }break;
  775. }
  776. /*Common to all passes:*/
  777. if(_bits>0){
  778. if(_trial){
  779. oc_iir_filter *f;
  780. /*Use the estimated scale factor directly if this was a trial.*/
  781. f=_enc->rc.scalefilter+_qti;
  782. f->y[1]=f->y[0]=f->x[1]=f->x[0]=oc_q57_to_q24(log_scale);
  783. _enc->rc.log_scale[_qti]=log_scale;
  784. }
  785. else{
  786. /*Lengthen the time constant for the INTER filter as we collect more
  787. frame statistics, until we reach our target.*/
  788. if(_enc->rc.inter_delay<_enc->rc.inter_delay_target&&
  789. _enc->rc.inter_count>=_enc->rc.inter_delay&&_qti==OC_INTER_FRAME){
  790. oc_iir_filter_reinit(&_enc->rc.scalefilter[1],++_enc->rc.inter_delay);
  791. }
  792. /*Otherwise update the low-pass scale filter for this frame type,
  793. regardless of whether or not we dropped this frame.*/
  794. _enc->rc.log_scale[_qti]=oc_iir_filter_update(
  795. _enc->rc.scalefilter+_qti,oc_q57_to_q24(log_scale))<<33;
  796. /*If this frame busts our budget, it must be dropped.*/
  797. if(_droppable&&_enc->rc.fullness+buf_delta<_bits){
  798. _enc->rc.prev_drop_count+=1+_enc->dup_count;
  799. _bits=0;
  800. dropped=1;
  801. }
  802. else{
  803. ogg_uint32_t drop_count;
  804. /*Update a low-pass filter to estimate the "real" frame rate taking
  805. drops and duplicates into account.
  806. This is only done if the frame is coded, as it needs the final
  807. count of dropped frames.*/
  808. drop_count=_enc->rc.prev_drop_count+1;
  809. if(drop_count>0x7F)drop_count=0x7FFFFFFF;
  810. else drop_count<<=24;
  811. _enc->rc.log_drop_scale=oc_blog64(oc_iir_filter_update(
  812. &_enc->rc.vfrfilter,drop_count))-OC_Q57(24);
  813. /*Initialize the drop count for this frame to the user-requested dup
  814. count.
  815. It will be increased if we drop more frames.*/
  816. _enc->rc.prev_drop_count=_enc->dup_count;
  817. }
  818. }
  819. /*Increment the INTER frame count, for filter adaptation purposes.*/
  820. if(_enc->rc.inter_count<INT_MAX)_enc->rc.inter_count+=_qti;
  821. }
  822. /*Increase the drop count.*/
  823. else _enc->rc.prev_drop_count+=1+_enc->dup_count;
  824. /*And update the buffer fullness level.*/
  825. if(!_trial){
  826. _enc->rc.fullness+=buf_delta-_bits;
  827. /*If we're too quick filling the buffer and overflow is capped,
  828. that rate is lost forever.*/
  829. if(_enc->rc.cap_overflow&&_enc->rc.fullness>_enc->rc.max){
  830. _enc->rc.fullness=_enc->rc.max;
  831. }
  832. /*If we're too quick draining the buffer and underflow is capped,
  833. don't try to make up that rate later.*/
  834. if(_enc->rc.cap_underflow&&_enc->rc.fullness<0){
  835. _enc->rc.fullness=0;
  836. }
  837. /*Adjust the bias for the real bits we've used.*/
  838. _enc->rc.rate_bias-=_bits;
  839. }
  840. return dropped;
  841. }
  /*The version number stored in (and required of) the two-pass summary
     header.*/
  #define OC_RC_2PASS_VERSION   (1)
  /*Size in bytes of the summary header: magic (4) + version (4) +
     three frame counts (3*4) + two exponents (2*1) + two scale sums (2*8).*/
  #define OC_RC_2PASS_HDR_SZ    (4+4+3*4+2*1+2*8)
  /*Size in bytes of one per-frame packet: the dup count/frame type word (4) +
     the log scale (4).*/
  #define OC_RC_2PASS_PACKET_SZ (4+4)
  845. static void oc_rc_buffer_val(oc_rc_state *_rc,ogg_int64_t _val,int _bytes){
  846. while(_bytes-->0){
  847. _rc->twopass_buffer[_rc->twopass_buffer_bytes++]=(unsigned char)(_val&0xFF);
  848. _val>>=8;
  849. }
  850. }
  851. int oc_enc_rc_2pass_out(oc_enc_ctx *_enc,unsigned char **_buf){
  852. if(_enc->rc.twopass_buffer_bytes==0){
  853. if(_enc->rc.twopass==0){
  854. int qi;
  855. /*Pick first-pass qi for scale calculations.*/
  856. qi=oc_enc_select_qi(_enc,0,0);
  857. _enc->state.nqis=1;
  858. _enc->state.qis[0]=qi;
  859. _enc->rc.twopass=1;
  860. _enc->rc.frames_total[0]=_enc->rc.frames_total[1]=
  861. _enc->rc.frames_total[2]=0;
  862. _enc->rc.scale_sum[0]=_enc->rc.scale_sum[1]=0;
  863. /*Fill in dummy summary values.*/
  864. oc_rc_buffer_val(&_enc->rc,0x5032544F,4);
  865. oc_rc_buffer_val(&_enc->rc,OC_RC_2PASS_VERSION,4);
  866. oc_rc_buffer_val(&_enc->rc,0,OC_RC_2PASS_HDR_SZ-8);
  867. }
  868. else{
  869. int qti;
  870. qti=_enc->rc.cur_metrics.frame_type;
  871. _enc->rc.scale_sum[qti]+=oc_bexp_q24(_enc->rc.cur_metrics.log_scale);
  872. _enc->rc.frames_total[qti]++;
  873. _enc->rc.frames_total[2]+=_enc->rc.cur_metrics.dup_count;
  874. oc_rc_buffer_val(&_enc->rc,
  875. _enc->rc.cur_metrics.dup_count|_enc->rc.cur_metrics.frame_type<<31,4);
  876. oc_rc_buffer_val(&_enc->rc,_enc->rc.cur_metrics.log_scale,4);
  877. }
  878. }
  879. else if(_enc->packet_state==OC_PACKET_DONE&&
  880. _enc->rc.twopass_buffer_bytes!=OC_RC_2PASS_HDR_SZ){
  881. _enc->rc.twopass_buffer_bytes=0;
  882. oc_rc_buffer_val(&_enc->rc,0x5032544F,4);
  883. oc_rc_buffer_val(&_enc->rc,OC_RC_2PASS_VERSION,4);
  884. oc_rc_buffer_val(&_enc->rc,_enc->rc.frames_total[0],4);
  885. oc_rc_buffer_val(&_enc->rc,_enc->rc.frames_total[1],4);
  886. oc_rc_buffer_val(&_enc->rc,_enc->rc.frames_total[2],4);
  887. oc_rc_buffer_val(&_enc->rc,_enc->rc.exp[0],1);
  888. oc_rc_buffer_val(&_enc->rc,_enc->rc.exp[1],1);
  889. oc_rc_buffer_val(&_enc->rc,_enc->rc.scale_sum[0],8);
  890. oc_rc_buffer_val(&_enc->rc,_enc->rc.scale_sum[1],8);
  891. }
  892. else{
  893. /*The data for this frame has already been retrieved.*/
  894. *_buf=NULL;
  895. return 0;
  896. }
  897. *_buf=_enc->rc.twopass_buffer;
  898. return _enc->rc.twopass_buffer_bytes;
  899. }
  900. static size_t oc_rc_buffer_fill(oc_rc_state *_rc,
  901. unsigned char *_buf,size_t _bytes,size_t _consumed,size_t _goal){
  902. while(_rc->twopass_buffer_fill<_goal&&_consumed<_bytes){
  903. _rc->twopass_buffer[_rc->twopass_buffer_fill++]=_buf[_consumed++];
  904. }
  905. return _consumed;
  906. }
  907. static ogg_int64_t oc_rc_unbuffer_val(oc_rc_state *_rc,int _bytes){
  908. ogg_int64_t ret;
  909. int shift;
  910. ret=0;
  911. shift=0;
  912. while(_bytes-->0){
  913. ret|=((ogg_int64_t)_rc->twopass_buffer[_rc->twopass_buffer_bytes++])<<shift;
  914. shift+=8;
  915. }
  916. return ret;
  917. }
  918. int oc_enc_rc_2pass_in(oc_enc_ctx *_enc,unsigned char *_buf,size_t _bytes){
  919. size_t consumed;
  920. consumed=0;
  921. /*Enable pass 2 mode if this is the first call.*/
  922. if(_enc->rc.twopass==0){
  923. _enc->rc.twopass=2;
  924. _enc->rc.twopass_buffer_fill=0;
  925. _enc->rc.frames_total[0]=0;
  926. _enc->rc.nframe_metrics=0;
  927. _enc->rc.cframe_metrics=0;
  928. _enc->rc.frame_metrics_head=0;
  929. _enc->rc.scale_window0=0;
  930. _enc->rc.scale_window_end=0;
  931. }
  932. /*If we haven't got a valid summary header yet, try to parse one.*/
  933. if(_enc->rc.frames_total[0]==0){
  934. if(!_buf){
  935. int frames_needed;
  936. /*If we're using a whole-file buffer, we just need the first frame.
  937. Otherwise, we may need as many as one per buffer slot.*/
  938. frames_needed=_enc->rc.frame_metrics==NULL?1:_enc->rc.buf_delay;
  939. return OC_RC_2PASS_HDR_SZ+frames_needed*OC_RC_2PASS_PACKET_SZ
  940. -_enc->rc.twopass_buffer_fill;
  941. }
  942. consumed=oc_rc_buffer_fill(&_enc->rc,
  943. _buf,_bytes,consumed,OC_RC_2PASS_HDR_SZ);
  944. if(_enc->rc.twopass_buffer_fill>=OC_RC_2PASS_HDR_SZ){
  945. ogg_int64_t scale_sum[2];
  946. int exp[2];
  947. int buf_delay;
  948. /*Read the summary header data.*/
  949. /*Check the magic value and version number.*/
  950. if(oc_rc_unbuffer_val(&_enc->rc,4)!=0x5032544F||
  951. oc_rc_unbuffer_val(&_enc->rc,4)!=OC_RC_2PASS_VERSION){
  952. _enc->rc.twopass_buffer_bytes=0;
  953. return TH_ENOTFORMAT;
  954. }
  955. _enc->rc.frames_total[0]=(ogg_uint32_t)oc_rc_unbuffer_val(&_enc->rc,4);
  956. _enc->rc.frames_total[1]=(ogg_uint32_t)oc_rc_unbuffer_val(&_enc->rc,4);
  957. _enc->rc.frames_total[2]=(ogg_uint32_t)oc_rc_unbuffer_val(&_enc->rc,4);
  958. exp[0]=(int)oc_rc_unbuffer_val(&_enc->rc,1);
  959. exp[1]=(int)oc_rc_unbuffer_val(&_enc->rc,1);
  960. scale_sum[0]=oc_rc_unbuffer_val(&_enc->rc,8);
  961. scale_sum[1]=oc_rc_unbuffer_val(&_enc->rc,8);
  962. /*Make sure the file claims to have at least one frame.
  963. Otherwise we probably got the placeholder data from an aborted pass 1.
  964. Also make sure the total frame count doesn't overflow an integer.*/
  965. buf_delay=_enc->rc.frames_total[0]+_enc->rc.frames_total[1]
  966. +_enc->rc.frames_total[2];
  967. if(_enc->rc.frames_total[0]==0||buf_delay<0||
  968. (ogg_uint32_t)buf_delay<_enc->rc.frames_total[0]||
  969. (ogg_uint32_t)buf_delay<_enc->rc.frames_total[1]){
  970. _enc->rc.frames_total[0]=0;
  971. _enc->rc.twopass_buffer_bytes=0;
  972. return TH_EBADHEADER;
  973. }
  974. /*Got a valid header; set up pass 2.*/
  975. _enc->rc.frames_left[0]=_enc->rc.frames_total[0];
  976. _enc->rc.frames_left[1]=_enc->rc.frames_total[1];
  977. _enc->rc.frames_left[2]=_enc->rc.frames_total[2];
  978. /*If the user hasn't specified a buffer size, use the whole file.*/
  979. if(_enc->rc.frame_metrics==NULL){
  980. _enc->rc.buf_delay=buf_delay;
  981. _enc->rc.nframes[0]=_enc->rc.frames_total[0];
  982. _enc->rc.nframes[1]=_enc->rc.frames_total[1];
  983. _enc->rc.nframes[2]=_enc->rc.frames_total[2];
  984. _enc->rc.scale_sum[0]=scale_sum[0];
  985. _enc->rc.scale_sum[1]=scale_sum[1];
  986. _enc->rc.scale_window_end=buf_delay;
  987. oc_enc_rc_reset(_enc);
  988. }
  989. _enc->rc.exp[0]=exp[0];
  990. _enc->rc.exp[1]=exp[1];
  991. /*Clear the header data from the buffer to make room for packet data.*/
  992. _enc->rc.twopass_buffer_fill=0;
  993. _enc->rc.twopass_buffer_bytes=0;
  994. }
  995. }
  996. if(_enc->rc.frames_total[0]!=0){
  997. ogg_int64_t curframe_num;
  998. int nframes_total;
  999. curframe_num=_enc->state.curframe_num;
  1000. if(curframe_num>=0){
  1001. /*We just encoded a frame; make sure things matched.*/
  1002. if(_enc->rc.prev_metrics.dup_count!=_enc->prev_dup_count){
  1003. _enc->rc.twopass_buffer_bytes=0;
  1004. return TH_EINVAL;
  1005. }
  1006. }
  1007. curframe_num+=_enc->prev_dup_count+1;
  1008. nframes_total=_enc->rc.frames_total[0]+_enc->rc.frames_total[1]
  1009. +_enc->rc.frames_total[2];
  1010. if(curframe_num>=nframes_total){
  1011. /*We don't want any more data after the last frame, and we don't want to
  1012. allow any more frames to be encoded.*/
  1013. _enc->rc.twopass_buffer_bytes=0;
  1014. }
  1015. else if(_enc->rc.twopass_buffer_bytes==0){
  1016. if(_enc->rc.frame_metrics==NULL){
  1017. /*We're using a whole-file buffer:*/
  1018. if(!_buf)return OC_RC_2PASS_PACKET_SZ-_enc->rc.twopass_buffer_fill;
  1019. consumed=oc_rc_buffer_fill(&_enc->rc,
  1020. _buf,_bytes,consumed,OC_RC_2PASS_PACKET_SZ);
  1021. if(_enc->rc.twopass_buffer_fill>=OC_RC_2PASS_PACKET_SZ){
  1022. ogg_uint32_t dup_count;
  1023. ogg_int32_t log_scale;
  1024. int qti;
  1025. int arg;
  1026. /*Read the metrics for the next frame.*/
  1027. dup_count=oc_rc_unbuffer_val(&_enc->rc,4);
  1028. log_scale=oc_rc_unbuffer_val(&_enc->rc,4);
  1029. _enc->rc.cur_metrics.log_scale=log_scale;
  1030. qti=(dup_count&0x80000000)>>31;
  1031. _enc->rc.cur_metrics.dup_count=dup_count&0x7FFFFFFF;
  1032. _enc->rc.cur_metrics.frame_type=qti;
  1033. _enc->rc.twopass_force_kf=qti==OC_INTRA_FRAME;
  1034. /*"Helpfully" set the dup count back to what it was in pass 1.*/
  1035. arg=_enc->rc.cur_metrics.dup_count;
  1036. th_encode_ctl(_enc,TH_ENCCTL_SET_DUP_COUNT,&arg,sizeof(arg));
  1037. /*Clear the buffer for the next frame.*/
  1038. _enc->rc.twopass_buffer_fill=0;
  1039. }
  1040. }
  1041. else{
  1042. int frames_needed;
  1043. /*We're using a finite buffer:*/
  1044. frames_needed=OC_CLAMPI(0,_enc->rc.buf_delay
  1045. -(_enc->rc.scale_window_end-_enc->rc.scale_window0),
  1046. _enc->rc.frames_left[0]+_enc->rc.frames_left[1]
  1047. -_enc->rc.nframes[0]-_enc->rc.nframes[1]);
  1048. while(frames_needed>0){
  1049. if(!_buf){
  1050. return OC_RC_2PASS_PACKET_SZ*frames_needed
  1051. -_enc->rc.twopass_buffer_fill;
  1052. }
  1053. consumed=oc_rc_buffer_fill(&_enc->rc,
  1054. _buf,_bytes,consumed,OC_RC_2PASS_PACKET_SZ);
  1055. if(_enc->rc.twopass_buffer_fill>=OC_RC_2PASS_PACKET_SZ){
  1056. oc_frame_metrics *m;
  1057. int fmi;
  1058. ogg_uint32_t dup_count;
  1059. ogg_int32_t log_scale;
  1060. int qti;
  1061. /*Read the metrics for the next frame.*/
  1062. dup_count=oc_rc_unbuffer_val(&_enc->rc,4);
  1063. log_scale=oc_rc_unbuffer_val(&_enc->rc,4);
  1064. /*Add the to the circular buffer.*/
  1065. fmi=_enc->rc.frame_metrics_head+_enc->rc.nframe_metrics++;
  1066. if(fmi>=_enc->rc.cframe_metrics)fmi-=_enc->rc.cframe_metrics;
  1067. m=_enc->rc.frame_metrics+fmi;
  1068. m->log_scale=log_scale;
  1069. qti=(dup_count&0x80000000)>>31;
  1070. m->dup_count=dup_count&0x7FFFFFFF;
  1071. m->frame_type=qti;
  1072. /*And accumulate the statistics over the window.*/
  1073. _enc->rc.nframes[qti]++;
  1074. _enc->rc.nframes[2]+=m->dup_count;
  1075. _enc->rc.scale_sum[qti]+=oc_bexp_q24(m->log_scale);
  1076. _enc->rc.scale_window_end+=m->dup_count+1;
  1077. /*Compute an upper bound on the number of remaining packets needed
  1078. for the current window.*/
  1079. frames_needed=OC_CLAMPI(0,_enc->rc.buf_delay
  1080. -(_enc->rc.scale_window_end-_enc->rc.scale_window0),
  1081. _enc->rc.frames_left[0]+_enc->rc.frames_left[1]
  1082. -_enc->rc.nframes[0]-_enc->rc.nframes[1]);
  1083. /*Clear the buffer for the next frame.*/
  1084. _enc->rc.twopass_buffer_fill=0;
  1085. _enc->rc.twopass_buffer_bytes=0;
  1086. }
  1087. /*Go back for more data.*/
  1088. else break;
  1089. }
  1090. /*If we've got all the frames we need, fill in the current metrics.
  1091. We're ready to go.*/
  1092. if(frames_needed<=0){
  1093. int arg;
  1094. *&_enc->rc.cur_metrics=
  1095. *(_enc->rc.frame_metrics+_enc->rc.frame_metrics_head);
  1096. _enc->rc.twopass_force_kf=
  1097. _enc->rc.cur_metrics.frame_type==OC_INTRA_FRAME;
  1098. /*"Helpfully" set the dup count back to what it was in pass 1.*/
  1099. arg=_enc->rc.cur_metrics.dup_count;
  1100. th_encode_ctl(_enc,TH_ENCCTL_SET_DUP_COUNT,&arg,sizeof(arg));
  1101. /*Mark us ready for the next frame.*/
  1102. _enc->rc.twopass_buffer_bytes=1;
  1103. }
  1104. }
  1105. }
  1106. }
  1107. return (int)consumed;
  1108. }