patch-libavcodec_mips_aaccoder_mips_c 60 KB

  1. $OpenBSD: patch-libavcodec_mips_aaccoder_mips_c,v 1.5 2016/01/14 06:09:43 ajacoutot Exp $
  2. AAC encoder: refactor to resynchronize MIPS port
  3. AAC encoder: simplify and speed up find_min_book
  4. AAC encoder: Extensive improvements
  5. AAC encoder: memoize quantize_band_cost
  6. avcodec/mips/aaccoder_mips: Disable ff_aac_coder_init_mips() to prevent build failure
  7. avcodec/mips/aaccoder_mips: Sync with the generic code
  8. AAC encoder: improve SF range utilization
  9. AAC encoder: various fixes in M/S coding
  10. --- libavcodec/mips/aaccoder_mips.c.orig Fri Dec 25 03:00:19 2015
  11. +++ libavcodec/mips/aaccoder_mips.c Wed Jan 13 15:13:20 2016
  12. @@ -62,6 +62,8 @@
  13. #include "libavcodec/aac.h"
  14. #include "libavcodec/aacenc.h"
  15. #include "libavcodec/aactab.h"
  16. +#include "libavcodec/aacenctab.h"
  17. +#include "libavcodec/aacenc_utils.h"
  18. #if HAVE_INLINE_ASM
  19. typedef struct BandCodingPath {
  20. @@ -70,21 +72,6 @@ typedef struct BandCodingPath {
  21. int run;
  22. } BandCodingPath;
  23. -static const uint8_t run_value_bits_long[64] = {
  24. - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
  25. - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10,
  26. - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
  27. - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15
  28. -};
  29. -
  30. -static const uint8_t run_value_bits_short[16] = {
  31. - 3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 9
  32. -};
  33. -
  34. -static const uint8_t * const run_value_bits[2] = {
  35. - run_value_bits_long, run_value_bits_short
  36. -};
  37. -
  38. static const uint8_t uquad_sign_bits[81] = {
  39. 0, 1, 1, 1, 2, 2, 1, 2, 2,
  40. 1, 2, 2, 2, 3, 3, 2, 3, 3,
  41. @@ -144,65 +131,6 @@ static const uint8_t esc_sign_bits[289] = {
  42. 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
  43. };
  44. -#define ROUND_STANDARD 0.4054f
  45. -#define ROUND_TO_ZERO 0.1054f
  46. -
  47. -static void abs_pow34_v(float *out, const float *in, const int size) {
  48. -#ifndef USE_REALLY_FULL_SEARCH
  49. - int i;
  50. - float a, b, c, d;
  51. - float ax, bx, cx, dx;
  52. -
  53. - for (i = 0; i < size; i += 4) {
  54. - a = fabsf(in[i ]);
  55. - b = fabsf(in[i+1]);
  56. - c = fabsf(in[i+2]);
  57. - d = fabsf(in[i+3]);
  58. -
  59. - ax = sqrtf(a);
  60. - bx = sqrtf(b);
  61. - cx = sqrtf(c);
  62. - dx = sqrtf(d);
  63. -
  64. - a = a * ax;
  65. - b = b * bx;
  66. - c = c * cx;
  67. - d = d * dx;
  68. -
  69. - out[i ] = sqrtf(a);
  70. - out[i+1] = sqrtf(b);
  71. - out[i+2] = sqrtf(c);
  72. - out[i+3] = sqrtf(d);
  73. - }
  74. -#endif /* USE_REALLY_FULL_SEARCH */
  75. -}
  76. -
  77. -static float find_max_val(int group_len, int swb_size, const float *scaled) {
  78. - float maxval = 0.0f;
  79. - int w2, i;
  80. - for (w2 = 0; w2 < group_len; w2++) {
  81. - for (i = 0; i < swb_size; i++) {
  82. - maxval = FFMAX(maxval, scaled[w2*128+i]);
  83. - }
  84. - }
  85. - return maxval;
  86. -}
  87. -
  88. -static int find_min_book(float maxval, int sf) {
  89. - float Q = ff_aac_pow2sf_tab[POW_SF2_ZERO - sf + SCALE_ONE_POS - SCALE_DIV_512];
  90. - float Q34 = sqrtf(Q * sqrtf(Q));
  91. - int qmaxval, cb;
  92. - qmaxval = maxval * Q34 + 0.4054f;
  93. - if (qmaxval == 0) cb = 0;
  94. - else if (qmaxval == 1) cb = 1;
  95. - else if (qmaxval == 2) cb = 3;
  96. - else if (qmaxval <= 4) cb = 5;
  97. - else if (qmaxval <= 7) cb = 7;
  98. - else if (qmaxval <= 12) cb = 9;
  99. - else cb = 11;
  100. - return cb;
  101. -}
  102. -
  103. /**
  104. * Functions developed from template function and optimized for quantizing and encoding band
  105. */
  106. @@ -210,14 +138,17 @@ static void quantize_and_encode_band_cost_SQUAD_mips(s
  107. PutBitContext *pb, const float *in, float *out,
  108. const float *scaled, int size, int scale_idx,
  109. int cb, const float lambda, const float uplim,
  110. - int *bits, const float ROUNDING)
  111. + int *bits, float *energy, const float ROUNDING)
  112. {
  113. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  114. + const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  115. int i;
  116. int qc1, qc2, qc3, qc4;
  117. + float qenergy = 0.0f;
  118. uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
  119. uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
  120. + float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
  121. abs_pow34_v(s->scoefs, in, size);
  122. scaled = s->scoefs;
  123. @@ -225,6 +156,7 @@ static void quantize_and_encode_band_cost_SQUAD_mips(s
  124. int curidx;
  125. int *in_int = (int *)&in[i];
  126. int t0, t1, t2, t3, t4, t5, t6, t7;
  127. + const float *vec;
  128. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  129. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  130. @@ -276,21 +208,43 @@ static void quantize_and_encode_band_cost_SQUAD_mips(s
  131. curidx += 40;
  132. put_bits(pb, p_bits[curidx], p_codes[curidx]);
  133. +
  134. + if (out || energy) {
  135. + float e1,e2,e3,e4;
  136. + vec = &p_vec[curidx*4];
  137. + e1 = vec[0] * IQ;
  138. + e2 = vec[1] * IQ;
  139. + e3 = vec[2] * IQ;
  140. + e4 = vec[3] * IQ;
  141. + if (out) {
  142. + out[i+0] = e1;
  143. + out[i+1] = e2;
  144. + out[i+2] = e3;
  145. + out[i+3] = e4;
  146. + }
  147. + if (energy)
  148. + qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
  149. + }
  150. }
  151. + if (energy)
  152. + *energy = qenergy;
  153. }
  154. static void quantize_and_encode_band_cost_UQUAD_mips(struct AACEncContext *s,
  155. PutBitContext *pb, const float *in, float *out,
  156. const float *scaled, int size, int scale_idx,
  157. int cb, const float lambda, const float uplim,
  158. - int *bits, const float ROUNDING)
  159. + int *bits, float *energy, const float ROUNDING)
  160. {
  161. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  162. + const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  163. int i;
  164. int qc1, qc2, qc3, qc4;
  165. + float qenergy = 0.0f;
  166. uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
  167. uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
  168. + float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
  169. abs_pow34_v(s->scoefs, in, size);
  170. scaled = s->scoefs;
  171. @@ -300,6 +254,7 @@ static void quantize_and_encode_band_cost_UQUAD_mips(s
  172. uint8_t v_bits;
  173. unsigned int v_codes;
  174. int t0, t1, t2, t3, t4;
  175. + const float *vec;
  176. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  177. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  178. @@ -368,21 +323,43 @@ static void quantize_and_encode_band_cost_UQUAD_mips(s
  179. v_codes = (p_codes[curidx] << count) | (sign & ((1 << count) - 1));
  180. v_bits = p_bits[curidx] + count;
  181. put_bits(pb, v_bits, v_codes);
  182. +
  183. + if (out || energy) {
  184. + float e1,e2,e3,e4;
  185. + vec = &p_vec[curidx*4];
  186. + e1 = copysignf(vec[0] * IQ, in[i+0]);
  187. + e2 = copysignf(vec[1] * IQ, in[i+1]);
  188. + e3 = copysignf(vec[2] * IQ, in[i+2]);
  189. + e4 = copysignf(vec[3] * IQ, in[i+3]);
  190. + if (out) {
  191. + out[i+0] = e1;
  192. + out[i+1] = e2;
  193. + out[i+2] = e3;
  194. + out[i+3] = e4;
  195. + }
  196. + if (energy)
  197. + qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
  198. + }
  199. }
  200. + if (energy)
  201. + *energy = qenergy;
  202. }
  203. static void quantize_and_encode_band_cost_SPAIR_mips(struct AACEncContext *s,
  204. PutBitContext *pb, const float *in, float *out,
  205. const float *scaled, int size, int scale_idx,
  206. int cb, const float lambda, const float uplim,
  207. - int *bits, const float ROUNDING)
  208. + int *bits, float *energy, const float ROUNDING)
  209. {
  210. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  211. + const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  212. int i;
  213. int qc1, qc2, qc3, qc4;
  214. + float qenergy = 0.0f;
  215. uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
  216. uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
  217. + float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
  218. abs_pow34_v(s->scoefs, in, size);
  219. scaled = s->scoefs;
  220. @@ -392,6 +369,7 @@ static void quantize_and_encode_band_cost_SPAIR_mips(s
  221. uint8_t v_bits;
  222. unsigned int v_codes;
  223. int t0, t1, t2, t3, t4, t5, t6, t7;
  224. + const float *vec1, *vec2;
  225. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  226. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  227. @@ -447,30 +425,54 @@ static void quantize_and_encode_band_cost_SPAIR_mips(s
  228. v_codes = (p_codes[curidx] << p_bits[curidx2]) | (p_codes[curidx2]);
  229. v_bits = p_bits[curidx] + p_bits[curidx2];
  230. put_bits(pb, v_bits, v_codes);
  231. +
  232. + if (out || energy) {
  233. + float e1,e2,e3,e4;
  234. + vec1 = &p_vec[curidx*2 ];
  235. + vec2 = &p_vec[curidx2*2];
  236. + e1 = vec1[0] * IQ;
  237. + e2 = vec1[1] * IQ;
  238. + e3 = vec2[0] * IQ;
  239. + e4 = vec2[1] * IQ;
  240. + if (out) {
  241. + out[i+0] = e1;
  242. + out[i+1] = e2;
  243. + out[i+2] = e3;
  244. + out[i+3] = e4;
  245. + }
  246. + if (energy)
  247. + qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
  248. + }
  249. }
  250. + if (energy)
  251. + *energy = qenergy;
  252. }
  253. static void quantize_and_encode_band_cost_UPAIR7_mips(struct AACEncContext *s,
  254. PutBitContext *pb, const float *in, float *out,
  255. const float *scaled, int size, int scale_idx,
  256. int cb, const float lambda, const float uplim,
  257. - int *bits, const float ROUNDING)
  258. + int *bits, float *energy, const float ROUNDING)
  259. {
  260. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  261. + const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  262. int i;
  263. int qc1, qc2, qc3, qc4;
  264. + float qenergy = 0.0f;
  265. uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1];
  266. uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
  267. + float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
  268. abs_pow34_v(s->scoefs, in, size);
  269. scaled = s->scoefs;
  270. for (i = 0; i < size; i += 4) {
  271. - int curidx, sign1, count1, sign2, count2;
  272. + int curidx1, curidx2, sign1, count1, sign2, count2;
  273. int *in_int = (int *)&in[i];
  274. uint8_t v_bits;
  275. unsigned int v_codes;
  276. int t0, t1, t2, t3, t4;
  277. + const float *vec1, *vec2;
  278. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  279. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  280. @@ -528,43 +530,67 @@ static void quantize_and_encode_band_cost_UPAIR7_mips(
  281. "memory"
  282. );
  283. - curidx = 8 * qc1;
  284. - curidx += qc2;
  285. + curidx1 = 8 * qc1;
  286. + curidx1 += qc2;
  287. - v_codes = (p_codes[curidx] << count1) | sign1;
  288. - v_bits = p_bits[curidx] + count1;
  289. + v_codes = (p_codes[curidx1] << count1) | sign1;
  290. + v_bits = p_bits[curidx1] + count1;
  291. put_bits(pb, v_bits, v_codes);
  292. - curidx = 8 * qc3;
  293. - curidx += qc4;
  294. + curidx2 = 8 * qc3;
  295. + curidx2 += qc4;
  296. - v_codes = (p_codes[curidx] << count2) | sign2;
  297. - v_bits = p_bits[curidx] + count2;
  298. + v_codes = (p_codes[curidx2] << count2) | sign2;
  299. + v_bits = p_bits[curidx2] + count2;
  300. put_bits(pb, v_bits, v_codes);
  301. +
  302. + if (out || energy) {
  303. + float e1,e2,e3,e4;
  304. + vec1 = &p_vec[curidx1*2];
  305. + vec2 = &p_vec[curidx2*2];
  306. + e1 = copysignf(vec1[0] * IQ, in[i+0]);
  307. + e2 = copysignf(vec1[1] * IQ, in[i+1]);
  308. + e3 = copysignf(vec2[0] * IQ, in[i+2]);
  309. + e4 = copysignf(vec2[1] * IQ, in[i+3]);
  310. + if (out) {
  311. + out[i+0] = e1;
  312. + out[i+1] = e2;
  313. + out[i+2] = e3;
  314. + out[i+3] = e4;
  315. + }
  316. + if (energy)
  317. + qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
  318. + }
  319. }
  320. + if (energy)
  321. + *energy = qenergy;
  322. }
  323. static void quantize_and_encode_band_cost_UPAIR12_mips(struct AACEncContext *s,
  324. PutBitContext *pb, const float *in, float *out,
  325. const float *scaled, int size, int scale_idx,
  326. int cb, const float lambda, const float uplim,
  327. - int *bits, const float ROUNDING)
  328. + int *bits, float *energy, const float ROUNDING)
  329. {
  330. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  331. + const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  332. int i;
  333. int qc1, qc2, qc3, qc4;
  334. + float qenergy = 0.0f;
  335. uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1];
  336. uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
  337. + float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
  338. abs_pow34_v(s->scoefs, in, size);
  339. scaled = s->scoefs;
  340. for (i = 0; i < size; i += 4) {
  341. - int curidx, sign1, count1, sign2, count2;
  342. + int curidx1, curidx2, sign1, count1, sign2, count2;
  343. int *in_int = (int *)&in[i];
  344. uint8_t v_bits;
  345. unsigned int v_codes;
  346. int t0, t1, t2, t3, t4;
  347. + const float *vec1, *vec2;
  348. qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
  349. qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
  350. @@ -621,31 +647,53 @@ static void quantize_and_encode_band_cost_UPAIR12_mips
  351. : "memory"
  352. );
  353. - curidx = 13 * qc1;
  354. - curidx += qc2;
  355. + curidx1 = 13 * qc1;
  356. + curidx1 += qc2;
  357. - v_codes = (p_codes[curidx] << count1) | sign1;
  358. - v_bits = p_bits[curidx] + count1;
  359. + v_codes = (p_codes[curidx1] << count1) | sign1;
  360. + v_bits = p_bits[curidx1] + count1;
  361. put_bits(pb, v_bits, v_codes);
  362. - curidx = 13 * qc3;
  363. - curidx += qc4;
  364. + curidx2 = 13 * qc3;
  365. + curidx2 += qc4;
  366. - v_codes = (p_codes[curidx] << count2) | sign2;
  367. - v_bits = p_bits[curidx] + count2;
  368. + v_codes = (p_codes[curidx2] << count2) | sign2;
  369. + v_bits = p_bits[curidx2] + count2;
  370. put_bits(pb, v_bits, v_codes);
  371. +
  372. + if (out || energy) {
  373. + float e1,e2,e3,e4;
  374. + vec1 = &p_vec[curidx1*2];
  375. + vec2 = &p_vec[curidx2*2];
  376. + e1 = copysignf(vec1[0] * IQ, in[i+0]);
  377. + e2 = copysignf(vec1[1] * IQ, in[i+1]);
  378. + e3 = copysignf(vec2[0] * IQ, in[i+2]);
  379. + e4 = copysignf(vec2[1] * IQ, in[i+3]);
  380. + if (out) {
  381. + out[i+0] = e1;
  382. + out[i+1] = e2;
  383. + out[i+2] = e3;
  384. + out[i+3] = e4;
  385. + }
  386. + if (energy)
  387. + qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
  388. + }
  389. }
  390. + if (energy)
  391. + *energy = qenergy;
  392. }
  393. static void quantize_and_encode_band_cost_ESC_mips(struct AACEncContext *s,
  394. PutBitContext *pb, const float *in, float *out,
  395. const float *scaled, int size, int scale_idx,
  396. int cb, const float lambda, const float uplim,
  397. - int *bits, const float ROUNDING)
  398. + int *bits, float *energy, const float ROUNDING)
  399. {
  400. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  401. + const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  402. int i;
  403. int qc1, qc2, qc3, qc4;
  404. + float qenergy = 0.0f;
  405. uint8_t *p_bits = (uint8_t* )ff_aac_spectral_bits[cb-1];
  406. uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
  407. @@ -661,6 +709,7 @@ static void quantize_and_encode_band_cost_ESC_mips(str
  408. uint8_t v_bits;
  409. unsigned int v_codes;
  410. int t0, t1, t2, t3, t4;
  411. + const float *vec1, *vec2;
  412. qc1 = scaled[i ] * Q34 + ROUNDING;
  413. qc2 = scaled[i+1] * Q34 + ROUNDING;
  414. @@ -729,6 +778,24 @@ static void quantize_and_encode_band_cost_ESC_mips(str
  415. v_codes = (p_codes[curidx2] << count2) | sign2;
  416. v_bits = p_bits[curidx2] + count2;
  417. put_bits(pb, v_bits, v_codes);
  418. +
  419. + if (out || energy) {
  420. + float e1,e2,e3,e4;
  421. + vec1 = &p_vectors[curidx*2 ];
  422. + vec2 = &p_vectors[curidx2*2];
  423. + e1 = copysignf(vec1[0] * IQ, in[i+0]);
  424. + e2 = copysignf(vec1[1] * IQ, in[i+1]);
  425. + e3 = copysignf(vec2[0] * IQ, in[i+2]);
  426. + e4 = copysignf(vec2[1] * IQ, in[i+3]);
  427. + if (out) {
  428. + out[i+0] = e1;
  429. + out[i+1] = e2;
  430. + out[i+2] = e3;
  431. + out[i+3] = e4;
  432. + }
  433. + if (energy)
  434. + qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
  435. + }
  436. }
  437. } else {
  438. for (i = 0; i < size; i += 4) {
  439. @@ -839,15 +906,33 @@ static void quantize_and_encode_band_cost_ESC_mips(str
  440. v_codes = (((1 << (len - 3)) - 2) << len) | (c4 & ((1 << len) - 1));
  441. put_bits(pb, len * 2 - 3, v_codes);
  442. }
  443. +
  444. + if (out || energy) {
  445. + float e1, e2, e3, e4;
  446. + e1 = copysignf(c1 * cbrtf(c1) * IQ, in[i+0]);
  447. + e2 = copysignf(c2 * cbrtf(c2) * IQ, in[i+1]);
  448. + e3 = copysignf(c3 * cbrtf(c3) * IQ, in[i+2]);
  449. + e4 = copysignf(c4 * cbrtf(c4) * IQ, in[i+3]);
  450. + if (out) {
  451. + out[i+0] = e1;
  452. + out[i+1] = e2;
  453. + out[i+2] = e3;
  454. + out[i+3] = e4;
  455. + }
  456. + if (energy)
  457. + qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
  458. + }
  459. }
  460. }
  461. + if (energy)
  462. + *energy = qenergy;
  463. }
  464. static void quantize_and_encode_band_cost_NONE_mips(struct AACEncContext *s,
  465. PutBitContext *pb, const float *in, float *out,
  466. const float *scaled, int size, int scale_idx,
  467. int cb, const float lambda, const float uplim,
  468. - int *bits, const float ROUNDING) {
  469. + int *bits, float *energy, const float ROUNDING) {
  470. av_assert0(0);
  471. }
  472. @@ -855,7 +940,7 @@ static void quantize_and_encode_band_cost_ZERO_mips(st
  473. PutBitContext *pb, const float *in, float *out,
  474. const float *scaled, int size, int scale_idx,
  475. int cb, const float lambda, const float uplim,
  476. - int *bits, const float ROUNDING) {
  477. + int *bits, float *energy, const float ROUNDING) {
  478. int i;
  479. if (bits)
  480. *bits = 0;
  481. @@ -867,13 +952,15 @@ static void quantize_and_encode_band_cost_ZERO_mips(st
  482. out[i+3] = 0.0f;
  483. }
  484. }
  485. + if (energy)
  486. + *energy = 0.0f;
  487. }
  488. static void (*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s,
  489. PutBitContext *pb, const float *in, float *out,
  490. const float *scaled, int size, int scale_idx,
  491. int cb, const float lambda, const float uplim,
  492. - int *bits, const float ROUNDING) = {
  493. + int *bits, float *energy, const float ROUNDING) = {
  494. quantize_and_encode_band_cost_ZERO_mips,
  495. quantize_and_encode_band_cost_SQUAD_mips,
  496. quantize_and_encode_band_cost_SQUAD_mips,
  497. @@ -894,17 +981,17 @@ static void (*const quantize_and_encode_band_cost_arr[
  498. #define quantize_and_encode_band_cost( \
  499. s, pb, in, out, scaled, size, scale_idx, cb, \
  500. - lambda, uplim, bits, ROUNDING) \
  501. + lambda, uplim, bits, energy, ROUNDING) \
  502. quantize_and_encode_band_cost_arr[cb]( \
  503. s, pb, in, out, scaled, size, scale_idx, cb, \
  504. - lambda, uplim, bits, ROUNDING)
  505. + lambda, uplim, bits, energy, ROUNDING)
  506. static void quantize_and_encode_band_mips(struct AACEncContext *s, PutBitContext *pb,
  507. const float *in, float *out, int size, int scale_idx,
  508. int cb, const float lambda, int rtz)
  509. {
  510. quantize_and_encode_band_cost(s, pb, in, out, NULL, size, scale_idx, cb, lambda,
  511. - INFINITY, NULL, (rtz) ? ROUND_TO_ZERO : ROUND_STANDARD);
  512. + INFINITY, NULL, NULL, (rtz) ? ROUND_TO_ZERO : ROUND_STANDARD);
  513. }
  514. /**
  515. @@ -1384,7 +1471,7 @@ static float (*const get_band_numbits_arr[])(struct AA
  516. static float quantize_band_cost_bits(struct AACEncContext *s, const float *in,
  517. const float *scaled, int size, int scale_idx,
  518. int cb, const float lambda, const float uplim,
  519. - int *bits)
  520. + int *bits, float *energy, int rtz)
  521. {
  522. return get_band_numbits(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits);
  523. }
  524. @@ -1397,7 +1484,7 @@ static float get_band_cost_ZERO_mips(struct AACEncCont
  525. PutBitContext *pb, const float *in,
  526. const float *scaled, int size, int scale_idx,
  527. int cb, const float lambda, const float uplim,
  528. - int *bits)
  529. + int *bits, float *energy)
  530. {
  531. int i;
  532. float cost = 0;
  533. @@ -1410,6 +1497,8 @@ static float get_band_cost_ZERO_mips(struct AACEncCont
  534. }
  535. if (bits)
  536. *bits = 0;
  537. + if (energy)
  538. + *energy = 0.0f;
  539. return cost * lambda;
  540. }
  541. @@ -1417,7 +1506,7 @@ static float get_band_cost_NONE_mips(struct AACEncCont
  542. PutBitContext *pb, const float *in,
  543. const float *scaled, int size, int scale_idx,
  544. int cb, const float lambda, const float uplim,
  545. - int *bits)
  546. + int *bits, float *energy)
  547. {
  548. av_assert0(0);
  549. return 0;
  550. @@ -1427,12 +1516,13 @@ static float get_band_cost_SQUAD_mips(struct AACEncCon
  551. PutBitContext *pb, const float *in,
  552. const float *scaled, int size, int scale_idx,
  553. int cb, const float lambda, const float uplim,
  554. - int *bits)
  555. + int *bits, float *energy)
  556. {
  557. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  558. const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  559. int i;
  560. float cost = 0;
  561. + float qenergy = 0.0f;
  562. int qc1, qc2, qc3, qc4;
  563. int curbits = 0;
  564. @@ -1499,6 +1589,9 @@ static float get_band_cost_SQUAD_mips(struct AACEncCon
  565. curbits += p_bits[curidx];
  566. vec = &p_codes[curidx*4];
  567. + qenergy += vec[0]*vec[0] + vec[1]*vec[1]
  568. + + vec[2]*vec[2] + vec[3]*vec[3];
  569. +
  570. __asm__ volatile (
  571. ".set push \n\t"
  572. ".set noreorder \n\t"
  573. @@ -1533,6 +1626,8 @@ static float get_band_cost_SQUAD_mips(struct AACEncCon
  574. if (bits)
  575. *bits = curbits;
  576. + if (energy)
  577. + *energy = qenergy * (IQ*IQ);
  578. return cost * lambda + curbits;
  579. }
  580. @@ -1540,12 +1635,13 @@ static float get_band_cost_UQUAD_mips(struct AACEncCon
  581. PutBitContext *pb, const float *in,
  582. const float *scaled, int size, int scale_idx,
  583. int cb, const float lambda, const float uplim,
  584. - int *bits)
  585. + int *bits, float *energy)
  586. {
  587. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  588. const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  589. int i;
  590. float cost = 0;
  591. + float qenergy = 0.0f;
  592. int curbits = 0;
  593. int qc1, qc2, qc3, qc4;
  594. @@ -1598,6 +1694,9 @@ static float get_band_cost_UQUAD_mips(struct AACEncCon
  595. curbits += uquad_sign_bits[curidx];
  596. vec = &p_codes[curidx*4];
  597. + qenergy += vec[0]*vec[0] + vec[1]*vec[1]
  598. + + vec[2]*vec[2] + vec[3]*vec[3];
  599. +
  600. __asm__ volatile (
  601. ".set push \n\t"
  602. ".set noreorder \n\t"
  603. @@ -1635,6 +1734,8 @@ static float get_band_cost_UQUAD_mips(struct AACEncCon
  604. if (bits)
  605. *bits = curbits;
  606. + if (energy)
  607. + *energy = qenergy * (IQ*IQ);
  608. return cost * lambda + curbits;
  609. }
  610. @@ -1642,12 +1743,13 @@ static float get_band_cost_SPAIR_mips(struct AACEncCon
  611. PutBitContext *pb, const float *in,
  612. const float *scaled, int size, int scale_idx,
  613. int cb, const float lambda, const float uplim,
  614. - int *bits)
  615. + int *bits, float *energy)
  616. {
  617. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  618. const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  619. int i;
  620. float cost = 0;
  621. + float qenergy = 0.0f;
  622. int qc1, qc2, qc3, qc4;
  623. int curbits = 0;
  624. @@ -1719,6 +1821,9 @@ static float get_band_cost_SPAIR_mips(struct AACEncCon
  625. vec = &p_codes[curidx*2];
  626. vec2 = &p_codes[curidx2*2];
  627. + qenergy += vec[0]*vec[0] + vec[1]*vec[1]
  628. + + vec2[0]*vec2[0] + vec2[1]*vec2[1];
  629. +
  630. __asm__ volatile (
  631. ".set push \n\t"
  632. ".set noreorder \n\t"
  633. @@ -1753,6 +1858,8 @@ static float get_band_cost_SPAIR_mips(struct AACEncCon
  634. if (bits)
  635. *bits = curbits;
  636. + if (energy)
  637. + *energy = qenergy * (IQ*IQ);
  638. return cost * lambda + curbits;
  639. }
  640. @@ -1760,12 +1867,13 @@ static float get_band_cost_UPAIR7_mips(struct AACEncCo
  641. PutBitContext *pb, const float *in,
  642. const float *scaled, int size, int scale_idx,
  643. int cb, const float lambda, const float uplim,
  644. - int *bits)
  645. + int *bits, float *energy)
  646. {
  647. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  648. const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  649. int i;
  650. float cost = 0;
  651. + float qenergy = 0.0f;
  652. int qc1, qc2, qc3, qc4;
  653. int curbits = 0;
  654. @@ -1849,6 +1957,9 @@ static float get_band_cost_UPAIR7_mips(struct AACEncCo
  655. curbits += upair7_sign_bits[curidx2];
  656. vec2 = &p_codes[curidx2*2];
  657. + qenergy += vec[0]*vec[0] + vec[1]*vec[1]
  658. + + vec2[0]*vec2[0] + vec2[1]*vec2[1];
  659. +
  660. __asm__ volatile (
  661. ".set push \n\t"
  662. ".set noreorder \n\t"
  663. @@ -1886,6 +1997,8 @@ static float get_band_cost_UPAIR7_mips(struct AACEncCo
  664. if (bits)
  665. *bits = curbits;
  666. + if (energy)
  667. + *energy = qenergy * (IQ*IQ);
  668. return cost * lambda + curbits;
  669. }
  670. @@ -1893,12 +2006,13 @@ static float get_band_cost_UPAIR12_mips(struct AACEncC
  671. PutBitContext *pb, const float *in,
  672. const float *scaled, int size, int scale_idx,
  673. int cb, const float lambda, const float uplim,
  674. - int *bits)
  675. + int *bits, float *energy)
  676. {
  677. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  678. const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  679. int i;
  680. float cost = 0;
  681. + float qenergy = 0.0f;
  682. int qc1, qc2, qc3, qc4;
  683. int curbits = 0;
  684. @@ -1982,6 +2096,9 @@ static float get_band_cost_UPAIR12_mips(struct AACEncC
  685. vec = &p_codes[curidx*2];
  686. vec2 = &p_codes[curidx2*2];
  687. + qenergy += vec[0]*vec[0] + vec[1]*vec[1]
  688. + + vec2[0]*vec2[0] + vec2[1]*vec2[1];
  689. +
  690. __asm__ volatile (
  691. ".set push \n\t"
  692. ".set noreorder \n\t"
  693. @@ -2019,6 +2136,8 @@ static float get_band_cost_UPAIR12_mips(struct AACEncC
  694. if (bits)
  695. *bits = curbits;
  696. + if (energy)
  697. + *energy = qenergy * (IQ*IQ);
  698. return cost * lambda + curbits;
  699. }
  700. @@ -2026,13 +2145,14 @@ static float get_band_cost_ESC_mips(struct AACEncConte
  701. PutBitContext *pb, const float *in,
  702. const float *scaled, int size, int scale_idx,
  703. int cb, const float lambda, const float uplim,
  704. - int *bits)
  705. + int *bits, float *energy)
  706. {
  707. const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
  708. const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
  709. const float CLIPPED_ESCAPE = 165140.0f * IQ;
  710. int i;
  711. float cost = 0;
  712. + float qenergy = 0.0f;
  713. int qc1, qc2, qc3, qc4;
  714. int curbits = 0;
  715. @@ -2042,7 +2162,7 @@ static float get_band_cost_ESC_mips(struct AACEncConte
  716. for (i = 0; i < size; i += 4) {
  717. const float *vec, *vec2;
  718. int curidx, curidx2;
  719. - float t1, t2, t3, t4;
  720. + float t1, t2, t3, t4, V;
  721. float di1, di2, di3, di4;
  722. int cond0, cond1, cond2, cond3;
  723. int c1, c2, c3, c4;
  724. @@ -2114,38 +2234,54 @@ static float get_band_cost_ESC_mips(struct AACEncConte
  725. if (cond0) {
  726. if (t1 >= CLIPPED_ESCAPE) {
  727. di1 = t1 - CLIPPED_ESCAPE;
  728. + qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
  729. } else {
  730. - di1 = t1 - c1 * cbrtf(c1) * IQ;
  731. + di1 = t1 - (V = c1 * cbrtf(c1) * IQ);
  732. + qenergy += V*V;
  733. }
  734. - } else
  735. - di1 = t1 - vec[0] * IQ;
  736. + } else {
  737. + di1 = t1 - (V = vec[0] * IQ);
  738. + qenergy += V*V;
  739. + }
  740. if (cond1) {
  741. if (t2 >= CLIPPED_ESCAPE) {
  742. di2 = t2 - CLIPPED_ESCAPE;
  743. + qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
  744. } else {
  745. - di2 = t2 - c2 * cbrtf(c2) * IQ;
  746. + di2 = t2 - (V = c2 * cbrtf(c2) * IQ);
  747. + qenergy += V*V;
  748. }
  749. - } else
  750. - di2 = t2 - vec[1] * IQ;
  751. + } else {
  752. + di2 = t2 - (V = vec[1] * IQ);
  753. + qenergy += V*V;
  754. + }
  755. if (cond2) {
  756. if (t3 >= CLIPPED_ESCAPE) {
  757. di3 = t3 - CLIPPED_ESCAPE;
  758. + qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
  759. } else {
  760. - di3 = t3 - c3 * cbrtf(c3) * IQ;
  761. + di3 = t3 - (V = c3 * cbrtf(c3) * IQ);
  762. + qenergy += V*V;
  763. }
  764. - } else
  765. - di3 = t3 - vec2[0] * IQ;
  766. + } else {
  767. + di3 = t3 - (V = vec2[0] * IQ);
  768. + qenergy += V*V;
  769. + }
  770. if (cond3) {
  771. if (t4 >= CLIPPED_ESCAPE) {
  772. di4 = t4 - CLIPPED_ESCAPE;
  773. + qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
  774. } else {
  775. - di4 = t4 - c4 * cbrtf(c4) * IQ;
  776. + di4 = t4 - (V = c4 * cbrtf(c4) * IQ);
  777. + qenergy += V*V;
  778. }
  779. - } else
  780. - di4 = t4 - vec2[1]*IQ;
  781. + } else {
  782. + di4 = t4 - (V = vec2[1]*IQ);
  783. + qenergy += V*V;
  784. + }
  785. cost += di1 * di1 + di2 * di2
  786. + di3 * di3 + di4 * di4;
  787. @@ -2160,7 +2296,7 @@ static float (*const get_band_cost_arr[])(struct AACEn
  788. PutBitContext *pb, const float *in,
  789. const float *scaled, int size, int scale_idx,
  790. int cb, const float lambda, const float uplim,
  791. - int *bits) = {
  792. + int *bits, float *energy) = {
  793. get_band_cost_ZERO_mips,
  794. get_band_cost_SQUAD_mips,
  795. get_band_cost_SQUAD_mips,
  796. @@ -2181,404 +2317,182 @@ static float (*const get_band_cost_arr[])(struct AACEn
  797. #define get_band_cost( \
  798. s, pb, in, scaled, size, scale_idx, cb, \
  799. - lambda, uplim, bits) \
  800. + lambda, uplim, bits, energy) \
  801. get_band_cost_arr[cb]( \
  802. s, pb, in, scaled, size, scale_idx, cb, \
  803. - lambda, uplim, bits)
  804. + lambda, uplim, bits, energy)
  805. static float quantize_band_cost(struct AACEncContext *s, const float *in,
  806. const float *scaled, int size, int scale_idx,
  807. int cb, const float lambda, const float uplim,
  808. - int *bits)
  809. + int *bits, float *energy, int rtz)
  810. {
  811. - return get_band_cost(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits);
  812. + return get_band_cost(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits, energy);
  813. }
  814. -static void search_for_quantizers_twoloop_mips(AVCodecContext *avctx,
  815. - AACEncContext *s,
  816. - SingleChannelElement *sce,
  817. - const float lambda)
  818. -{
  819. - int start = 0, i, w, w2, g;
  820. - int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels;
  821. - float dists[128] = { 0 }, uplims[128];
  822. - float maxvals[128];
  823. - int fflag, minscaler;
  824. - int its = 0;
  825. - int allz = 0;
  826. - float minthr = INFINITY;
  827. +#include "libavcodec/aacenc_quantization_misc.h"
  828. - destbits = FFMIN(destbits, 5800);
  829. - for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  830. - for (g = 0; g < sce->ics.num_swb; g++) {
  831. - int nz = 0;
  832. - float uplim = 0.0f;
  833. - for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  834. - FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
  835. - uplim += band->threshold;
  836. - if (band->energy <= band->threshold || band->threshold == 0.0f) {
  837. - sce->zeroes[(w+w2)*16+g] = 1;
  838. - continue;
  839. - }
  840. - nz = 1;
  841. - }
  842. - uplims[w*16+g] = uplim *512;
  843. - sce->zeroes[w*16+g] = !nz;
  844. - if (nz)
  845. - minthr = FFMIN(minthr, uplim);
  846. - allz |= nz;
  847. - }
  848. - }
  849. - for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  850. - for (g = 0; g < sce->ics.num_swb; g++) {
  851. - if (sce->zeroes[w*16+g]) {
  852. - sce->sf_idx[w*16+g] = SCALE_ONE_POS;
  853. - continue;
  854. - }
  855. - sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2f(uplims[w*16+g]/minthr)*4,59);
  856. - }
  857. - }
  858. +#include "libavcodec/aaccoder_twoloop.h"
  859. - if (!allz)
  860. - return;
  861. - abs_pow34_v(s->scoefs, sce->coeffs, 1024);
  862. -
  863. - for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  864. - start = w*128;
  865. - for (g = 0; g < sce->ics.num_swb; g++) {
  866. - const float *scaled = s->scoefs + start;
  867. - maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
  868. - start += sce->ics.swb_sizes[g];
  869. - }
  870. - }
  871. -
  872. - do {
  873. - int tbits, qstep;
  874. - minscaler = sce->sf_idx[0];
  875. - qstep = its ? 1 : 32;
  876. - do {
  877. - int prev = -1;
  878. - tbits = 0;
  879. - fflag = 0;
  880. -
  881. - if (qstep > 1) {
  882. - for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  883. - start = w*128;
  884. - for (g = 0; g < sce->ics.num_swb; g++) {
  885. - const float *coefs = sce->coeffs + start;
  886. - const float *scaled = s->scoefs + start;
  887. - int bits = 0;
  888. - int cb;
  889. -
  890. - if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
  891. - start += sce->ics.swb_sizes[g];
  892. - continue;
  893. - }
  894. - minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
  895. - cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  896. - for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  897. - int b;
  898. - bits += quantize_band_cost_bits(s, coefs + w2*128,
  899. - scaled + w2*128,
  900. - sce->ics.swb_sizes[g],
  901. - sce->sf_idx[w*16+g],
  902. - cb,
  903. - 1.0f,
  904. - INFINITY,
  905. - &b);
  906. - }
  907. - if (prev != -1) {
  908. - bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
  909. - }
  910. - tbits += bits;
  911. - start += sce->ics.swb_sizes[g];
  912. - prev = sce->sf_idx[w*16+g];
  913. - }
  914. - }
  915. - }
  916. - else {
  917. - for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  918. - start = w*128;
  919. - for (g = 0; g < sce->ics.num_swb; g++) {
  920. - const float *coefs = sce->coeffs + start;
  921. - const float *scaled = s->scoefs + start;
  922. - int bits = 0;
  923. - int cb;
  924. - float dist = 0.0f;
  925. -
  926. - if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
  927. - start += sce->ics.swb_sizes[g];
  928. - continue;
  929. - }
  930. - minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
  931. - cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  932. - for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  933. - int b;
  934. - dist += quantize_band_cost(s, coefs + w2*128,
  935. - scaled + w2*128,
  936. - sce->ics.swb_sizes[g],
  937. - sce->sf_idx[w*16+g],
  938. - cb,
  939. - 1.0f,
  940. - INFINITY,
  941. - &b);
  942. - bits += b;
  943. - }
  944. - dists[w*16+g] = dist - bits;
  945. - if (prev != -1) {
  946. - bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
  947. - }
  948. - tbits += bits;
  949. - start += sce->ics.swb_sizes[g];
  950. - prev = sce->sf_idx[w*16+g];
  951. - }
  952. - }
  953. - }
  954. - if (tbits > destbits) {
  955. - for (i = 0; i < 128; i++)
  956. - if (sce->sf_idx[i] < 218 - qstep)
  957. - sce->sf_idx[i] += qstep;
  958. - } else {
  959. - for (i = 0; i < 128; i++)
  960. - if (sce->sf_idx[i] > 60 - qstep)
  961. - sce->sf_idx[i] -= qstep;
  962. - }
  963. - qstep >>= 1;
  964. - if (!qstep && tbits > destbits*1.02 && sce->sf_idx[0] < 217)
  965. - qstep = 1;
  966. - } while (qstep);
  967. -
  968. - fflag = 0;
  969. - minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF);
  970. - for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  971. - for (g = 0; g < sce->ics.num_swb; g++) {
  972. - int prevsc = sce->sf_idx[w*16+g];
  973. - if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60) {
  974. - if (find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1))
  975. - sce->sf_idx[w*16+g]--;
  976. - else
  977. - sce->sf_idx[w*16+g]-=2;
  978. - }
  979. - sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
  980. - sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);
  981. - if (sce->sf_idx[w*16+g] != prevsc)
  982. - fflag = 1;
  983. - sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  984. - }
  985. - }
  986. - its++;
  987. - } while (fflag && its < 10);
  988. -}
  989. -
  990. static void search_for_ms_mips(AACEncContext *s, ChannelElement *cpe)
  991. {
  992. - int start = 0, i, w, w2, g;
  993. + int start = 0, i, w, w2, g, sid_sf_boost, prev_mid, prev_side;
  994. + uint8_t nextband0[128], nextband1[128];
  995. float M[128], S[128];
  996. float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
  997. + const float lambda = s->lambda;
  998. + const float mslambda = FFMIN(1.0f, lambda / 120.f);
  999. SingleChannelElement *sce0 = &cpe->ch[0];
  1000. SingleChannelElement *sce1 = &cpe->ch[1];
  1001. if (!cpe->common_window)
  1002. return;
  1003. +
  1004. + /** Scout out next nonzero bands */
  1005. + ff_init_nextband_map(sce0, nextband0);
  1006. + ff_init_nextband_map(sce1, nextband1);
  1007. +
  1008. + prev_mid = sce0->sf_idx[0];
  1009. + prev_side = sce1->sf_idx[0];
  1010. for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
  1011. + start = 0;
  1012. for (g = 0; g < sce0->ics.num_swb; g++) {
  1013. - if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) {
  1014. - float dist1 = 0.0f, dist2 = 0.0f;
  1015. + float bmax = bval2bmax(g * 17.0f / sce0->ics.num_swb) / 0.0045f;
  1016. + if (!cpe->is_mask[w*16+g])
  1017. + cpe->ms_mask[w*16+g] = 0;
  1018. + if (!sce0->zeroes[w*16+g] && !sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g]) {
  1019. + float Mmax = 0.0f, Smax = 0.0f;
  1020. +
  1021. + /* Must compute mid/side SF and book for the whole window group */
  1022. for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
  1023. - FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
  1024. - FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
  1025. - float minthr = FFMIN(band0->threshold, band1->threshold);
  1026. - float maxthr = FFMAX(band0->threshold, band1->threshold);
  1027. - for (i = 0; i < sce0->ics.swb_sizes[g]; i+=4) {
  1028. - M[i ] = (sce0->coeffs[start+w2*128+i ]
  1029. - + sce1->coeffs[start+w2*128+i ]) * 0.5;
  1030. - M[i+1] = (sce0->coeffs[start+w2*128+i+1]
  1031. - + sce1->coeffs[start+w2*128+i+1]) * 0.5;
  1032. - M[i+2] = (sce0->coeffs[start+w2*128+i+2]
  1033. - + sce1->coeffs[start+w2*128+i+2]) * 0.5;
  1034. - M[i+3] = (sce0->coeffs[start+w2*128+i+3]
  1035. - + sce1->coeffs[start+w2*128+i+3]) * 0.5;
  1036. + for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
  1037. + M[i] = (sce0->coeffs[start+(w+w2)*128+i]
  1038. + + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
  1039. + S[i] = M[i]
  1040. + - sce1->coeffs[start+(w+w2)*128+i];
  1041. + }
  1042. + abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
  1043. + abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
  1044. + for (i = 0; i < sce0->ics.swb_sizes[g]; i++ ) {
  1045. + Mmax = FFMAX(Mmax, M34[i]);
  1046. + Smax = FFMAX(Smax, S34[i]);
  1047. + }
  1048. + }
  1049. - S[i ] = M[i ]
  1050. - - sce1->coeffs[start+w2*128+i ];
  1051. - S[i+1] = M[i+1]
  1052. - - sce1->coeffs[start+w2*128+i+1];
  1053. - S[i+2] = M[i+2]
  1054. - - sce1->coeffs[start+w2*128+i+2];
  1055. - S[i+3] = M[i+3]
  1056. - - sce1->coeffs[start+w2*128+i+3];
  1057. - }
  1058. - abs_pow34_v(L34, sce0->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
  1059. - abs_pow34_v(R34, sce1->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
  1060. - abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
  1061. - abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
  1062. - dist1 += quantize_band_cost(s, sce0->coeffs + start + w2*128,
  1063. - L34,
  1064. - sce0->ics.swb_sizes[g],
  1065. - sce0->sf_idx[(w+w2)*16+g],
  1066. - sce0->band_type[(w+w2)*16+g],
  1067. - s->lambda / band0->threshold, INFINITY, NULL);
  1068. - dist1 += quantize_band_cost(s, sce1->coeffs + start + w2*128,
  1069. - R34,
  1070. - sce1->ics.swb_sizes[g],
  1071. - sce1->sf_idx[(w+w2)*16+g],
  1072. - sce1->band_type[(w+w2)*16+g],
  1073. - s->lambda / band1->threshold, INFINITY, NULL);
  1074. - dist2 += quantize_band_cost(s, M,
  1075. - M34,
  1076. - sce0->ics.swb_sizes[g],
  1077. - sce0->sf_idx[(w+w2)*16+g],
  1078. - sce0->band_type[(w+w2)*16+g],
  1079. - s->lambda / maxthr, INFINITY, NULL);
  1080. - dist2 += quantize_band_cost(s, S,
  1081. - S34,
  1082. - sce1->ics.swb_sizes[g],
  1083. - sce1->sf_idx[(w+w2)*16+g],
  1084. - sce1->band_type[(w+w2)*16+g],
  1085. - s->lambda / minthr, INFINITY, NULL);
  1086. + for (sid_sf_boost = 0; sid_sf_boost < 4; sid_sf_boost++) {
  1087. + float dist1 = 0.0f, dist2 = 0.0f;
  1088. + int B0 = 0, B1 = 0;
  1089. + int minidx;
  1090. + int mididx, sididx;
  1091. + int midcb, sidcb;
  1092. +
  1093. + minidx = FFMIN(sce0->sf_idx[w*16+g], sce1->sf_idx[w*16+g]);
  1094. + mididx = av_clip(minidx, 0, SCALE_MAX_POS - SCALE_DIV_512);
  1095. + sididx = av_clip(minidx - sid_sf_boost * 3, 0, SCALE_MAX_POS - SCALE_DIV_512);
  1096. + if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT
  1097. + && ( !ff_sfdelta_can_replace(sce0, nextband0, prev_mid, mididx, w*16+g)
  1098. + || !ff_sfdelta_can_replace(sce1, nextband1, prev_side, sididx, w*16+g))) {
  1099. + /* scalefactor range violation, bad stuff, will decrease quality unacceptably */
  1100. + continue;
  1101. + }
  1102. +
  1103. + midcb = find_min_book(Mmax, mididx);
  1104. + sidcb = find_min_book(Smax, sididx);
  1105. +
  1106. + /* No CB can be zero */
  1107. + midcb = FFMAX(1,midcb);
  1108. + sidcb = FFMAX(1,sidcb);
  1109. +
  1110. + for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
  1111. + FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
  1112. + FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
  1113. + float minthr = FFMIN(band0->threshold, band1->threshold);
  1114. + int b1,b2,b3,b4;
  1115. + for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
  1116. + M[i] = (sce0->coeffs[start+(w+w2)*128+i]
  1117. + + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
  1118. + S[i] = M[i]
  1119. + - sce1->coeffs[start+(w+w2)*128+i];
  1120. + }
  1121. +
  1122. + abs_pow34_v(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
  1123. + abs_pow34_v(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
  1124. + abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
  1125. + abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
  1126. + dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128],
  1127. + L34,
  1128. + sce0->ics.swb_sizes[g],
  1129. + sce0->sf_idx[w*16+g],
  1130. + sce0->band_type[w*16+g],
  1131. + lambda / band0->threshold, INFINITY, &b1, NULL, 0);
  1132. + dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128],
  1133. + R34,
  1134. + sce1->ics.swb_sizes[g],
  1135. + sce1->sf_idx[w*16+g],
  1136. + sce1->band_type[w*16+g],
  1137. + lambda / band1->threshold, INFINITY, &b2, NULL, 0);
  1138. + dist2 += quantize_band_cost(s, M,
  1139. + M34,
  1140. + sce0->ics.swb_sizes[g],
  1141. + mididx,
  1142. + midcb,
  1143. + lambda / minthr, INFINITY, &b3, NULL, 0);
  1144. + dist2 += quantize_band_cost(s, S,
  1145. + S34,
  1146. + sce1->ics.swb_sizes[g],
  1147. + sididx,
  1148. + sidcb,
  1149. + mslambda / (minthr * bmax), INFINITY, &b4, NULL, 0);
  1150. + B0 += b1+b2;
  1151. + B1 += b3+b4;
  1152. + dist1 -= b1+b2;
  1153. + dist2 -= b3+b4;
  1154. + }
  1155. + cpe->ms_mask[w*16+g] = dist2 <= dist1 && B1 < B0;
  1156. + if (cpe->ms_mask[w*16+g]) {
  1157. + if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT) {
  1158. + sce0->sf_idx[w*16+g] = mididx;
  1159. + sce1->sf_idx[w*16+g] = sididx;
  1160. + sce0->band_type[w*16+g] = midcb;
  1161. + sce1->band_type[w*16+g] = sidcb;
  1162. + } else if ((sce0->band_type[w*16+g] != NOISE_BT) ^ (sce1->band_type[w*16+g] != NOISE_BT)) {
  1163. + /* ms_mask unneeded, and it confuses some decoders */
  1164. + cpe->ms_mask[w*16+g] = 0;
  1165. + }
  1166. + break;
  1167. + } else if (B1 > B0) {
  1168. + /* More boost won't fix this */
  1169. + break;
  1170. + }
  1171. }
  1172. - cpe->ms_mask[w*16+g] = dist2 < dist1;
  1173. }
  1174. + if (!sce0->zeroes[w*16+g] && sce0->band_type[w*16+g] < RESERVED_BT)
  1175. + prev_mid = sce0->sf_idx[w*16+g];
  1176. + if (!sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g] && sce1->band_type[w*16+g] < RESERVED_BT)
  1177. + prev_side = sce1->sf_idx[w*16+g];
  1178. start += sce0->ics.swb_sizes[g];
  1179. }
  1180. }
  1181. }
  1182. #endif /*HAVE_MIPSFPU */
  1183. -static void codebook_trellis_rate_mips(AACEncContext *s, SingleChannelElement *sce,
  1184. - int win, int group_len, const float lambda)
  1185. -{
  1186. - BandCodingPath path[120][12];
  1187. - int w, swb, cb, start, size;
  1188. - int i, j;
  1189. - const int max_sfb = sce->ics.max_sfb;
  1190. - const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
  1191. - const int run_esc = (1 << run_bits) - 1;
  1192. - int idx, ppos, count;
  1193. - int stackrun[120], stackcb[120], stack_len;
  1194. - float next_minbits = INFINITY;
  1195. - int next_mincb = 0;
  1196. +#include "libavcodec/aaccoder_trellis.h"
  1197. - abs_pow34_v(s->scoefs, sce->coeffs, 1024);
  1198. - start = win*128;
  1199. - for (cb = 0; cb < 12; cb++) {
  1200. - path[0][cb].cost = run_bits+4;
  1201. - path[0][cb].prev_idx = -1;
  1202. - path[0][cb].run = 0;
  1203. - }
  1204. - for (swb = 0; swb < max_sfb; swb++) {
  1205. - size = sce->ics.swb_sizes[swb];
  1206. - if (sce->zeroes[win*16 + swb]) {
  1207. - float cost_stay_here = path[swb][0].cost;
  1208. - float cost_get_here = next_minbits + run_bits + 4;
  1209. - if ( run_value_bits[sce->ics.num_windows == 8][path[swb][0].run]
  1210. - != run_value_bits[sce->ics.num_windows == 8][path[swb][0].run+1])
  1211. - cost_stay_here += run_bits;
  1212. - if (cost_get_here < cost_stay_here) {
  1213. - path[swb+1][0].prev_idx = next_mincb;
  1214. - path[swb+1][0].cost = cost_get_here;
  1215. - path[swb+1][0].run = 1;
  1216. - } else {
  1217. - path[swb+1][0].prev_idx = 0;
  1218. - path[swb+1][0].cost = cost_stay_here;
  1219. - path[swb+1][0].run = path[swb][0].run + 1;
  1220. - }
  1221. - next_minbits = path[swb+1][0].cost;
  1222. - next_mincb = 0;
  1223. - for (cb = 1; cb < 12; cb++) {
  1224. - path[swb+1][cb].cost = 61450;
  1225. - path[swb+1][cb].prev_idx = -1;
  1226. - path[swb+1][cb].run = 0;
  1227. - }
  1228. - } else {
  1229. - float minbits = next_minbits;
  1230. - int mincb = next_mincb;
  1231. - int startcb = sce->band_type[win*16+swb];
  1232. - next_minbits = INFINITY;
  1233. - next_mincb = 0;
  1234. - for (cb = 0; cb < startcb; cb++) {
  1235. - path[swb+1][cb].cost = 61450;
  1236. - path[swb+1][cb].prev_idx = -1;
  1237. - path[swb+1][cb].run = 0;
  1238. - }
  1239. - for (cb = startcb; cb < 12; cb++) {
  1240. - float cost_stay_here, cost_get_here;
  1241. - float bits = 0.0f;
  1242. - for (w = 0; w < group_len; w++) {
  1243. - bits += quantize_band_cost_bits(s, sce->coeffs + start + w*128,
  1244. - s->scoefs + start + w*128, size,
  1245. - sce->sf_idx[(win+w)*16+swb], cb,
  1246. - 0, INFINITY, NULL);
  1247. - }
  1248. - cost_stay_here = path[swb][cb].cost + bits;
  1249. - cost_get_here = minbits + bits + run_bits + 4;
  1250. - if ( run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
  1251. - != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
  1252. - cost_stay_here += run_bits;
  1253. - if (cost_get_here < cost_stay_here) {
  1254. - path[swb+1][cb].prev_idx = mincb;
  1255. - path[swb+1][cb].cost = cost_get_here;
  1256. - path[swb+1][cb].run = 1;
  1257. - } else {
  1258. - path[swb+1][cb].prev_idx = cb;
  1259. - path[swb+1][cb].cost = cost_stay_here;
  1260. - path[swb+1][cb].run = path[swb][cb].run + 1;
  1261. - }
  1262. - if (path[swb+1][cb].cost < next_minbits) {
  1263. - next_minbits = path[swb+1][cb].cost;
  1264. - next_mincb = cb;
  1265. - }
  1266. - }
  1267. - }
  1268. - start += sce->ics.swb_sizes[swb];
  1269. - }
  1270. -
  1271. - stack_len = 0;
  1272. - idx = 0;
  1273. - for (cb = 1; cb < 12; cb++)
  1274. - if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
  1275. - idx = cb;
  1276. - ppos = max_sfb;
  1277. - while (ppos > 0) {
  1278. - av_assert1(idx >= 0);
  1279. - cb = idx;
  1280. - stackrun[stack_len] = path[ppos][cb].run;
  1281. - stackcb [stack_len] = cb;
  1282. - idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
  1283. - ppos -= path[ppos][cb].run;
  1284. - stack_len++;
  1285. - }
  1286. -
  1287. - start = 0;
  1288. - for (i = stack_len - 1; i >= 0; i--) {
  1289. - put_bits(&s->pb, 4, stackcb[i]);
  1290. - count = stackrun[i];
  1291. - memset(sce->zeroes + win*16 + start, !stackcb[i], count);
  1292. - for (j = 0; j < count; j++) {
  1293. - sce->band_type[win*16 + start] = stackcb[i];
  1294. - start++;
  1295. - }
  1296. - while (count >= run_esc) {
  1297. - put_bits(&s->pb, run_bits, run_esc);
  1298. - count -= run_esc;
  1299. - }
  1300. - put_bits(&s->pb, run_bits, count);
  1301. - }
  1302. -}
  1303. #endif /* HAVE_INLINE_ASM */
  1304. void ff_aac_coder_init_mips(AACEncContext *c) {
  1305. #if HAVE_INLINE_ASM
  1306. AACCoefficientsEncoder *e = c->coder;
  1307. - int option = c->options.aac_coder;
  1308. + int option = c->options.coder;
  1309. if (option == 2) {
  1310. -// Disabled due to failure with fate-aac-pns-encode
  1311. -// e->quantize_and_encode_band = quantize_and_encode_band_mips;
  1312. -// e->encode_window_bands_info = codebook_trellis_rate_mips;
  1313. + e->quantize_and_encode_band = quantize_and_encode_band_mips;
  1314. + e->encode_window_bands_info = codebook_trellis_rate;
  1315. #if HAVE_MIPSFPU
  1316. - e->search_for_quantizers = search_for_quantizers_twoloop_mips;
  1317. - e->search_for_ms = search_for_ms_mips;
  1318. + e->search_for_quantizers = search_for_quantizers_twoloop;
  1319. #endif /* HAVE_MIPSFPU */
  1320. }
  1321. +#if HAVE_MIPSFPU
  1322. + e->search_for_ms = search_for_ms_mips;
  1323. +#endif /* HAVE_MIPSFPU */
  1324. #endif /* HAVE_INLINE_ASM */
  1325. }