1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411 |
- $OpenBSD: patch-libavcodec_mips_aaccoder_mips_c,v 1.5 2016/01/14 06:09:43 ajacoutot Exp $
- AAC encoder: refactor to resynchronize MIPS port
- AAC encoder: simplify and speed up find_min_book
- AAC encoder: Extensive improvements
- AAC encoder: memoize quantize_band_cost
- avcodec/mips/aaccoder_mips: Disable ff_aac_coder_init_mips() to prevent build failure
- avcodec/mips/aaccoder_mips: Sync with the generic code
- AAC encoder: improve SF range utilization
- AAC encoder: various fixes in M/S coding
- --- libavcodec/mips/aaccoder_mips.c.orig Fri Dec 25 03:00:19 2015
- +++ libavcodec/mips/aaccoder_mips.c Wed Jan 13 15:13:20 2016
- @@ -62,6 +62,8 @@
- #include "libavcodec/aac.h"
- #include "libavcodec/aacenc.h"
- #include "libavcodec/aactab.h"
- +#include "libavcodec/aacenctab.h"
- +#include "libavcodec/aacenc_utils.h"
-
- #if HAVE_INLINE_ASM
- typedef struct BandCodingPath {
- @@ -70,21 +72,6 @@ typedef struct BandCodingPath {
- int run;
- } BandCodingPath;
-
- -static const uint8_t run_value_bits_long[64] = {
- - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10,
- - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
- - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15
- -};
- -
- -static const uint8_t run_value_bits_short[16] = {
- - 3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 9
- -};
- -
- -static const uint8_t * const run_value_bits[2] = {
- - run_value_bits_long, run_value_bits_short
- -};
- -
- static const uint8_t uquad_sign_bits[81] = {
- 0, 1, 1, 1, 2, 2, 1, 2, 2,
- 1, 2, 2, 2, 3, 3, 2, 3, 3,
- @@ -144,65 +131,6 @@ static const uint8_t esc_sign_bits[289] = {
- 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
- };
-
- -#define ROUND_STANDARD 0.4054f
- -#define ROUND_TO_ZERO 0.1054f
- -
- -static void abs_pow34_v(float *out, const float *in, const int size) {
- -#ifndef USE_REALLY_FULL_SEARCH
- - int i;
- - float a, b, c, d;
- - float ax, bx, cx, dx;
- -
- - for (i = 0; i < size; i += 4) {
- - a = fabsf(in[i ]);
- - b = fabsf(in[i+1]);
- - c = fabsf(in[i+2]);
- - d = fabsf(in[i+3]);
- -
- - ax = sqrtf(a);
- - bx = sqrtf(b);
- - cx = sqrtf(c);
- - dx = sqrtf(d);
- -
- - a = a * ax;
- - b = b * bx;
- - c = c * cx;
- - d = d * dx;
- -
- - out[i ] = sqrtf(a);
- - out[i+1] = sqrtf(b);
- - out[i+2] = sqrtf(c);
- - out[i+3] = sqrtf(d);
- - }
- -#endif /* USE_REALLY_FULL_SEARCH */
- -}
- -
- -static float find_max_val(int group_len, int swb_size, const float *scaled) {
- - float maxval = 0.0f;
- - int w2, i;
- - for (w2 = 0; w2 < group_len; w2++) {
- - for (i = 0; i < swb_size; i++) {
- - maxval = FFMAX(maxval, scaled[w2*128+i]);
- - }
- - }
- - return maxval;
- -}
- -
- -static int find_min_book(float maxval, int sf) {
- - float Q = ff_aac_pow2sf_tab[POW_SF2_ZERO - sf + SCALE_ONE_POS - SCALE_DIV_512];
- - float Q34 = sqrtf(Q * sqrtf(Q));
- - int qmaxval, cb;
- - qmaxval = maxval * Q34 + 0.4054f;
- - if (qmaxval == 0) cb = 0;
- - else if (qmaxval == 1) cb = 1;
- - else if (qmaxval == 2) cb = 3;
- - else if (qmaxval <= 4) cb = 5;
- - else if (qmaxval <= 7) cb = 7;
- - else if (qmaxval <= 12) cb = 9;
- - else cb = 11;
- - return cb;
- -}
- -
- /**
- * Functions developed from template function and optimized for quantizing and encoding band
- */
- @@ -210,14 +138,17 @@ static void quantize_and_encode_band_cost_SQUAD_mips(s
- PutBitContext *pb, const float *in, float *out,
- const float *scaled, int size, int scale_idx,
- int cb, const float lambda, const float uplim,
- - int *bits, const float ROUNDING)
- + int *bits, float *energy, const float ROUNDING)
- {
- const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
- + const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
- int i;
- int qc1, qc2, qc3, qc4;
- + float qenergy = 0.0f;
-
- uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
- uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
- + float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
-
- abs_pow34_v(s->scoefs, in, size);
- scaled = s->scoefs;
- @@ -225,6 +156,7 @@ static void quantize_and_encode_band_cost_SQUAD_mips(s
- int curidx;
- int *in_int = (int *)&in[i];
- int t0, t1, t2, t3, t4, t5, t6, t7;
- + const float *vec;
-
- qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
- qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
- @@ -276,21 +208,43 @@ static void quantize_and_encode_band_cost_SQUAD_mips(s
- curidx += 40;
-
- put_bits(pb, p_bits[curidx], p_codes[curidx]);
- +
- + if (out || energy) {
- + float e1,e2,e3,e4;
- + vec = &p_vec[curidx*4];
- + e1 = vec[0] * IQ;
- + e2 = vec[1] * IQ;
- + e3 = vec[2] * IQ;
- + e4 = vec[3] * IQ;
- + if (out) {
- + out[i+0] = e1;
- + out[i+1] = e2;
- + out[i+2] = e3;
- + out[i+3] = e4;
- + }
- + if (energy)
- + qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
- + }
- }
- + if (energy)
- + *energy = qenergy;
- }
-
- static void quantize_and_encode_band_cost_UQUAD_mips(struct AACEncContext *s,
- PutBitContext *pb, const float *in, float *out,
- const float *scaled, int size, int scale_idx,
- int cb, const float lambda, const float uplim,
- - int *bits, const float ROUNDING)
- + int *bits, float *energy, const float ROUNDING)
- {
- const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
- + const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
- int i;
- int qc1, qc2, qc3, qc4;
- + float qenergy = 0.0f;
-
- uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
- uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
- + float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
-
- abs_pow34_v(s->scoefs, in, size);
- scaled = s->scoefs;
- @@ -300,6 +254,7 @@ static void quantize_and_encode_band_cost_UQUAD_mips(s
- uint8_t v_bits;
- unsigned int v_codes;
- int t0, t1, t2, t3, t4;
- + const float *vec;
-
- qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
- qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
- @@ -368,21 +323,43 @@ static void quantize_and_encode_band_cost_UQUAD_mips(s
- v_codes = (p_codes[curidx] << count) | (sign & ((1 << count) - 1));
- v_bits = p_bits[curidx] + count;
- put_bits(pb, v_bits, v_codes);
- +
- + if (out || energy) {
- + float e1,e2,e3,e4;
- + vec = &p_vec[curidx*4];
- + e1 = copysignf(vec[0] * IQ, in[i+0]);
- + e2 = copysignf(vec[1] * IQ, in[i+1]);
- + e3 = copysignf(vec[2] * IQ, in[i+2]);
- + e4 = copysignf(vec[3] * IQ, in[i+3]);
- + if (out) {
- + out[i+0] = e1;
- + out[i+1] = e2;
- + out[i+2] = e3;
- + out[i+3] = e4;
- + }
- + if (energy)
- + qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
- + }
- }
- + if (energy)
- + *energy = qenergy;
- }
-
- static void quantize_and_encode_band_cost_SPAIR_mips(struct AACEncContext *s,
- PutBitContext *pb, const float *in, float *out,
- const float *scaled, int size, int scale_idx,
- int cb, const float lambda, const float uplim,
- - int *bits, const float ROUNDING)
- + int *bits, float *energy, const float ROUNDING)
- {
- const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
- + const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
- int i;
- int qc1, qc2, qc3, qc4;
- + float qenergy = 0.0f;
-
- uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
- uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
- + float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
-
- abs_pow34_v(s->scoefs, in, size);
- scaled = s->scoefs;
- @@ -392,6 +369,7 @@ static void quantize_and_encode_band_cost_SPAIR_mips(s
- uint8_t v_bits;
- unsigned int v_codes;
- int t0, t1, t2, t3, t4, t5, t6, t7;
- + const float *vec1, *vec2;
-
- qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
- qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
- @@ -447,30 +425,54 @@ static void quantize_and_encode_band_cost_SPAIR_mips(s
- v_codes = (p_codes[curidx] << p_bits[curidx2]) | (p_codes[curidx2]);
- v_bits = p_bits[curidx] + p_bits[curidx2];
- put_bits(pb, v_bits, v_codes);
- +
- + if (out || energy) {
- + float e1,e2,e3,e4;
- + vec1 = &p_vec[curidx*2 ];
- + vec2 = &p_vec[curidx2*2];
- + e1 = vec1[0] * IQ;
- + e2 = vec1[1] * IQ;
- + e3 = vec2[0] * IQ;
- + e4 = vec2[1] * IQ;
- + if (out) {
- + out[i+0] = e1;
- + out[i+1] = e2;
- + out[i+2] = e3;
- + out[i+3] = e4;
- + }
- + if (energy)
- + qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
- + }
- }
- + if (energy)
- + *energy = qenergy;
- }
-
- static void quantize_and_encode_band_cost_UPAIR7_mips(struct AACEncContext *s,
- PutBitContext *pb, const float *in, float *out,
- const float *scaled, int size, int scale_idx,
- int cb, const float lambda, const float uplim,
- - int *bits, const float ROUNDING)
- + int *bits, float *energy, const float ROUNDING)
- {
- const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
- + const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
- int i;
- int qc1, qc2, qc3, qc4;
- + float qenergy = 0.0f;
-
- uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1];
- uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
- + float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
-
- abs_pow34_v(s->scoefs, in, size);
- scaled = s->scoefs;
- for (i = 0; i < size; i += 4) {
- - int curidx, sign1, count1, sign2, count2;
- + int curidx1, curidx2, sign1, count1, sign2, count2;
- int *in_int = (int *)&in[i];
- uint8_t v_bits;
- unsigned int v_codes;
- int t0, t1, t2, t3, t4;
- + const float *vec1, *vec2;
-
- qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
- qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
- @@ -528,43 +530,67 @@ static void quantize_and_encode_band_cost_UPAIR7_mips(
- "memory"
- );
-
- - curidx = 8 * qc1;
- - curidx += qc2;
- + curidx1 = 8 * qc1;
- + curidx1 += qc2;
-
- - v_codes = (p_codes[curidx] << count1) | sign1;
- - v_bits = p_bits[curidx] + count1;
- + v_codes = (p_codes[curidx1] << count1) | sign1;
- + v_bits = p_bits[curidx1] + count1;
- put_bits(pb, v_bits, v_codes);
-
- - curidx = 8 * qc3;
- - curidx += qc4;
- + curidx2 = 8 * qc3;
- + curidx2 += qc4;
-
- - v_codes = (p_codes[curidx] << count2) | sign2;
- - v_bits = p_bits[curidx] + count2;
- + v_codes = (p_codes[curidx2] << count2) | sign2;
- + v_bits = p_bits[curidx2] + count2;
- put_bits(pb, v_bits, v_codes);
- +
- + if (out || energy) {
- + float e1,e2,e3,e4;
- + vec1 = &p_vec[curidx1*2];
- + vec2 = &p_vec[curidx2*2];
- + e1 = copysignf(vec1[0] * IQ, in[i+0]);
- + e2 = copysignf(vec1[1] * IQ, in[i+1]);
- + e3 = copysignf(vec2[0] * IQ, in[i+2]);
- + e4 = copysignf(vec2[1] * IQ, in[i+3]);
- + if (out) {
- + out[i+0] = e1;
- + out[i+1] = e2;
- + out[i+2] = e3;
- + out[i+3] = e4;
- + }
- + if (energy)
- + qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
- + }
- }
- + if (energy)
- + *energy = qenergy;
- }
-
- static void quantize_and_encode_band_cost_UPAIR12_mips(struct AACEncContext *s,
- PutBitContext *pb, const float *in, float *out,
- const float *scaled, int size, int scale_idx,
- int cb, const float lambda, const float uplim,
- - int *bits, const float ROUNDING)
- + int *bits, float *energy, const float ROUNDING)
- {
- const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
- + const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
- int i;
- int qc1, qc2, qc3, qc4;
- + float qenergy = 0.0f;
-
- uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1];
- uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
- + float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
-
- abs_pow34_v(s->scoefs, in, size);
- scaled = s->scoefs;
- for (i = 0; i < size; i += 4) {
- - int curidx, sign1, count1, sign2, count2;
- + int curidx1, curidx2, sign1, count1, sign2, count2;
- int *in_int = (int *)&in[i];
- uint8_t v_bits;
- unsigned int v_codes;
- int t0, t1, t2, t3, t4;
- + const float *vec1, *vec2;
-
- qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
- qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
- @@ -621,31 +647,53 @@ static void quantize_and_encode_band_cost_UPAIR12_mips
- : "memory"
- );
-
- - curidx = 13 * qc1;
- - curidx += qc2;
- + curidx1 = 13 * qc1;
- + curidx1 += qc2;
-
- - v_codes = (p_codes[curidx] << count1) | sign1;
- - v_bits = p_bits[curidx] + count1;
- + v_codes = (p_codes[curidx1] << count1) | sign1;
- + v_bits = p_bits[curidx1] + count1;
- put_bits(pb, v_bits, v_codes);
-
- - curidx = 13 * qc3;
- - curidx += qc4;
- + curidx2 = 13 * qc3;
- + curidx2 += qc4;
-
- - v_codes = (p_codes[curidx] << count2) | sign2;
- - v_bits = p_bits[curidx] + count2;
- + v_codes = (p_codes[curidx2] << count2) | sign2;
- + v_bits = p_bits[curidx2] + count2;
- put_bits(pb, v_bits, v_codes);
- +
- + if (out || energy) {
- + float e1,e2,e3,e4;
- + vec1 = &p_vec[curidx1*2];
- + vec2 = &p_vec[curidx2*2];
- + e1 = copysignf(vec1[0] * IQ, in[i+0]);
- + e2 = copysignf(vec1[1] * IQ, in[i+1]);
- + e3 = copysignf(vec2[0] * IQ, in[i+2]);
- + e4 = copysignf(vec2[1] * IQ, in[i+3]);
- + if (out) {
- + out[i+0] = e1;
- + out[i+1] = e2;
- + out[i+2] = e3;
- + out[i+3] = e4;
- + }
- + if (energy)
- + qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
- + }
- }
- + if (energy)
- + *energy = qenergy;
- }
-
- static void quantize_and_encode_band_cost_ESC_mips(struct AACEncContext *s,
- PutBitContext *pb, const float *in, float *out,
- const float *scaled, int size, int scale_idx,
- int cb, const float lambda, const float uplim,
- - int *bits, const float ROUNDING)
- + int *bits, float *energy, const float ROUNDING)
- {
- const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
- + const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
- int i;
- int qc1, qc2, qc3, qc4;
- + float qenergy = 0.0f;
-
- uint8_t *p_bits = (uint8_t* )ff_aac_spectral_bits[cb-1];
- uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
- @@ -661,6 +709,7 @@ static void quantize_and_encode_band_cost_ESC_mips(str
- uint8_t v_bits;
- unsigned int v_codes;
- int t0, t1, t2, t3, t4;
- + const float *vec1, *vec2;
-
- qc1 = scaled[i ] * Q34 + ROUNDING;
- qc2 = scaled[i+1] * Q34 + ROUNDING;
- @@ -729,6 +778,24 @@ static void quantize_and_encode_band_cost_ESC_mips(str
- v_codes = (p_codes[curidx2] << count2) | sign2;
- v_bits = p_bits[curidx2] + count2;
- put_bits(pb, v_bits, v_codes);
- +
- + if (out || energy) {
- + float e1,e2,e3,e4;
- + vec1 = &p_vectors[curidx*2 ];
- + vec2 = &p_vectors[curidx2*2];
- + e1 = copysignf(vec1[0] * IQ, in[i+0]);
- + e2 = copysignf(vec1[1] * IQ, in[i+1]);
- + e3 = copysignf(vec2[0] * IQ, in[i+2]);
- + e4 = copysignf(vec2[1] * IQ, in[i+3]);
- + if (out) {
- + out[i+0] = e1;
- + out[i+1] = e2;
- + out[i+2] = e3;
- + out[i+3] = e4;
- + }
- + if (energy)
- + qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
- + }
- }
- } else {
- for (i = 0; i < size; i += 4) {
- @@ -839,15 +906,33 @@ static void quantize_and_encode_band_cost_ESC_mips(str
- v_codes = (((1 << (len - 3)) - 2) << len) | (c4 & ((1 << len) - 1));
- put_bits(pb, len * 2 - 3, v_codes);
- }
- +
- + if (out || energy) {
- + float e1, e2, e3, e4;
- + e1 = copysignf(c1 * cbrtf(c1) * IQ, in[i+0]);
- + e2 = copysignf(c2 * cbrtf(c2) * IQ, in[i+1]);
- + e3 = copysignf(c3 * cbrtf(c3) * IQ, in[i+2]);
- + e4 = copysignf(c4 * cbrtf(c4) * IQ, in[i+3]);
- + if (out) {
- + out[i+0] = e1;
- + out[i+1] = e2;
- + out[i+2] = e3;
- + out[i+3] = e4;
- + }
- + if (energy)
- + qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
- + }
- }
- }
- + if (energy)
- + *energy = qenergy;
- }
-
- static void quantize_and_encode_band_cost_NONE_mips(struct AACEncContext *s,
- PutBitContext *pb, const float *in, float *out,
- const float *scaled, int size, int scale_idx,
- int cb, const float lambda, const float uplim,
- - int *bits, const float ROUNDING) {
- + int *bits, float *energy, const float ROUNDING) {
- av_assert0(0);
- }
-
- @@ -855,7 +940,7 @@ static void quantize_and_encode_band_cost_ZERO_mips(st
- PutBitContext *pb, const float *in, float *out,
- const float *scaled, int size, int scale_idx,
- int cb, const float lambda, const float uplim,
- - int *bits, const float ROUNDING) {
- + int *bits, float *energy, const float ROUNDING) {
- int i;
- if (bits)
- *bits = 0;
- @@ -867,13 +952,15 @@ static void quantize_and_encode_band_cost_ZERO_mips(st
- out[i+3] = 0.0f;
- }
- }
- + if (energy)
- + *energy = 0.0f;
- }
-
- static void (*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s,
- PutBitContext *pb, const float *in, float *out,
- const float *scaled, int size, int scale_idx,
- int cb, const float lambda, const float uplim,
- - int *bits, const float ROUNDING) = {
- + int *bits, float *energy, const float ROUNDING) = {
- quantize_and_encode_band_cost_ZERO_mips,
- quantize_and_encode_band_cost_SQUAD_mips,
- quantize_and_encode_band_cost_SQUAD_mips,
- @@ -894,17 +981,17 @@ static void (*const quantize_and_encode_band_cost_arr[
-
- #define quantize_and_encode_band_cost( \
- s, pb, in, out, scaled, size, scale_idx, cb, \
- - lambda, uplim, bits, ROUNDING) \
- + lambda, uplim, bits, energy, ROUNDING) \
- quantize_and_encode_band_cost_arr[cb]( \
- s, pb, in, out, scaled, size, scale_idx, cb, \
- - lambda, uplim, bits, ROUNDING)
- + lambda, uplim, bits, energy, ROUNDING)
-
- static void quantize_and_encode_band_mips(struct AACEncContext *s, PutBitContext *pb,
- const float *in, float *out, int size, int scale_idx,
- int cb, const float lambda, int rtz)
- {
- quantize_and_encode_band_cost(s, pb, in, out, NULL, size, scale_idx, cb, lambda,
- - INFINITY, NULL, (rtz) ? ROUND_TO_ZERO : ROUND_STANDARD);
- + INFINITY, NULL, NULL, (rtz) ? ROUND_TO_ZERO : ROUND_STANDARD);
- }
-
- /**
- @@ -1384,7 +1471,7 @@ static float (*const get_band_numbits_arr[])(struct AA
- static float quantize_band_cost_bits(struct AACEncContext *s, const float *in,
- const float *scaled, int size, int scale_idx,
- int cb, const float lambda, const float uplim,
- - int *bits)
- + int *bits, float *energy, int rtz)
- {
- return get_band_numbits(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits);
- }
- @@ -1397,7 +1484,7 @@ static float get_band_cost_ZERO_mips(struct AACEncCont
- PutBitContext *pb, const float *in,
- const float *scaled, int size, int scale_idx,
- int cb, const float lambda, const float uplim,
- - int *bits)
- + int *bits, float *energy)
- {
- int i;
- float cost = 0;
- @@ -1410,6 +1497,8 @@ static float get_band_cost_ZERO_mips(struct AACEncCont
- }
- if (bits)
- *bits = 0;
- + if (energy)
- + *energy = 0.0f;
- return cost * lambda;
- }
-
- @@ -1417,7 +1506,7 @@ static float get_band_cost_NONE_mips(struct AACEncCont
- PutBitContext *pb, const float *in,
- const float *scaled, int size, int scale_idx,
- int cb, const float lambda, const float uplim,
- - int *bits)
- + int *bits, float *energy)
- {
- av_assert0(0);
- return 0;
- @@ -1427,12 +1516,13 @@ static float get_band_cost_SQUAD_mips(struct AACEncCon
- PutBitContext *pb, const float *in,
- const float *scaled, int size, int scale_idx,
- int cb, const float lambda, const float uplim,
- - int *bits)
- + int *bits, float *energy)
- {
- const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
- const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
- int i;
- float cost = 0;
- + float qenergy = 0.0f;
- int qc1, qc2, qc3, qc4;
- int curbits = 0;
-
- @@ -1499,6 +1589,9 @@ static float get_band_cost_SQUAD_mips(struct AACEncCon
- curbits += p_bits[curidx];
- vec = &p_codes[curidx*4];
-
- + qenergy += vec[0]*vec[0] + vec[1]*vec[1]
- + + vec[2]*vec[2] + vec[3]*vec[3];
- +
- __asm__ volatile (
- ".set push \n\t"
- ".set noreorder \n\t"
- @@ -1533,6 +1626,8 @@ static float get_band_cost_SQUAD_mips(struct AACEncCon
-
- if (bits)
- *bits = curbits;
- + if (energy)
- + *energy = qenergy * (IQ*IQ);
- return cost * lambda + curbits;
- }
-
- @@ -1540,12 +1635,13 @@ static float get_band_cost_UQUAD_mips(struct AACEncCon
- PutBitContext *pb, const float *in,
- const float *scaled, int size, int scale_idx,
- int cb, const float lambda, const float uplim,
- - int *bits)
- + int *bits, float *energy)
- {
- const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
- const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
- int i;
- float cost = 0;
- + float qenergy = 0.0f;
- int curbits = 0;
- int qc1, qc2, qc3, qc4;
-
- @@ -1598,6 +1694,9 @@ static float get_band_cost_UQUAD_mips(struct AACEncCon
- curbits += uquad_sign_bits[curidx];
- vec = &p_codes[curidx*4];
-
- + qenergy += vec[0]*vec[0] + vec[1]*vec[1]
- + + vec[2]*vec[2] + vec[3]*vec[3];
- +
- __asm__ volatile (
- ".set push \n\t"
- ".set noreorder \n\t"
- @@ -1635,6 +1734,8 @@ static float get_band_cost_UQUAD_mips(struct AACEncCon
-
- if (bits)
- *bits = curbits;
- + if (energy)
- + *energy = qenergy * (IQ*IQ);
- return cost * lambda + curbits;
- }
-
- @@ -1642,12 +1743,13 @@ static float get_band_cost_SPAIR_mips(struct AACEncCon
- PutBitContext *pb, const float *in,
- const float *scaled, int size, int scale_idx,
- int cb, const float lambda, const float uplim,
- - int *bits)
- + int *bits, float *energy)
- {
- const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
- const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
- int i;
- float cost = 0;
- + float qenergy = 0.0f;
- int qc1, qc2, qc3, qc4;
- int curbits = 0;
-
- @@ -1719,6 +1821,9 @@ static float get_band_cost_SPAIR_mips(struct AACEncCon
- vec = &p_codes[curidx*2];
- vec2 = &p_codes[curidx2*2];
-
- + qenergy += vec[0]*vec[0] + vec[1]*vec[1]
- + + vec2[0]*vec2[0] + vec2[1]*vec2[1];
- +
- __asm__ volatile (
- ".set push \n\t"
- ".set noreorder \n\t"
- @@ -1753,6 +1858,8 @@ static float get_band_cost_SPAIR_mips(struct AACEncCon
-
- if (bits)
- *bits = curbits;
- + if (energy)
- + *energy = qenergy * (IQ*IQ);
- return cost * lambda + curbits;
- }
-
- @@ -1760,12 +1867,13 @@ static float get_band_cost_UPAIR7_mips(struct AACEncCo
- PutBitContext *pb, const float *in,
- const float *scaled, int size, int scale_idx,
- int cb, const float lambda, const float uplim,
- - int *bits)
- + int *bits, float *energy)
- {
- const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
- const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
- int i;
- float cost = 0;
- + float qenergy = 0.0f;
- int qc1, qc2, qc3, qc4;
- int curbits = 0;
-
- @@ -1849,6 +1957,9 @@ static float get_band_cost_UPAIR7_mips(struct AACEncCo
- curbits += upair7_sign_bits[curidx2];
- vec2 = &p_codes[curidx2*2];
-
- + qenergy += vec[0]*vec[0] + vec[1]*vec[1]
- + + vec2[0]*vec2[0] + vec2[1]*vec2[1];
- +
- __asm__ volatile (
- ".set push \n\t"
- ".set noreorder \n\t"
- @@ -1886,6 +1997,8 @@ static float get_band_cost_UPAIR7_mips(struct AACEncCo
-
- if (bits)
- *bits = curbits;
- + if (energy)
- + *energy = qenergy * (IQ*IQ);
- return cost * lambda + curbits;
- }
-
- @@ -1893,12 +2006,13 @@ static float get_band_cost_UPAIR12_mips(struct AACEncC
- PutBitContext *pb, const float *in,
- const float *scaled, int size, int scale_idx,
- int cb, const float lambda, const float uplim,
- - int *bits)
- + int *bits, float *energy)
- {
- const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
- const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
- int i;
- float cost = 0;
- + float qenergy = 0.0f;
- int qc1, qc2, qc3, qc4;
- int curbits = 0;
-
- @@ -1982,6 +2096,9 @@ static float get_band_cost_UPAIR12_mips(struct AACEncC
- vec = &p_codes[curidx*2];
- vec2 = &p_codes[curidx2*2];
-
- + qenergy += vec[0]*vec[0] + vec[1]*vec[1]
- + + vec2[0]*vec2[0] + vec2[1]*vec2[1];
- +
- __asm__ volatile (
- ".set push \n\t"
- ".set noreorder \n\t"
- @@ -2019,6 +2136,8 @@ static float get_band_cost_UPAIR12_mips(struct AACEncC
-
- if (bits)
- *bits = curbits;
- + if (energy)
- + *energy = qenergy * (IQ*IQ);
- return cost * lambda + curbits;
- }
-
- @@ -2026,13 +2145,14 @@ static float get_band_cost_ESC_mips(struct AACEncConte
- PutBitContext *pb, const float *in,
- const float *scaled, int size, int scale_idx,
- int cb, const float lambda, const float uplim,
- - int *bits)
- + int *bits, float *energy)
- {
- const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
- const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
- const float CLIPPED_ESCAPE = 165140.0f * IQ;
- int i;
- float cost = 0;
- + float qenergy = 0.0f;
- int qc1, qc2, qc3, qc4;
- int curbits = 0;
-
- @@ -2042,7 +2162,7 @@ static float get_band_cost_ESC_mips(struct AACEncConte
- for (i = 0; i < size; i += 4) {
- const float *vec, *vec2;
- int curidx, curidx2;
- - float t1, t2, t3, t4;
- + float t1, t2, t3, t4, V;
- float di1, di2, di3, di4;
- int cond0, cond1, cond2, cond3;
- int c1, c2, c3, c4;
- @@ -2114,38 +2234,54 @@ static float get_band_cost_ESC_mips(struct AACEncConte
- if (cond0) {
- if (t1 >= CLIPPED_ESCAPE) {
- di1 = t1 - CLIPPED_ESCAPE;
- + qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
- } else {
- - di1 = t1 - c1 * cbrtf(c1) * IQ;
- + di1 = t1 - (V = c1 * cbrtf(c1) * IQ);
- + qenergy += V*V;
- }
- - } else
- - di1 = t1 - vec[0] * IQ;
- + } else {
- + di1 = t1 - (V = vec[0] * IQ);
- + qenergy += V*V;
- + }
-
- if (cond1) {
- if (t2 >= CLIPPED_ESCAPE) {
- di2 = t2 - CLIPPED_ESCAPE;
- + qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
- } else {
- - di2 = t2 - c2 * cbrtf(c2) * IQ;
- + di2 = t2 - (V = c2 * cbrtf(c2) * IQ);
- + qenergy += V*V;
- }
- - } else
- - di2 = t2 - vec[1] * IQ;
- + } else {
- + di2 = t2 - (V = vec[1] * IQ);
- + qenergy += V*V;
- + }
-
- if (cond2) {
- if (t3 >= CLIPPED_ESCAPE) {
- di3 = t3 - CLIPPED_ESCAPE;
- + qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
- } else {
- - di3 = t3 - c3 * cbrtf(c3) * IQ;
- + di3 = t3 - (V = c3 * cbrtf(c3) * IQ);
- + qenergy += V*V;
- }
- - } else
- - di3 = t3 - vec2[0] * IQ;
- + } else {
- + di3 = t3 - (V = vec2[0] * IQ);
- + qenergy += V*V;
- + }
-
- if (cond3) {
- if (t4 >= CLIPPED_ESCAPE) {
- di4 = t4 - CLIPPED_ESCAPE;
- + qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
- } else {
- - di4 = t4 - c4 * cbrtf(c4) * IQ;
- + di4 = t4 - (V = c4 * cbrtf(c4) * IQ);
- + qenergy += V*V;
- }
- - } else
- - di4 = t4 - vec2[1]*IQ;
- + } else {
- + di4 = t4 - (V = vec2[1]*IQ);
- + qenergy += V*V;
- + }
-
- cost += di1 * di1 + di2 * di2
- + di3 * di3 + di4 * di4;
- @@ -2160,7 +2296,7 @@ static float (*const get_band_cost_arr[])(struct AACEn
- PutBitContext *pb, const float *in,
- const float *scaled, int size, int scale_idx,
- int cb, const float lambda, const float uplim,
- - int *bits) = {
- + int *bits, float *energy) = {
- get_band_cost_ZERO_mips,
- get_band_cost_SQUAD_mips,
- get_band_cost_SQUAD_mips,
- @@ -2181,404 +2317,182 @@ static float (*const get_band_cost_arr[])(struct AACEn
-
- #define get_band_cost( \
- s, pb, in, scaled, size, scale_idx, cb, \
- - lambda, uplim, bits) \
- + lambda, uplim, bits, energy) \
- get_band_cost_arr[cb]( \
- s, pb, in, scaled, size, scale_idx, cb, \
- - lambda, uplim, bits)
- + lambda, uplim, bits, energy)
-
- static float quantize_band_cost(struct AACEncContext *s, const float *in,
- const float *scaled, int size, int scale_idx,
- int cb, const float lambda, const float uplim,
- - int *bits)
- + int *bits, float *energy, int rtz)
- {
- - return get_band_cost(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits);
- + return get_band_cost(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits, energy);
- }
-
- -static void search_for_quantizers_twoloop_mips(AVCodecContext *avctx,
- - AACEncContext *s,
- - SingleChannelElement *sce,
- - const float lambda)
- -{
- - int start = 0, i, w, w2, g;
- - int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels;
- - float dists[128] = { 0 }, uplims[128];
- - float maxvals[128];
- - int fflag, minscaler;
- - int its = 0;
- - int allz = 0;
- - float minthr = INFINITY;
- +#include "libavcodec/aacenc_quantization_misc.h"
-
- - destbits = FFMIN(destbits, 5800);
- - for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
- - for (g = 0; g < sce->ics.num_swb; g++) {
- - int nz = 0;
- - float uplim = 0.0f;
- - for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
- - FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
- - uplim += band->threshold;
- - if (band->energy <= band->threshold || band->threshold == 0.0f) {
- - sce->zeroes[(w+w2)*16+g] = 1;
- - continue;
- - }
- - nz = 1;
- - }
- - uplims[w*16+g] = uplim *512;
- - sce->zeroes[w*16+g] = !nz;
- - if (nz)
- - minthr = FFMIN(minthr, uplim);
- - allz |= nz;
- - }
- - }
- - for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
- - for (g = 0; g < sce->ics.num_swb; g++) {
- - if (sce->zeroes[w*16+g]) {
- - sce->sf_idx[w*16+g] = SCALE_ONE_POS;
- - continue;
- - }
- - sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2f(uplims[w*16+g]/minthr)*4,59);
- - }
- - }
- +#include "libavcodec/aaccoder_twoloop.h"
-
- - if (!allz)
- - return;
- - abs_pow34_v(s->scoefs, sce->coeffs, 1024);
- -
- - for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
- - start = w*128;
- - for (g = 0; g < sce->ics.num_swb; g++) {
- - const float *scaled = s->scoefs + start;
- - maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
- - start += sce->ics.swb_sizes[g];
- - }
- - }
- -
- - do {
- - int tbits, qstep;
- - minscaler = sce->sf_idx[0];
- - qstep = its ? 1 : 32;
- - do {
- - int prev = -1;
- - tbits = 0;
- - fflag = 0;
- -
- - if (qstep > 1) {
- - for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
- - start = w*128;
- - for (g = 0; g < sce->ics.num_swb; g++) {
- - const float *coefs = sce->coeffs + start;
- - const float *scaled = s->scoefs + start;
- - int bits = 0;
- - int cb;
- -
- - if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
- - start += sce->ics.swb_sizes[g];
- - continue;
- - }
- - minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
- - cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
- - for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
- - int b;
- - bits += quantize_band_cost_bits(s, coefs + w2*128,
- - scaled + w2*128,
- - sce->ics.swb_sizes[g],
- - sce->sf_idx[w*16+g],
- - cb,
- - 1.0f,
- - INFINITY,
- - &b);
- - }
- - if (prev != -1) {
- - bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
- - }
- - tbits += bits;
- - start += sce->ics.swb_sizes[g];
- - prev = sce->sf_idx[w*16+g];
- - }
- - }
- - }
- - else {
- - for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
- - start = w*128;
- - for (g = 0; g < sce->ics.num_swb; g++) {
- - const float *coefs = sce->coeffs + start;
- - const float *scaled = s->scoefs + start;
- - int bits = 0;
- - int cb;
- - float dist = 0.0f;
- -
- - if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
- - start += sce->ics.swb_sizes[g];
- - continue;
- - }
- - minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
- - cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
- - for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
- - int b;
- - dist += quantize_band_cost(s, coefs + w2*128,
- - scaled + w2*128,
- - sce->ics.swb_sizes[g],
- - sce->sf_idx[w*16+g],
- - cb,
- - 1.0f,
- - INFINITY,
- - &b);
- - bits += b;
- - }
- - dists[w*16+g] = dist - bits;
- - if (prev != -1) {
- - bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
- - }
- - tbits += bits;
- - start += sce->ics.swb_sizes[g];
- - prev = sce->sf_idx[w*16+g];
- - }
- - }
- - }
- - if (tbits > destbits) {
- - for (i = 0; i < 128; i++)
- - if (sce->sf_idx[i] < 218 - qstep)
- - sce->sf_idx[i] += qstep;
- - } else {
- - for (i = 0; i < 128; i++)
- - if (sce->sf_idx[i] > 60 - qstep)
- - sce->sf_idx[i] -= qstep;
- - }
- - qstep >>= 1;
- - if (!qstep && tbits > destbits*1.02 && sce->sf_idx[0] < 217)
- - qstep = 1;
- - } while (qstep);
- -
- - fflag = 0;
- - minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF);
- - for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
- - for (g = 0; g < sce->ics.num_swb; g++) {
- - int prevsc = sce->sf_idx[w*16+g];
- - if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60) {
- - if (find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1))
- - sce->sf_idx[w*16+g]--;
- - else
- - sce->sf_idx[w*16+g]-=2;
- - }
- - sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
- - sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);
- - if (sce->sf_idx[w*16+g] != prevsc)
- - fflag = 1;
- - sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
- - }
- - }
- - its++;
- - } while (fflag && its < 10);
- -}
- -
- static void search_for_ms_mips(AACEncContext *s, ChannelElement *cpe)
- {
- - int start = 0, i, w, w2, g;
- + int start = 0, i, w, w2, g, sid_sf_boost, prev_mid, prev_side;
- + uint8_t nextband0[128], nextband1[128];
- float M[128], S[128];
- float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
- + const float lambda = s->lambda;
- + const float mslambda = FFMIN(1.0f, lambda / 120.f);
- SingleChannelElement *sce0 = &cpe->ch[0];
- SingleChannelElement *sce1 = &cpe->ch[1];
- if (!cpe->common_window)
- return;
- +
- + /** Scout out next nonzero bands */
- + ff_init_nextband_map(sce0, nextband0);
- + ff_init_nextband_map(sce1, nextband1);
- +
- + prev_mid = sce0->sf_idx[0];
- + prev_side = sce1->sf_idx[0];
- for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
- + start = 0;
- for (g = 0; g < sce0->ics.num_swb; g++) {
- - if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) {
- - float dist1 = 0.0f, dist2 = 0.0f;
- + float bmax = bval2bmax(g * 17.0f / sce0->ics.num_swb) / 0.0045f;
- + if (!cpe->is_mask[w*16+g])
- + cpe->ms_mask[w*16+g] = 0;
- + if (!sce0->zeroes[w*16+g] && !sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g]) {
- + float Mmax = 0.0f, Smax = 0.0f;
- +
- + /* Must compute mid/side SF and book for the whole window group */
- for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
- - FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
- - FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
- - float minthr = FFMIN(band0->threshold, band1->threshold);
- - float maxthr = FFMAX(band0->threshold, band1->threshold);
- - for (i = 0; i < sce0->ics.swb_sizes[g]; i+=4) {
- - M[i ] = (sce0->coeffs[start+w2*128+i ]
- - + sce1->coeffs[start+w2*128+i ]) * 0.5;
- - M[i+1] = (sce0->coeffs[start+w2*128+i+1]
- - + sce1->coeffs[start+w2*128+i+1]) * 0.5;
- - M[i+2] = (sce0->coeffs[start+w2*128+i+2]
- - + sce1->coeffs[start+w2*128+i+2]) * 0.5;
- - M[i+3] = (sce0->coeffs[start+w2*128+i+3]
- - + sce1->coeffs[start+w2*128+i+3]) * 0.5;
- + for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
- + M[i] = (sce0->coeffs[start+(w+w2)*128+i]
- + + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
- + S[i] = M[i]
- + - sce1->coeffs[start+(w+w2)*128+i];
- + }
- + abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
- + abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
- + for (i = 0; i < sce0->ics.swb_sizes[g]; i++ ) {
- + Mmax = FFMAX(Mmax, M34[i]);
- + Smax = FFMAX(Smax, S34[i]);
- + }
- + }
-
- - S[i ] = M[i ]
- - - sce1->coeffs[start+w2*128+i ];
- - S[i+1] = M[i+1]
- - - sce1->coeffs[start+w2*128+i+1];
- - S[i+2] = M[i+2]
- - - sce1->coeffs[start+w2*128+i+2];
- - S[i+3] = M[i+3]
- - - sce1->coeffs[start+w2*128+i+3];
- - }
- - abs_pow34_v(L34, sce0->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
- - abs_pow34_v(R34, sce1->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
- - abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
- - abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
- - dist1 += quantize_band_cost(s, sce0->coeffs + start + w2*128,
- - L34,
- - sce0->ics.swb_sizes[g],
- - sce0->sf_idx[(w+w2)*16+g],
- - sce0->band_type[(w+w2)*16+g],
- - s->lambda / band0->threshold, INFINITY, NULL);
- - dist1 += quantize_band_cost(s, sce1->coeffs + start + w2*128,
- - R34,
- - sce1->ics.swb_sizes[g],
- - sce1->sf_idx[(w+w2)*16+g],
- - sce1->band_type[(w+w2)*16+g],
- - s->lambda / band1->threshold, INFINITY, NULL);
- - dist2 += quantize_band_cost(s, M,
- - M34,
- - sce0->ics.swb_sizes[g],
- - sce0->sf_idx[(w+w2)*16+g],
- - sce0->band_type[(w+w2)*16+g],
- - s->lambda / maxthr, INFINITY, NULL);
- - dist2 += quantize_band_cost(s, S,
- - S34,
- - sce1->ics.swb_sizes[g],
- - sce1->sf_idx[(w+w2)*16+g],
- - sce1->band_type[(w+w2)*16+g],
- - s->lambda / minthr, INFINITY, NULL);
- + for (sid_sf_boost = 0; sid_sf_boost < 4; sid_sf_boost++) {
- + float dist1 = 0.0f, dist2 = 0.0f;
- + int B0 = 0, B1 = 0;
- + int minidx;
- + int mididx, sididx;
- + int midcb, sidcb;
- +
- + minidx = FFMIN(sce0->sf_idx[w*16+g], sce1->sf_idx[w*16+g]);
- + mididx = av_clip(minidx, 0, SCALE_MAX_POS - SCALE_DIV_512);
- + sididx = av_clip(minidx - sid_sf_boost * 3, 0, SCALE_MAX_POS - SCALE_DIV_512);
- + if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT
- + && ( !ff_sfdelta_can_replace(sce0, nextband0, prev_mid, mididx, w*16+g)
- + || !ff_sfdelta_can_replace(sce1, nextband1, prev_side, sididx, w*16+g))) {
- + /* scalefactor range violation, bad stuff, will decrease quality unacceptably */
- + continue;
- + }
- +
- + midcb = find_min_book(Mmax, mididx);
- + sidcb = find_min_book(Smax, sididx);
- +
- + /* No CB can be zero */
- + midcb = FFMAX(1,midcb);
- + sidcb = FFMAX(1,sidcb);
- +
- + for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
- + FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
- + FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
- + float minthr = FFMIN(band0->threshold, band1->threshold);
- + int b1,b2,b3,b4;
- + for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
- + M[i] = (sce0->coeffs[start+(w+w2)*128+i]
- + + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
- + S[i] = M[i]
- + - sce1->coeffs[start+(w+w2)*128+i];
- + }
- +
- + abs_pow34_v(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
- + abs_pow34_v(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
- + abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
- + abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
- + dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128],
- + L34,
- + sce0->ics.swb_sizes[g],
- + sce0->sf_idx[w*16+g],
- + sce0->band_type[w*16+g],
- + lambda / band0->threshold, INFINITY, &b1, NULL, 0);
- + dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128],
- + R34,
- + sce1->ics.swb_sizes[g],
- + sce1->sf_idx[w*16+g],
- + sce1->band_type[w*16+g],
- + lambda / band1->threshold, INFINITY, &b2, NULL, 0);
- + dist2 += quantize_band_cost(s, M,
- + M34,
- + sce0->ics.swb_sizes[g],
- + mididx,
- + midcb,
- + lambda / minthr, INFINITY, &b3, NULL, 0);
- + dist2 += quantize_band_cost(s, S,
- + S34,
- + sce1->ics.swb_sizes[g],
- + sididx,
- + sidcb,
- + mslambda / (minthr * bmax), INFINITY, &b4, NULL, 0);
- + B0 += b1+b2;
- + B1 += b3+b4;
- + dist1 -= b1+b2;
- + dist2 -= b3+b4;
- + }
- + cpe->ms_mask[w*16+g] = dist2 <= dist1 && B1 < B0;
- + if (cpe->ms_mask[w*16+g]) {
- + if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT) {
- + sce0->sf_idx[w*16+g] = mididx;
- + sce1->sf_idx[w*16+g] = sididx;
- + sce0->band_type[w*16+g] = midcb;
- + sce1->band_type[w*16+g] = sidcb;
- + } else if ((sce0->band_type[w*16+g] != NOISE_BT) ^ (sce1->band_type[w*16+g] != NOISE_BT)) {
- + /* ms_mask unneeded, and it confuses some decoders */
- + cpe->ms_mask[w*16+g] = 0;
- + }
- + break;
- + } else if (B1 > B0) {
- + /* More boost won't fix this */
- + break;
- + }
- }
- - cpe->ms_mask[w*16+g] = dist2 < dist1;
- }
- + if (!sce0->zeroes[w*16+g] && sce0->band_type[w*16+g] < RESERVED_BT)
- + prev_mid = sce0->sf_idx[w*16+g];
- + if (!sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g] && sce1->band_type[w*16+g] < RESERVED_BT)
- + prev_side = sce1->sf_idx[w*16+g];
- start += sce0->ics.swb_sizes[g];
- }
- }
- }
- #endif /*HAVE_MIPSFPU */
-
- -static void codebook_trellis_rate_mips(AACEncContext *s, SingleChannelElement *sce,
- - int win, int group_len, const float lambda)
- -{
- - BandCodingPath path[120][12];
- - int w, swb, cb, start, size;
- - int i, j;
- - const int max_sfb = sce->ics.max_sfb;
- - const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
- - const int run_esc = (1 << run_bits) - 1;
- - int idx, ppos, count;
- - int stackrun[120], stackcb[120], stack_len;
- - float next_minbits = INFINITY;
- - int next_mincb = 0;
- +#include "libavcodec/aaccoder_trellis.h"
-
- - abs_pow34_v(s->scoefs, sce->coeffs, 1024);
- - start = win*128;
- - for (cb = 0; cb < 12; cb++) {
- - path[0][cb].cost = run_bits+4;
- - path[0][cb].prev_idx = -1;
- - path[0][cb].run = 0;
- - }
- - for (swb = 0; swb < max_sfb; swb++) {
- - size = sce->ics.swb_sizes[swb];
- - if (sce->zeroes[win*16 + swb]) {
- - float cost_stay_here = path[swb][0].cost;
- - float cost_get_here = next_minbits + run_bits + 4;
- - if ( run_value_bits[sce->ics.num_windows == 8][path[swb][0].run]
- - != run_value_bits[sce->ics.num_windows == 8][path[swb][0].run+1])
- - cost_stay_here += run_bits;
- - if (cost_get_here < cost_stay_here) {
- - path[swb+1][0].prev_idx = next_mincb;
- - path[swb+1][0].cost = cost_get_here;
- - path[swb+1][0].run = 1;
- - } else {
- - path[swb+1][0].prev_idx = 0;
- - path[swb+1][0].cost = cost_stay_here;
- - path[swb+1][0].run = path[swb][0].run + 1;
- - }
- - next_minbits = path[swb+1][0].cost;
- - next_mincb = 0;
- - for (cb = 1; cb < 12; cb++) {
- - path[swb+1][cb].cost = 61450;
- - path[swb+1][cb].prev_idx = -1;
- - path[swb+1][cb].run = 0;
- - }
- - } else {
- - float minbits = next_minbits;
- - int mincb = next_mincb;
- - int startcb = sce->band_type[win*16+swb];
- - next_minbits = INFINITY;
- - next_mincb = 0;
- - for (cb = 0; cb < startcb; cb++) {
- - path[swb+1][cb].cost = 61450;
- - path[swb+1][cb].prev_idx = -1;
- - path[swb+1][cb].run = 0;
- - }
- - for (cb = startcb; cb < 12; cb++) {
- - float cost_stay_here, cost_get_here;
- - float bits = 0.0f;
- - for (w = 0; w < group_len; w++) {
- - bits += quantize_band_cost_bits(s, sce->coeffs + start + w*128,
- - s->scoefs + start + w*128, size,
- - sce->sf_idx[(win+w)*16+swb], cb,
- - 0, INFINITY, NULL);
- - }
- - cost_stay_here = path[swb][cb].cost + bits;
- - cost_get_here = minbits + bits + run_bits + 4;
- - if ( run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
- - != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
- - cost_stay_here += run_bits;
- - if (cost_get_here < cost_stay_here) {
- - path[swb+1][cb].prev_idx = mincb;
- - path[swb+1][cb].cost = cost_get_here;
- - path[swb+1][cb].run = 1;
- - } else {
- - path[swb+1][cb].prev_idx = cb;
- - path[swb+1][cb].cost = cost_stay_here;
- - path[swb+1][cb].run = path[swb][cb].run + 1;
- - }
- - if (path[swb+1][cb].cost < next_minbits) {
- - next_minbits = path[swb+1][cb].cost;
- - next_mincb = cb;
- - }
- - }
- - }
- - start += sce->ics.swb_sizes[swb];
- - }
- -
- - stack_len = 0;
- - idx = 0;
- - for (cb = 1; cb < 12; cb++)
- - if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
- - idx = cb;
- - ppos = max_sfb;
- - while (ppos > 0) {
- - av_assert1(idx >= 0);
- - cb = idx;
- - stackrun[stack_len] = path[ppos][cb].run;
- - stackcb [stack_len] = cb;
- - idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
- - ppos -= path[ppos][cb].run;
- - stack_len++;
- - }
- -
- - start = 0;
- - for (i = stack_len - 1; i >= 0; i--) {
- - put_bits(&s->pb, 4, stackcb[i]);
- - count = stackrun[i];
- - memset(sce->zeroes + win*16 + start, !stackcb[i], count);
- - for (j = 0; j < count; j++) {
- - sce->band_type[win*16 + start] = stackcb[i];
- - start++;
- - }
- - while (count >= run_esc) {
- - put_bits(&s->pb, run_bits, run_esc);
- - count -= run_esc;
- - }
- - put_bits(&s->pb, run_bits, count);
- - }
- -}
- #endif /* HAVE_INLINE_ASM */
-
- void ff_aac_coder_init_mips(AACEncContext *c) {
- #if HAVE_INLINE_ASM
- AACCoefficientsEncoder *e = c->coder;
- - int option = c->options.aac_coder;
- + int option = c->options.coder;
-
- if (option == 2) {
- -// Disabled due to failure with fate-aac-pns-encode
- -// e->quantize_and_encode_band = quantize_and_encode_band_mips;
- -// e->encode_window_bands_info = codebook_trellis_rate_mips;
- + e->quantize_and_encode_band = quantize_and_encode_band_mips;
- + e->encode_window_bands_info = codebook_trellis_rate;
- #if HAVE_MIPSFPU
- - e->search_for_quantizers = search_for_quantizers_twoloop_mips;
- - e->search_for_ms = search_for_ms_mips;
- + e->search_for_quantizers = search_for_quantizers_twoloop;
- #endif /* HAVE_MIPSFPU */
- }
- +#if HAVE_MIPSFPU
- + e->search_for_ms = search_for_ms_mips;
- +#endif /* HAVE_MIPSFPU */
- #endif /* HAVE_INLINE_ASM */
- }
|