patch-libavcodec_aacenc_c 31 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682
  1. $OpenBSD: patch-libavcodec_aacenc_c,v 1.19 2016/12/05 09:02:29 ajacoutot Exp $
  2. aacenc: copy PRNG from the decoder
  3. avcodec/aacenc: use AV_OPT_TYPE_BOOL
  4. AAC encoder: tweak rate-distortion logic
  5. AAC encoder: Extensive improvements
  6. AAC encoder: memoize quantize_band_cost
  7. aacenc: add support for encoding 7.1 channel audio
  8. aacenc: add support for changing options based on a profile
  9. aacenc: shorten name of ff_aac_adjust_common_prediction
  10. aacenc: indicate that TNS is off by default
  11. aacenc: slightly simplify and remove a redundant variable
  12. aacenc: correctly zero prediction_used array
  13. aacenc: (re)enable Mid/Side coding by default
  14. aacenc: add support for encoding files using Long Term Prediction
  15. aacenc: partially revert previous commits to set options via a profile
  16. aacenc_tns: enable Temporal Noise Shaping by default
  17. avcodec/aacenc: Fix "libavcodec/aacenc.c:540:13: warning: ISO C90 forbids mixed declarations and code"
  18. AAC encoder: Fix application of M/S with PNS
  19. AAC encoder: improve SF range utilization
  20. aac: Provide more information on the failure message
  21. aacenc: mark the "faac"-like coder for removal
  22. aacenc: mark coders other than twoloop as experimental
  23. aacenc: remove the experimental flag
  24. aacenc: fix aac_pred option triggering an error
  25. aacenc: move the TNS search and filtering before PNS
  26. aacenc: switch to using the RNG from libavutil
  27. AAC encoder: don't apply MS on special bands
  28. aacenc: remove deprecated avctx->frame_bits use
  29. avcodec/aacenc: Fix NAN check
  30. avcodec/aacenc: mark output as const as it's not written to
  31. avcodec/aacenc: Check for +-Inf too
  32. lavc/aacenc: use isfinite to simplify isnan/isinf logic
  33. aacenc: mark LTP mode as experimental
  34. aacenc: remove FAAC-like coder
  35. avcodec/aacenc: Check all coefficients for finiteness
  36. aacenc: make a better estimate for the audio bitrate if not provided
  37. aacenc: temporarily disable Mid/Side coding with multichannel files
  38. aacenc: use generational cache instead of resetting.
  39. AAC encoder: fix valgrind errors
  40. aacenc: unmark the fast coder as experimental
  41. aacenc: fix various typos and an error message
  42. aacenc: use the decoder's lcg PRNG
  43. aacenc: quit when the audio queue reaches 0 rather than keeping track of empty frames
  44. --- libavcodec/aacenc.c.orig Sat Aug 27 22:51:29 2016
  45. +++ libavcodec/aacenc.c Thu Nov 10 19:22:09 2016
  46. @@ -29,6 +29,7 @@
  47. * add sane pulse detection
  48. ***********************************/
  49. +#include "libavutil/libm.h"
  50. #include "libavutil/float_dsp.h"
  51. #include "libavutil/opt.h"
  52. #include "avcodec.h"
  53. @@ -54,11 +55,12 @@ static void put_audio_specific_config(AVCodecContext *
  54. {
  55. PutBitContext pb;
  56. AACEncContext *s = avctx->priv_data;
  57. + int channels = s->channels - (s->channels == 8 ? 1 : 0);
  58. init_put_bits(&pb, avctx->extradata, avctx->extradata_size);
  59. put_bits(&pb, 5, s->profile+1); //profile
  60. put_bits(&pb, 4, s->samplerate_index); //sample rate index
  61. - put_bits(&pb, 4, s->channels);
  62. + put_bits(&pb, 4, channels);
  63. //GASpecificConfig
  64. put_bits(&pb, 1, 0); //frame length - 1024 samples
  65. put_bits(&pb, 1, 0); //does not depend on core coder
  66. @@ -71,6 +73,15 @@ static void put_audio_specific_config(AVCodecContext *
  67. flush_put_bits(&pb);
  68. }
  69. +void ff_quantize_band_cost_cache_init(struct AACEncContext *s)
  70. +{ /* Begin a new cache generation by bumping the counter stored in s. */
  71. + ++s->quantize_band_cost_cache_generation;
  72. + if (s->quantize_band_cost_cache_generation == 0) { /* counter is 0 after the bump (presumably it wrapped around) */
  73. + memset(s->quantize_band_cost_cache, 0, sizeof(s->quantize_band_cost_cache)); /* zero the whole cache array */
  74. + s->quantize_band_cost_cache_generation = 1; /* restart at 1, so 0 is never the live generation on return */
  75. + }
  76. +}
  77. +
  78. #define WINDOW_FUNC(type) \
  79. static void apply_ ##type ##_window(AVFloatDSPContext *fdsp, \
  80. SingleChannelElement *sce, \
  81. @@ -140,7 +151,7 @@ static void apply_window_and_mdct(AACEncContext *s, Si
  82. float *audio)
  83. {
  84. int i;
  85. - float *output = sce->ret_buf;
  86. + const float *output = sce->ret_buf;
  87. apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, audio);
  88. @@ -258,6 +269,8 @@ static void apply_intensity_stereo(ChannelElement *cpe
  89. start += ics->swb_sizes[g];
  90. continue;
  91. }
  92. + if (cpe->ms_mask[w*16 + g])
  93. + p *= -1;
  94. for (i = 0; i < ics->swb_sizes[g]; i++) {
  95. float sum = (cpe->ch[0].coeffs[start+i] + p*cpe->ch[1].coeffs[start+i])*scale;
  96. cpe->ch[0].coeffs[start+i] = sum;
  97. @@ -279,7 +292,13 @@ static void apply_mid_side_stereo(ChannelElement *cpe)
  98. for (w2 = 0; w2 < ics->group_len[w]; w2++) {
  99. int start = (w+w2) * 128;
  100. for (g = 0; g < ics->num_swb; g++) {
  101. - if (!cpe->ms_mask[w*16 + g]) {
  102. + /* ms_mask can be used for other purposes in PNS and I/S,
  103. + * so must not apply M/S if any band uses either, even if
  104. + * ms_mask is set.
  105. + */
  106. + if (!cpe->ms_mask[w*16 + g] || cpe->is_mask[w*16 + g]
  107. + || cpe->ch[0].band_type[w*16 + g] >= NOISE_BT
  108. + || cpe->ch[1].band_type[w*16 + g] >= NOISE_BT) {
  109. start += ics->swb_sizes[g];
  110. continue;
  111. }
  112. @@ -424,6 +443,8 @@ static int encode_individual_channel(AVCodecContext *a
  113. put_ics_info(s, &sce->ics);
  114. if (s->coder->encode_main_pred)
  115. s->coder->encode_main_pred(s, sce);
  116. + if (s->coder->encode_ltp_info)
  117. + s->coder->encode_ltp_info(s, sce, 0);
  118. }
  119. encode_band_info(s, sce);
  120. encode_scale_factors(avctx, s, sce);
  121. @@ -489,19 +510,21 @@ static int aac_encode_frame(AVCodecContext *avctx, AVP
  122. float **samples = s->planar_samples, *samples2, *la, *overlap;
  123. ChannelElement *cpe;
  124. SingleChannelElement *sce;
  125. - int i, ch, w, chans, tag, start_ch, ret;
  126. + IndividualChannelStream *ics;
  127. + int i, its, ch, w, chans, tag, start_ch, ret, frame_bits;
  128. + int target_bits, rate_bits, too_many_bits, too_few_bits;
  129. int ms_mode = 0, is_mode = 0, tns_mode = 0, pred_mode = 0;
  130. int chan_el_counter[4];
  131. FFPsyWindowInfo windows[AAC_MAX_CHANNELS];
  132. int k;
  133. - if (s->last_frame == 2)
  134. - return 0;
  135. -
  136. /* add current frame to queue */
  137. if (frame) {
  138. if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
  139. return ret;
  140. + } else {
  141. + if (!s->afq.remaining_samples || (!s->afq.frame_alloc && !s->afq.frame_count))
  142. + return 0;
  143. }
  144. copy_input_samples(s, frame);
  145. @@ -518,19 +541,22 @@ static int aac_encode_frame(AVCodecContext *avctx, AVP
  146. chans = tag == TYPE_CPE ? 2 : 1;
  147. cpe = &s->cpe[i];
  148. for (ch = 0; ch < chans; ch++) {
  149. - IndividualChannelStream *ics = &cpe->ch[ch].ics;
  150. - int cur_channel = start_ch + ch;
  151. + int k;
  152. float clip_avoidance_factor;
  153. - overlap = &samples[cur_channel][0];
  154. + sce = &cpe->ch[ch];
  155. + ics = &sce->ics;
  156. + s->cur_channel = start_ch + ch;
  157. + overlap = &samples[s->cur_channel][0];
  158. samples2 = overlap + 1024;
  159. la = samples2 + (448+64);
  160. if (!frame)
  161. la = NULL;
  162. if (tag == TYPE_LFE) {
  163. - wi[ch].window_type[0] = ONLY_LONG_SEQUENCE;
  164. + wi[ch].window_type[0] = wi[ch].window_type[1] = ONLY_LONG_SEQUENCE;
  165. wi[ch].window_shape = 0;
  166. wi[ch].num_windows = 1;
  167. wi[ch].grouping[0] = 1;
  168. + wi[ch].clipping[0] = 0;
  169. /* Only the lowest 12 coefficients are used in a LFE channel.
  170. * The expression below results in only the bottom 8 coefficients
  171. @@ -538,7 +564,7 @@ static int aac_encode_frame(AVCodecContext *avctx, AVP
  172. */
  173. ics->num_swb = s->samplerate_index >= 8 ? 1 : 3;
  174. } else {
  175. - wi[ch] = s->psy.model->window(&s->psy, samples2, la, cur_channel,
  176. + wi[ch] = s->psy.model->window(&s->psy, samples2, la, s->cur_channel,
  177. ics->window_sequence[0]);
  178. }
  179. ics->window_sequence[1] = ics->window_sequence[0];
  180. @@ -555,10 +581,23 @@ static int aac_encode_frame(AVCodecContext *avctx, AVP
  181. ics->tns_max_bands = wi[ch].window_type[0] == EIGHT_SHORT_SEQUENCE ?
  182. ff_tns_max_bands_128 [s->samplerate_index]:
  183. ff_tns_max_bands_1024[s->samplerate_index];
  184. - clip_avoidance_factor = 0.0f;
  185. +
  186. for (w = 0; w < ics->num_windows; w++)
  187. ics->group_len[w] = wi[ch].grouping[w];
  188. +
  189. + /* Calculate input sample maximums and evaluate clipping risk */
  190. + clip_avoidance_factor = 0.0f;
  191. for (w = 0; w < ics->num_windows; w++) {
  192. + const float *wbuf = overlap + w * 128;
  193. + const int wlen = 2048 / ics->num_windows;
  194. + float max = 0;
  195. + int j;
  196. + /* mdct input is 2 * output */
  197. + for (j = 0; j < wlen; j++)
  198. + max = FFMAX(max, fabsf(wbuf[j]));
  199. + wi[ch].clipping[w] = max;
  200. + }
  201. + for (w = 0; w < ics->num_windows; w++) {
  202. if (wi[ch].clipping[w] > CLIP_AVOIDANCE_FACTOR) {
  203. ics->window_clipping[w] = 1;
  204. clip_avoidance_factor = FFMAX(clip_avoidance_factor, wi[ch].clipping[w]);
  205. @@ -610,15 +649,28 @@ static int aac_encode_frame(AVCodecContext *avctx, AVP
  206. sce = &cpe->ch[ch];
  207. coeffs[ch] = sce->coeffs;
  208. sce->ics.predictor_present = 0;
  209. - memset(&sce->ics.prediction_used, 0, sizeof(sce->ics.prediction_used));
  210. + sce->ics.ltp.present = 0;
  211. + memset(sce->ics.ltp.used, 0, sizeof(sce->ics.ltp.used));
  212. + memset(sce->ics.prediction_used, 0, sizeof(sce->ics.prediction_used));
  213. memset(&sce->tns, 0, sizeof(TemporalNoiseShaping));
  214. for (w = 0; w < 128; w++)
  215. if (sce->band_type[w] > RESERVED_BT)
  216. sce->band_type[w] = 0;
  217. }
  218. + s->psy.bitres.alloc = -1;
  219. + s->psy.bitres.bits = s->last_frame_pb_count / s->channels;
  220. s->psy.model->analyze(&s->psy, start_ch, coeffs, wi);
  221. + if (s->psy.bitres.alloc > 0) {
  222. + /* Lambda unused here on purpose, we need to take psy's unscaled allocation */
  223. + target_bits += s->psy.bitres.alloc
  224. + * (s->lambda / (avctx->global_quality ? avctx->global_quality : 120));
  225. + s->psy.bitres.alloc /= chans;
  226. + }
  227. + s->cur_type = tag;
  228. for (ch = 0; ch < chans; ch++) {
  229. s->cur_channel = start_ch + ch;
  230. + if (s->options.pns && s->coder->mark_pns)
  231. + s->coder->mark_pns(s, avctx, &cpe->ch[ch]);
  232. s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
  233. }
  234. if (chans > 1
  235. @@ -636,14 +688,14 @@ static int aac_encode_frame(AVCodecContext *avctx, AVP
  236. for (ch = 0; ch < chans; ch++) { /* TNS and PNS */
  237. sce = &cpe->ch[ch];
  238. s->cur_channel = start_ch + ch;
  239. - if (s->options.pns && s->coder->search_for_pns)
  240. - s->coder->search_for_pns(s, avctx, sce);
  241. if (s->options.tns && s->coder->search_for_tns)
  242. s->coder->search_for_tns(s, sce);
  243. if (s->options.tns && s->coder->apply_tns_filt)
  244. s->coder->apply_tns_filt(s, sce);
  245. if (sce->tns.present)
  246. tns_mode = 1;
  247. + if (s->options.pns && s->coder->search_for_pns)
  248. + s->coder->search_for_pns(s, avctx, sce);
  249. }
  250. s->cur_channel = start_ch;
  251. if (s->options.intensity_stereo) { /* Intensity Stereo */
  252. @@ -660,8 +712,8 @@ static int aac_encode_frame(AVCodecContext *avctx, AVP
  253. s->coder->search_for_pred(s, sce);
  254. if (cpe->ch[ch].ics.predictor_present) pred_mode = 1;
  255. }
  256. - if (s->coder->adjust_common_prediction)
  257. - s->coder->adjust_common_prediction(s, cpe);
  258. + if (s->coder->adjust_common_pred)
  259. + s->coder->adjust_common_pred(s, cpe);
  260. for (ch = 0; ch < chans; ch++) {
  261. sce = &cpe->ch[ch];
  262. s->cur_channel = start_ch + ch;
  263. @@ -670,22 +722,34 @@ static int aac_encode_frame(AVCodecContext *avctx, AVP
  264. }
  265. s->cur_channel = start_ch;
  266. }
  267. - if (s->options.stereo_mode) { /* Mid/Side stereo */
  268. - if (s->options.stereo_mode == -1 && s->coder->search_for_ms)
  269. + if (s->options.mid_side) { /* Mid/Side stereo */
  270. + if (s->options.mid_side == -1 && s->coder->search_for_ms)
  271. s->coder->search_for_ms(s, cpe);
  272. else if (cpe->common_window)
  273. memset(cpe->ms_mask, 1, sizeof(cpe->ms_mask));
  274. - for (w = 0; w < 128; w++)
  275. - cpe->ms_mask[w] = cpe->is_mask[w] ? 0 : cpe->ms_mask[w];
  276. apply_mid_side_stereo(cpe);
  277. }
  278. adjust_frame_information(cpe, chans);
  279. + if (s->options.ltp) { /* LTP */
  280. + for (ch = 0; ch < chans; ch++) {
  281. + sce = &cpe->ch[ch];
  282. + s->cur_channel = start_ch + ch;
  283. + if (s->coder->search_for_ltp)
  284. + s->coder->search_for_ltp(s, sce, cpe->common_window);
  285. + if (sce->ics.ltp.present) pred_mode = 1;
  286. + }
  287. + s->cur_channel = start_ch;
  288. + if (s->coder->adjust_common_ltp)
  289. + s->coder->adjust_common_ltp(s, cpe);
  290. + }
  291. if (chans == 2) {
  292. put_bits(&s->pb, 1, cpe->common_window);
  293. if (cpe->common_window) {
  294. put_ics_info(s, &cpe->ch[0].ics);
  295. if (s->coder->encode_main_pred)
  296. s->coder->encode_main_pred(s, &cpe->ch[0]);
  297. + if (s->coder->encode_ltp_info)
  298. + s->coder->encode_ltp_info(s, &cpe->ch[0], 1);
  299. encode_ms_info(&s->pb, cpe);
  300. if (cpe->ms_mode) ms_mode = 1;
  301. }
  302. @@ -697,38 +761,77 @@ static int aac_encode_frame(AVCodecContext *avctx, AVP
  303. start_ch += chans;
  304. }
  305. - frame_bits = put_bits_count(&s->pb);
  306. - if (frame_bits <= 6144 * s->channels - 3) {
  307. - s->psy.bitres.bits = frame_bits / s->channels;
  308. + if (avctx->flags & CODEC_FLAG_QSCALE) {
  309. + /* When using a constant Q-scale, don't mess with lambda */
  310. break;
  311. }
  312. - if (is_mode || ms_mode || tns_mode || pred_mode) {
  313. - for (i = 0; i < s->chan_map[0]; i++) {
  314. - // Must restore coeffs
  315. - chans = tag == TYPE_CPE ? 2 : 1;
  316. - cpe = &s->cpe[i];
  317. - for (ch = 0; ch < chans; ch++)
  318. - memcpy(cpe->ch[ch].coeffs, cpe->ch[ch].pcoeffs, sizeof(cpe->ch[ch].coeffs));
  319. - }
  320. - }
  321. - s->lambda *= avctx->bit_rate * 1024.0f / avctx->sample_rate / frame_bits;
  322. + /* rate control stuff
  323. + * allow between the nominal bitrate, and what psy's bit reservoir says to target
  324. + * but drift towards the nominal bitrate always
  325. + */
  326. + frame_bits = put_bits_count(&s->pb);
  327. + rate_bits = avctx->bit_rate * 1024 / avctx->sample_rate;
  328. + rate_bits = FFMIN(rate_bits, 6144 * s->channels - 3);
  329. + too_many_bits = FFMAX(target_bits, rate_bits);
  330. + too_many_bits = FFMIN(too_many_bits, 6144 * s->channels - 3);
  331. + too_few_bits = FFMIN(FFMAX(rate_bits - rate_bits/4, target_bits), too_many_bits);
  332. + /* When using ABR, be strict (but only for increasing) */
  333. + too_few_bits = too_few_bits - too_few_bits/8;
  334. + too_many_bits = too_many_bits + too_many_bits/2;
  335. +
  336. + if ( its == 0 /* for steady-state Q-scale tracking */
  337. + || (its < 5 && (frame_bits < too_few_bits || frame_bits > too_many_bits))
  338. + || frame_bits >= 6144 * s->channels - 3 )
  339. + {
  340. + float ratio = ((float)rate_bits) / frame_bits;
  341. +
  342. + if (frame_bits >= too_few_bits && frame_bits <= too_many_bits) {
  343. + /*
  344. + * This path is for steady-state Q-scale tracking
  345. + * When frame bits fall within the stable range, we still need to adjust
  346. + * lambda to maintain it like so in a stable fashion (large jumps in lambda
  347. + * create artifacts and should be avoided), but slowly
  348. + */
  349. + ratio = sqrtf(sqrtf(ratio));
  350. + ratio = av_clipf(ratio, 0.9f, 1.1f);
  351. + } else {
  352. + /* Not so fast though */
  353. + ratio = sqrtf(ratio);
  354. + }
  355. + s->lambda = FFMIN(s->lambda * ratio, 65536.f);
  356. +
  357. + /* Keep iterating if we must reduce and lambda is in the sky */
  358. + if (ratio > 0.9f && ratio < 1.1f) {
  359. + break;
  360. + } else {
  361. + if (is_mode || ms_mode || tns_mode || pred_mode) {
  362. + for (i = 0; i < s->chan_map[0]; i++) {
  363. + // Must restore coeffs
  364. + chans = tag == TYPE_CPE ? 2 : 1;
  365. + cpe = &s->cpe[i];
  366. + for (ch = 0; ch < chans; ch++)
  367. + memcpy(cpe->ch[ch].coeffs, cpe->ch[ch].pcoeffs, sizeof(cpe->ch[ch].coeffs));
  368. + }
  369. + }
  370. + its++;
  371. + }
  372. + } else {
  373. + break;
  374. + }
  375. } while (1);
  376. + if (s->options.ltp && s->coder->ltp_insert_new_frame)
  377. + s->coder->ltp_insert_new_frame(s);
  378. +
  379. put_bits(&s->pb, 3, TYPE_END);
  380. flush_put_bits(&s->pb);
  381. - avctx->frame_bits = put_bits_count(&s->pb);
  382. - // rate control stuff
  383. - if (!(avctx->flags & AV_CODEC_FLAG_QSCALE)) {
  384. - float ratio = avctx->bit_rate * 1024.0f / avctx->sample_rate / avctx->frame_bits;
  385. - s->lambda *= ratio;
  386. - s->lambda = FFMIN(s->lambda, 65536.f);
  387. - }
  388. + s->last_frame_pb_count = put_bits_count(&s->pb);
  389. - if (!frame)
  390. - s->last_frame++;
  391. + s->lambda_sum += s->lambda;
  392. + s->lambda_count++;
  393. ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
  394. &avpkt->duration);
  395. @@ -742,6 +845,8 @@ static av_cold int aac_encode_end(AVCodecContext *avct
  396. {
  397. AACEncContext *s = avctx->priv_data;
  398. + av_log(avctx, AV_LOG_INFO, "Qavg: %.3f\n", s->lambda_sum / s->lambda_count);
  399. +
  400. ff_mdct_end(&s->mdct1024);
  401. ff_mdct_end(&s->mdct128);
  402. ff_psy_end(&s->psy);
  403. @@ -800,76 +905,123 @@ static av_cold int aac_encode_init(AVCodecContext *avc
  404. uint8_t grouping[AAC_MAX_CHANNELS];
  405. int lengths[2];
  406. + /* Constants */
  407. + s->last_frame_pb_count = 0;
  408. + avctx->extradata_size = 5;
  409. avctx->frame_size = 1024;
  410. + avctx->initial_padding = 1024;
  411. + s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
  412. + /* Channel map and unspecified bitrate guessing */
  413. + s->channels = avctx->channels;
  414. + ERROR_IF(s->channels > AAC_MAX_CHANNELS || s->channels == 7,
  415. + "Unsupported number of channels: %d\n", s->channels);
  416. + s->chan_map = aac_chan_configs[s->channels-1];
  417. + if (!avctx->bit_rate) {
  418. + for (i = 1; i <= s->chan_map[0]; i++) {
  419. + avctx->bit_rate += s->chan_map[i] == TYPE_CPE ? 128000 : /* Pair */
  420. + s->chan_map[i] == TYPE_LFE ? 16000 : /* LFE */
  421. + 69000 ; /* SCE */
  422. + }
  423. + }
  424. +
  425. + /* Samplerate */
  426. for (i = 0; i < 16; i++)
  427. if (avctx->sample_rate == avpriv_mpeg4audio_sample_rates[i])
  428. break;
  429. -
  430. - s->channels = avctx->channels;
  431. -
  432. - ERROR_IF(i == 16 || i >= ff_aac_swb_size_1024_len || i >= ff_aac_swb_size_128_len,
  433. + s->samplerate_index = i;
  434. + ERROR_IF(s->samplerate_index == 16 ||
  435. + s->samplerate_index >= ff_aac_swb_size_1024_len ||
  436. + s->samplerate_index >= ff_aac_swb_size_128_len,
  437. "Unsupported sample rate %d\n", avctx->sample_rate);
  438. - ERROR_IF(s->channels > AAC_MAX_CHANNELS,
  439. - "Unsupported number of channels: %d\n", s->channels);
  440. +
  441. + /* Bitrate limiting */
  442. WARN_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels,
  443. - "Too many bits per frame requested, clamping to max\n");
  444. - if (avctx->profile == FF_PROFILE_AAC_MAIN) {
  445. + "Too many bits %f > %d per frame requested, clamping to max\n",
  446. + 1024.0 * avctx->bit_rate / avctx->sample_rate,
  447. + 6144 * s->channels);
  448. + avctx->bit_rate = (int64_t)FFMIN(6144 * s->channels / 1024.0 * avctx->sample_rate,
  449. + avctx->bit_rate);
  450. +
  451. + /* Profile and option setting */
  452. + avctx->profile = avctx->profile == FF_PROFILE_UNKNOWN ? FF_PROFILE_AAC_LOW :
  453. + avctx->profile;
  454. + for (i = 0; i < FF_ARRAY_ELEMS(aacenc_profiles); i++)
  455. + if (avctx->profile == aacenc_profiles[i])
  456. + break;
  457. + if (avctx->profile == FF_PROFILE_MPEG2_AAC_LOW) {
  458. + avctx->profile = FF_PROFILE_AAC_LOW;
  459. + ERROR_IF(s->options.pred,
  460. + "Main prediction unavailable in the \"mpeg2_aac_low\" profile\n");
  461. + ERROR_IF(s->options.ltp,
  462. + "LTP prediction unavailable in the \"mpeg2_aac_low\" profile\n");
  463. + WARN_IF(s->options.pns,
  464. + "PNS unavailable in the \"mpeg2_aac_low\" profile, turning off\n");
  465. + s->options.pns = 0;
  466. + } else if (avctx->profile == FF_PROFILE_AAC_LTP) {
  467. + s->options.ltp = 1;
  468. + ERROR_IF(s->options.pred,
  469. + "Main prediction unavailable in the \"aac_ltp\" profile\n");
  470. + } else if (avctx->profile == FF_PROFILE_AAC_MAIN) {
  471. s->options.pred = 1;
  472. - } else if ((avctx->profile == FF_PROFILE_AAC_LOW ||
  473. - avctx->profile == FF_PROFILE_UNKNOWN) && s->options.pred) {
  474. - s->profile = 0; /* Main */
  475. - WARN_IF(1, "Prediction requested, changing profile to AAC-Main\n");
  476. - } else if (avctx->profile == FF_PROFILE_AAC_LOW ||
  477. - avctx->profile == FF_PROFILE_UNKNOWN) {
  478. - s->profile = 1; /* Low */
  479. - } else {
  480. - ERROR_IF(1, "Unsupported profile %d\n", avctx->profile);
  481. + ERROR_IF(s->options.ltp,
  482. + "LTP prediction unavailable in the \"aac_main\" profile\n");
  483. + } else if (s->options.ltp) {
  484. + avctx->profile = FF_PROFILE_AAC_LTP;
  485. + WARN_IF(1,
  486. + "Chainging profile to \"aac_ltp\"\n");
  487. + ERROR_IF(s->options.pred,
  488. + "Main prediction unavailable in the \"aac_ltp\" profile\n");
  489. + } else if (s->options.pred) {
  490. + avctx->profile = FF_PROFILE_AAC_MAIN;
  491. + WARN_IF(1,
  492. + "Chainging profile to \"aac_main\"\n");
  493. + ERROR_IF(s->options.ltp,
  494. + "LTP prediction unavailable in the \"aac_main\" profile\n");
  495. }
  496. + s->profile = avctx->profile;
  497. - if (s->options.aac_coder != AAC_CODER_TWOLOOP) {
  498. + /* Coder limitations */
  499. + s->coder = &ff_aac_coders[s->options.coder];
  500. + if (s->options.coder == AAC_CODER_ANMR) {
  501. + ERROR_IF(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL,
  502. + "The ANMR coder is considered experimental, add -strict -2 to enable!\n");
  503. s->options.intensity_stereo = 0;
  504. s->options.pns = 0;
  505. }
  506. + ERROR_IF(s->options.ltp && avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL,
  507. + "The LPT profile requires experimental compliance, add -strict -2 to enable!\n");
  508. - avctx->bit_rate = (int)FFMIN(
  509. - 6144 * s->channels / 1024.0 * avctx->sample_rate,
  510. - avctx->bit_rate);
  511. + /* M/S introduces horrible artifacts with multichannel files, this is temporary */
  512. + if (s->channels > 3)
  513. + s->options.mid_side = 0;
  514. - s->samplerate_index = i;
  515. -
  516. - s->chan_map = aac_chan_configs[s->channels-1];
  517. -
  518. if ((ret = dsp_init(avctx, s)) < 0)
  519. goto fail;
  520. if ((ret = alloc_buffers(avctx, s)) < 0)
  521. goto fail;
  522. - avctx->extradata_size = 5;
  523. put_audio_specific_config(avctx);
  524. - sizes[0] = ff_aac_swb_size_1024[i];
  525. - sizes[1] = ff_aac_swb_size_128[i];
  526. - lengths[0] = ff_aac_num_swb_1024[i];
  527. - lengths[1] = ff_aac_num_swb_128[i];
  528. + sizes[0] = ff_aac_swb_size_1024[s->samplerate_index];
  529. + sizes[1] = ff_aac_swb_size_128[s->samplerate_index];
  530. + lengths[0] = ff_aac_num_swb_1024[s->samplerate_index];
  531. + lengths[1] = ff_aac_num_swb_128[s->samplerate_index];
  532. for (i = 0; i < s->chan_map[0]; i++)
  533. grouping[i] = s->chan_map[i + 1] == TYPE_CPE;
  534. if ((ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths,
  535. s->chan_map[0], grouping)) < 0)
  536. goto fail;
  537. s->psypp = ff_psy_preprocess_init(avctx);
  538. - s->coder = &ff_aac_coders[s->options.aac_coder];
  539. ff_lpc_init(&s->lpc, 2*avctx->frame_size, TNS_MAX_ORDER, FF_LPC_TYPE_LEVINSON);
  540. + s->random_state = 0x1f2e3d4c;
  541. if (HAVE_MIPSDSPR1)
  542. ff_aac_coder_init_mips(s);
  543. - s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
  544. -
  545. ff_aac_tableinit();
  546. - avctx->initial_padding = 1024;
  547. ff_af_queue_init(avctx, &s->afq);
  548. return 0;
  549. @@ -880,27 +1032,16 @@ fail:
  550. #define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
  551. static const AVOption aacenc_options[] = {
  552. - {"stereo_mode", "Stereo coding method", offsetof(AACEncContext, options.stereo_mode), AV_OPT_TYPE_INT, {.i64 = 0}, -1, 1, AACENC_FLAGS, "stereo_mode"},
  553. - {"auto", "Selected by the Encoder", 0, AV_OPT_TYPE_CONST, {.i64 = -1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
  554. - {"ms_off", "Disable Mid/Side coding", 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
  555. - {"ms_force", "Force Mid/Side for the whole frame if possible", 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
  556. - {"aac_coder", "Coding algorithm", offsetof(AACEncContext, options.aac_coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_TWOLOOP}, 0, AAC_CODER_NB-1, AACENC_FLAGS, "aac_coder"},
  557. - {"faac", "FAAC-inspired method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAAC}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
  558. - {"anmr", "ANMR method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
  559. - {"twoloop", "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
  560. - {"fast", "Constant quantizer", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
  561. - {"aac_pns", "Perceptual Noise Substitution", offsetof(AACEncContext, options.pns), AV_OPT_TYPE_INT, {.i64 = 1}, 0, 1, AACENC_FLAGS, "aac_pns"},
  562. - {"disable", "Disable perceptual noise substitution", 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pns"},
  563. - {"enable", "Enable perceptual noise substitution", 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pns"},
  564. - {"aac_is", "Intensity stereo coding", offsetof(AACEncContext, options.intensity_stereo), AV_OPT_TYPE_INT, {.i64 = 1}, 0, 1, AACENC_FLAGS, "intensity_stereo"},
  565. - {"disable", "Disable intensity stereo coding", 0, AV_OPT_TYPE_CONST, {.i64 = 0}, INT_MIN, INT_MAX, AACENC_FLAGS, "intensity_stereo"},
  566. - {"enable", "Enable intensity stereo coding", 0, AV_OPT_TYPE_CONST, {.i64 = 1}, INT_MIN, INT_MAX, AACENC_FLAGS, "intensity_stereo"},
  567. - {"aac_tns", "Temporal noise shaping", offsetof(AACEncContext, options.tns), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AACENC_FLAGS, "aac_tns"},
  568. - {"disable", "Disable temporal noise shaping", 0, AV_OPT_TYPE_CONST, {.i64 = 0}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_tns"},
  569. - {"enable", "Enable temporal noise shaping", 0, AV_OPT_TYPE_CONST, {.i64 = 1}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_tns"},
  570. - {"aac_pred", "AAC-Main prediction", offsetof(AACEncContext, options.pred), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AACENC_FLAGS, "aac_pred"},
  571. - {"disable", "Disable AAC-Main prediction", 0, AV_OPT_TYPE_CONST, {.i64 = 0}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pred"},
  572. - {"enable", "Enable AAC-Main prediction", 0, AV_OPT_TYPE_CONST, {.i64 = 1}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pred"},
  573. + {"aac_coder", "Coding algorithm", offsetof(AACEncContext, options.coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_TWOLOOP}, 0, AAC_CODER_NB-1, AACENC_FLAGS, "coder"},
  574. + {"anmr", "ANMR method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR}, INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
  575. + {"twoloop", "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
  576. + {"fast", "Constant quantizer", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST}, INT_MIN, INT_MAX, AACENC_FLAGS, "coder"},
  577. + {"aac_ms", "Force M/S stereo coding", offsetof(AACEncContext, options.mid_side), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, AACENC_FLAGS},
  578. + {"aac_is", "Intensity stereo coding", offsetof(AACEncContext, options.intensity_stereo), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
  579. + {"aac_pns", "Perceptual noise substitution", offsetof(AACEncContext, options.pns), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
  580. + {"aac_tns", "Temporal noise shaping", offsetof(AACEncContext, options.tns), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
  581. + {"aac_ltp", "Long term prediction", offsetof(AACEncContext, options.ltp), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
  582. + {"aac_pred", "AAC-Main prediction", offsetof(AACEncContext, options.pred), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
  583. {NULL}
  584. };
  585. @@ -911,6 +1052,11 @@ static const AVClass aacenc_class = {
  586. LIBAVUTIL_VERSION_INT,
  587. };
  588. +static const AVCodecDefault aac_encode_defaults[] = { /* option defaults, referenced by ff_aac_encoder.defaults */
  589. + { "b", "0" }, /* "b" defaults to "0"; presumably the bit rate, which aac_encode_init() guesses when it is 0 -- TODO confirm */
  590. + { NULL } /* NULL entry closes the table */
  591. +};
  592. +
  593. AVCodec ff_aac_encoder = {
  594. .name = "aac",
  595. .long_name = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
  596. @@ -920,9 +1066,9 @@ AVCodec ff_aac_encoder = {
  597. .init = aac_encode_init,
  598. .encode2 = aac_encode_frame,
  599. .close = aac_encode_end,
  600. + .defaults = aac_encode_defaults,
  601. .supported_samplerates = mpeg4audio_sample_rates,
  602. - .capabilities = AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_DELAY |
  603. - AV_CODEC_CAP_EXPERIMENTAL,
  604. + .capabilities = AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_DELAY,
  605. .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
  606. AV_SAMPLE_FMT_NONE },
  607. .priv_class = &aacenc_class,