patch-libavcodec_mips_aacpsy_mips_h 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105
  1. $OpenBSD: patch-libavcodec_mips_aacpsy_mips_h,v 1.1 2015/12/06 08:51:46 ajacoutot Exp $
  2. AAC encoder: improve scalefactor (SF) range utilization
  3. --- libavcodec/mips/aacpsy_mips.h.orig Wed Jul 22 04:29:21 2015
  4. +++ libavcodec/mips/aacpsy_mips.h Sat Dec 5 15:01:19 2015
  5. @@ -61,58 +61,62 @@
  6. #if HAVE_INLINE_ASM && HAVE_MIPSFPU && ( PSY_LAME_FIR_LEN == 21 )
  7. static void calc_thr_3gpp_mips(const FFPsyWindowInfo *wi, const int num_bands,
  8. AacPsyChannel *pch, const uint8_t *band_sizes,
  9. - const float *coefs)
  10. + const float *coefs, const int cutoff)
  11. {
  12. int i, w, g;
  13. - int start = 0;
  14. + int start = 0, wstart = 0;
  15. for (w = 0; w < wi->num_windows*16; w += 16) {
  16. + wstart = 0;
  17. for (g = 0; g < num_bands; g++) {
  18. AacPsyBand *band = &pch->band[w+g];
  19. float form_factor = 0.0f;
  20. float Temp;
  21. band->energy = 0.0f;
  22. - for (i = 0; i < band_sizes[g]; i+=4) {
  23. - float a, b, c, d;
  24. - float ax, bx, cx, dx;
  25. - float *cf = (float *)&coefs[start+i];
  26. + if (wstart < cutoff) {
  27. + for (i = 0; i < band_sizes[g]; i+=4) {
  28. + float a, b, c, d;
  29. + float ax, bx, cx, dx;
  30. + float *cf = (float *)&coefs[start+i];
  31. - __asm__ volatile (
  32. - "lwc1 %[a], 0(%[cf]) \n\t"
  33. - "lwc1 %[b], 4(%[cf]) \n\t"
  34. - "lwc1 %[c], 8(%[cf]) \n\t"
  35. - "lwc1 %[d], 12(%[cf]) \n\t"
  36. - "abs.s %[a], %[a] \n\t"
  37. - "abs.s %[b], %[b] \n\t"
  38. - "abs.s %[c], %[c] \n\t"
  39. - "abs.s %[d], %[d] \n\t"
  40. - "sqrt.s %[ax], %[a] \n\t"
  41. - "sqrt.s %[bx], %[b] \n\t"
  42. - "sqrt.s %[cx], %[c] \n\t"
  43. - "sqrt.s %[dx], %[d] \n\t"
  44. - "madd.s %[e], %[e], %[a], %[a] \n\t"
  45. - "madd.s %[e], %[e], %[b], %[b] \n\t"
  46. - "madd.s %[e], %[e], %[c], %[c] \n\t"
  47. - "madd.s %[e], %[e], %[d], %[d] \n\t"
  48. - "add.s %[f], %[f], %[ax] \n\t"
  49. - "add.s %[f], %[f], %[bx] \n\t"
  50. - "add.s %[f], %[f], %[cx] \n\t"
  51. - "add.s %[f], %[f], %[dx] \n\t"
  52. + __asm__ volatile (
  53. + "lwc1 %[a], 0(%[cf]) \n\t"
  54. + "lwc1 %[b], 4(%[cf]) \n\t"
  55. + "lwc1 %[c], 8(%[cf]) \n\t"
  56. + "lwc1 %[d], 12(%[cf]) \n\t"
  57. + "abs.s %[a], %[a] \n\t"
  58. + "abs.s %[b], %[b] \n\t"
  59. + "abs.s %[c], %[c] \n\t"
  60. + "abs.s %[d], %[d] \n\t"
  61. + "sqrt.s %[ax], %[a] \n\t"
  62. + "sqrt.s %[bx], %[b] \n\t"
  63. + "sqrt.s %[cx], %[c] \n\t"
  64. + "sqrt.s %[dx], %[d] \n\t"
  65. + "madd.s %[e], %[e], %[a], %[a] \n\t"
  66. + "madd.s %[e], %[e], %[b], %[b] \n\t"
  67. + "madd.s %[e], %[e], %[c], %[c] \n\t"
  68. + "madd.s %[e], %[e], %[d], %[d] \n\t"
  69. + "add.s %[f], %[f], %[ax] \n\t"
  70. + "add.s %[f], %[f], %[bx] \n\t"
  71. + "add.s %[f], %[f], %[cx] \n\t"
  72. + "add.s %[f], %[f], %[dx] \n\t"
  73. - : [a]"=&f"(a), [b]"=&f"(b),
  74. - [c]"=&f"(c), [d]"=&f"(d),
  75. - [e]"+f"(band->energy), [f]"+f"(form_factor),
  76. - [ax]"=&f"(ax), [bx]"=&f"(bx),
  77. - [cx]"=&f"(cx), [dx]"=&f"(dx)
  78. - : [cf]"r"(cf)
  79. - : "memory"
  80. - );
  81. + : [a]"=&f"(a), [b]"=&f"(b),
  82. + [c]"=&f"(c), [d]"=&f"(d),
  83. + [e]"+f"(band->energy), [f]"+f"(form_factor),
  84. + [ax]"=&f"(ax), [bx]"=&f"(bx),
  85. + [cx]"=&f"(cx), [dx]"=&f"(dx)
  86. + : [cf]"r"(cf)
  87. + : "memory"
  88. + );
  89. + }
  90. }
  91. Temp = sqrtf((float)band_sizes[g] / band->energy);
  92. band->thr = band->energy * 0.001258925f;
  93. band->nz_lines = form_factor * sqrtf(Temp);
  94. start += band_sizes[g];
  95. + wstart += band_sizes[g];
  96. }
  97. }
  98. }