armstate.c 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. /********************************************************************
  2. * *
  3. * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE. *
  4. * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
  5. * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
  6. * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
  7. * *
  8. * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2010 *
  9. * by the Xiph.Org Foundation and contributors http://www.xiph.org/ *
  10. * *
  11. ********************************************************************
  12. function:
  13. last mod: $Id: x86state.c 17344 2010-07-21 01:42:18Z tterribe $
  14. ********************************************************************/
  15. #include "armint.h"
  16. #if defined(OC_ARM_ASM)
  17. # if defined(OC_ARM_ASM_NEON)
  18. /*This table has been modified from OC_FZIG_ZAG by baking an 8x8 transpose into
  19. the destination.*/
  20. static const unsigned char OC_FZIG_ZAG_NEON[128]={
  21. 0, 8, 1, 2, 9,16,24,17,
  22. 10, 3, 4,11,18,25,32,40,
  23. 33,26,19,12, 5, 6,13,20,
  24. 27,34,41,48,56,49,42,35,
  25. 28,21,14, 7,15,22,29,36,
  26. 43,50,57,58,51,44,37,30,
  27. 23,31,38,45,52,59,60,53,
  28. 46,39,47,54,61,62,55,63,
  29. 64,64,64,64,64,64,64,64,
  30. 64,64,64,64,64,64,64,64,
  31. 64,64,64,64,64,64,64,64,
  32. 64,64,64,64,64,64,64,64,
  33. 64,64,64,64,64,64,64,64,
  34. 64,64,64,64,64,64,64,64,
  35. 64,64,64,64,64,64,64,64,
  36. 64,64,64,64,64,64,64,64
  37. };
  38. # endif
  39. void oc_state_accel_init_arm(oc_theora_state *_state){
  40. oc_state_accel_init_c(_state);
  41. _state->cpu_flags=oc_cpu_flags_get();
  42. # if defined(OC_STATE_USE_VTABLE)
  43. _state->opt_vtable.frag_copy_list=oc_frag_copy_list_arm;
  44. _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_arm;
  45. _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_arm;
  46. _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_arm;
  47. _state->opt_vtable.idct8x8=oc_idct8x8_arm;
  48. _state->opt_vtable.state_frag_recon=oc_state_frag_recon_arm;
  49. /*Note: We _must_ set this function pointer, because the macro in armint.h
  50. calls it with different arguments, so the C version will segfault.*/
  51. _state->opt_vtable.state_loop_filter_frag_rows=
  52. (oc_state_loop_filter_frag_rows_func)oc_loop_filter_frag_rows_arm;
  53. # endif
  54. # if defined(OC_ARM_ASM_EDSP)
  55. if(_state->cpu_flags&OC_CPU_ARM_EDSP){
  56. # if defined(OC_STATE_USE_VTABLE)
  57. _state->opt_vtable.frag_copy_list=oc_frag_copy_list_edsp;
  58. # endif
  59. }
  60. # if defined(OC_ARM_ASM_MEDIA)
  61. if(_state->cpu_flags&OC_CPU_ARM_MEDIA){
  62. # if defined(OC_STATE_USE_VTABLE)
  63. _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_v6;
  64. _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_v6;
  65. _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_v6;
  66. _state->opt_vtable.idct8x8=oc_idct8x8_v6;
  67. _state->opt_vtable.state_frag_recon=oc_state_frag_recon_v6;
  68. _state->opt_vtable.loop_filter_init=oc_loop_filter_init_v6;
  69. _state->opt_vtable.state_loop_filter_frag_rows=
  70. (oc_state_loop_filter_frag_rows_func)oc_loop_filter_frag_rows_v6;
  71. # endif
  72. }
  73. # if defined(OC_ARM_ASM_NEON)
  74. if(_state->cpu_flags&OC_CPU_ARM_NEON){
  75. # if defined(OC_STATE_USE_VTABLE)
  76. _state->opt_vtable.frag_copy_list=oc_frag_copy_list_neon;
  77. _state->opt_vtable.frag_recon_intra=oc_frag_recon_intra_neon;
  78. _state->opt_vtable.frag_recon_inter=oc_frag_recon_inter_neon;
  79. _state->opt_vtable.frag_recon_inter2=oc_frag_recon_inter2_neon;
  80. _state->opt_vtable.state_frag_recon=oc_state_frag_recon_neon;
  81. _state->opt_vtable.loop_filter_init=oc_loop_filter_init_neon;
  82. _state->opt_vtable.state_loop_filter_frag_rows=
  83. (oc_state_loop_filter_frag_rows_func)oc_loop_filter_frag_rows_neon;
  84. _state->opt_vtable.idct8x8=oc_idct8x8_neon;
  85. # endif
  86. _state->opt_data.dct_fzig_zag=OC_FZIG_ZAG_NEON;
  87. }
  88. # endif
  89. # endif
  90. # endif
  91. }
  92. void oc_state_frag_recon_arm(const oc_theora_state *_state,ptrdiff_t _fragi,
  93. int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){
  94. unsigned char *dst;
  95. ptrdiff_t frag_buf_off;
  96. int ystride;
  97. int refi;
  98. /*Apply the inverse transform.*/
  99. /*Special case only having a DC component.*/
  100. if(_last_zzi<2){
  101. ogg_uint16_t p;
  102. /*We round this dequant product (and not any of the others) because there's
  103. no iDCT rounding.*/
  104. p=(ogg_uint16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5);
  105. oc_idct8x8_1_arm(_dct_coeffs+64,p);
  106. }
  107. else{
  108. /*First, dequantize the DC coefficient.*/
  109. _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant);
  110. oc_idct8x8_arm(_dct_coeffs+64,_dct_coeffs,_last_zzi);
  111. }
  112. /*Fill in the target buffer.*/
  113. frag_buf_off=_state->frag_buf_offs[_fragi];
  114. refi=_state->frags[_fragi].refi;
  115. ystride=_state->ref_ystride[_pli];
  116. dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off;
  117. if(refi==OC_FRAME_SELF)oc_frag_recon_intra_arm(dst,ystride,_dct_coeffs+64);
  118. else{
  119. const unsigned char *ref;
  120. int mvoffsets[2];
  121. ref=_state->ref_frame_data[refi]+frag_buf_off;
  122. if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
  123. _state->frag_mvs[_fragi])>1){
  124. oc_frag_recon_inter2_arm(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,
  125. _dct_coeffs+64);
  126. }
  127. else oc_frag_recon_inter_arm(dst,ref+mvoffsets[0],ystride,_dct_coeffs+64);
  128. }
  129. }
  130. # if defined(OC_ARM_ASM_MEDIA)
  131. void oc_state_frag_recon_v6(const oc_theora_state *_state,ptrdiff_t _fragi,
  132. int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){
  133. unsigned char *dst;
  134. ptrdiff_t frag_buf_off;
  135. int ystride;
  136. int refi;
  137. /*Apply the inverse transform.*/
  138. /*Special case only having a DC component.*/
  139. if(_last_zzi<2){
  140. ogg_uint16_t p;
  141. /*We round this dequant product (and not any of the others) because there's
  142. no iDCT rounding.*/
  143. p=(ogg_uint16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5);
  144. oc_idct8x8_1_v6(_dct_coeffs+64,p);
  145. }
  146. else{
  147. /*First, dequantize the DC coefficient.*/
  148. _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant);
  149. oc_idct8x8_v6(_dct_coeffs+64,_dct_coeffs,_last_zzi);
  150. }
  151. /*Fill in the target buffer.*/
  152. frag_buf_off=_state->frag_buf_offs[_fragi];
  153. refi=_state->frags[_fragi].refi;
  154. ystride=_state->ref_ystride[_pli];
  155. dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off;
  156. if(refi==OC_FRAME_SELF)oc_frag_recon_intra_v6(dst,ystride,_dct_coeffs+64);
  157. else{
  158. const unsigned char *ref;
  159. int mvoffsets[2];
  160. ref=_state->ref_frame_data[refi]+frag_buf_off;
  161. if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
  162. _state->frag_mvs[_fragi])>1){
  163. oc_frag_recon_inter2_v6(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,
  164. _dct_coeffs+64);
  165. }
  166. else oc_frag_recon_inter_v6(dst,ref+mvoffsets[0],ystride,_dct_coeffs+64);
  167. }
  168. }
  169. # if defined(OC_ARM_ASM_NEON)
  170. void oc_state_frag_recon_neon(const oc_theora_state *_state,ptrdiff_t _fragi,
  171. int _pli,ogg_int16_t _dct_coeffs[128],int _last_zzi,ogg_uint16_t _dc_quant){
  172. unsigned char *dst;
  173. ptrdiff_t frag_buf_off;
  174. int ystride;
  175. int refi;
  176. /*Apply the inverse transform.*/
  177. /*Special case only having a DC component.*/
  178. if(_last_zzi<2){
  179. ogg_uint16_t p;
  180. /*We round this dequant product (and not any of the others) because there's
  181. no iDCT rounding.*/
  182. p=(ogg_uint16_t)(_dct_coeffs[0]*(ogg_int32_t)_dc_quant+15>>5);
  183. oc_idct8x8_1_neon(_dct_coeffs+64,p);
  184. }
  185. else{
  186. /*First, dequantize the DC coefficient.*/
  187. _dct_coeffs[0]=(ogg_int16_t)(_dct_coeffs[0]*(int)_dc_quant);
  188. oc_idct8x8_neon(_dct_coeffs+64,_dct_coeffs,_last_zzi);
  189. }
  190. /*Fill in the target buffer.*/
  191. frag_buf_off=_state->frag_buf_offs[_fragi];
  192. refi=_state->frags[_fragi].refi;
  193. ystride=_state->ref_ystride[_pli];
  194. dst=_state->ref_frame_data[OC_FRAME_SELF]+frag_buf_off;
  195. if(refi==OC_FRAME_SELF)oc_frag_recon_intra_neon(dst,ystride,_dct_coeffs+64);
  196. else{
  197. const unsigned char *ref;
  198. int mvoffsets[2];
  199. ref=_state->ref_frame_data[refi]+frag_buf_off;
  200. if(oc_state_get_mv_offsets(_state,mvoffsets,_pli,
  201. _state->frag_mvs[_fragi])>1){
  202. oc_frag_recon_inter2_neon(dst,ref+mvoffsets[0],ref+mvoffsets[1],ystride,
  203. _dct_coeffs+64);
  204. }
  205. else oc_frag_recon_inter_neon(dst,ref+mvoffsets[0],ystride,_dct_coeffs+64);
  206. }
  207. }
  208. # endif
  209. # endif
  210. #endif