aom-avx2.diff 2.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061
  1. diff --git a/build/cmake/cpu.cmake b/build/cmake/cpu.cmake
  2. index acebe20..8c67d89 100644
  3. --- a/build/cmake/cpu.cmake
  4. +++ b/build/cmake/cpu.cmake
  5. @@ -120,6 +120,19 @@ elseif("${AOM_TARGET_CPU}" MATCHES "^x86")
  6. set(RTCD_ARCH_X86_64 "yes")
  7. endif()
  8. + # As of v3.9.0 the AVX2 code needs the __m256i type; turn AVX2 off when the
  9. + # compiler's <immintrin.h> does not declare it.
  10. + if(ENABLE_AVX2)
  11. + aom_check_source_compiles("x86_64_avx2_m256i_available" "
  12. +#include <immintrin.h>
  13. +/* __m256i is a typedef, not a macro, so it cannot be probed with #ifndef. */
  14. +typedef __m256i aom_m256i_probe_t;
  15. +" HAVE_AVX2_M256I)
  16. + if(NOT HAVE_AVX2_M256I)
  17. + set(ENABLE_AVX2 0)
  18. + endif()
  19. + endif()
  20. +
  21. set(X86_FLAVORS "MMX;SSE;SSE2;SSE3;SSSE3;SSE4_1;SSE4_2;AVX;AVX2")
  22. foreach(flavor ${X86_FLAVORS})
  23. if(ENABLE_${flavor} AND NOT disable_remaining_flavors)
  24. diff --git a/aom_dsp/x86/synonyms.h b/aom_dsp/x86/synonyms.h
  25. index 0d51cdf..6744ec5 100644
  26. --- a/aom_dsp/x86/synonyms.h
  27. +++ b/aom_dsp/x86/synonyms.h
  28. @@ -46,13 +46,6 @@ static INLINE __m128i xx_loadu_128(const void *a) {
  29. return _mm_loadu_si128((const __m128i *)a);
  30. }
  31. -// Load 64 bits from each of hi and low, and pack into an SSE register
  32. -// Since directly loading as `int64_t`s and using _mm_set_epi64 may violate
  33. -// the strict aliasing rule, this takes a different approach
  34. -static INLINE __m128i xx_loadu_2x64(const void *hi, const void *lo) {
  35. - return _mm_unpacklo_epi64(_mm_loadu_si64(lo), _mm_loadu_si64(hi));
  36. -}
  37. -
  38. static INLINE void xx_storel_32(void *const a, const __m128i v) {
  39. const int val = _mm_cvtsi128_si32(v);
  40. memcpy(a, &val, sizeof(val));
  41. diff --git a/aom_dsp/x86/synonyms_avx2.h b/aom_dsp/x86/synonyms_avx2.h
  42. index d4e8f69..45be17e 100644
  43. --- a/aom_dsp/x86/synonyms_avx2.h
  44. +++ b/aom_dsp/x86/synonyms_avx2.h
  45. @@ -25,6 +25,13 @@
  46. * Intrinsics prefixed with yy_ operate on or return 256bit YMM registers.
  47. */
  48. +// Loads 64 bits from each of `hi` and `lo` and packs them into one 128-bit SSE
  49. +// register (`lo` in the low half). Uses _mm_loadu_si64 + unpack rather than
  50. +// int64_t loads with _mm_set_epi64, which may violate strict aliasing.
  51. +static INLINE __m128i xx_loadu_2x64(const void *hi, const void *lo) {
  52. + return _mm_unpacklo_epi64(_mm_loadu_si64(lo), _mm_loadu_si64(hi));
  53. +}
  54. +
  55. // Loads and stores to do away with the tedium of casting the address
  56. // to the right type.
  57. static INLINE __m256i yy_load_256(const void *a) {