simd_sse3.c 894 B

123456789101112131415161718192021222324252627282930313233343536373839
  1. #include<simdconfig.h>
  2. #include<simdfuncs.h>
  3. #ifdef _MSC_VER
  4. #include<intrin.h>
  5. int sse3_available() {
  6. return 1;
  7. }
  8. #else
  9. #include<pmmintrin.h>
  10. #include<cpuid.h>
  11. #include<stdint.h>
  12. #if defined(__APPLE__)
  13. int sse3_available() { return 1; }
  14. #else
  15. int sse3_available() {
  16. return __builtin_cpu_supports("sse3");
  17. }
  18. #endif
  19. #endif
  20. void increment_sse3(float arr[4]) {
  21. double darr[4];
  22. __m128d val1 = _mm_set_pd(arr[0], arr[1]);
  23. __m128d val2 = _mm_set_pd(arr[2], arr[3]);
  24. __m128d one = _mm_set_pd(1.0, 1.0);
  25. __m128d result = _mm_add_pd(val1, one);
  26. _mm_store_pd(darr, result);
  27. result = _mm_add_pd(val2, one);
  28. _mm_store_pd(&darr[2], result);
  29. result = _mm_hadd_pd(val1, val2); /* This does nothing. Only here so we use an SSE3 instruction. */
  30. arr[0] = (float)darr[1];
  31. arr[1] = (float)darr[0];
  32. arr[2] = (float)darr[3];
  33. arr[3] = (float)darr[2];
  34. }