simd_mmx.c 1.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364
  1. #include<simdconfig.h>
  2. #include<simdfuncs.h>
  3. #include<stdint.h>
  4. #ifdef _MSC_VER
  5. #include<intrin.h>
  /* Reports MMX as available unconditionally: this branch performs no CPU
   * query and always answers 1.
   */
  int mmx_available() { return 1; }
  /* Adds one to each of the four floats in arr, in place.
   *
   * Plain scalar code is used here on purpose: contrary to the MSDN
   * documentation, the MMX intrinsics just plain don't work.
   */
  void increment_mmx(float arr[4]) {
      int lane;

      for (lane = 0; lane < 4; lane++) {
          arr[lane] += 1.0f;
      }
  }
  18. #elif defined(__MINGW32__)
  /* Always answers 1: no CPU feature detection is done in this branch. */
  int mmx_available() {
      const int available = 1;

      return available;
  }
  /* Adds one to each of the four floats in arr, in place.
   *
   * Scalar stand-in for the MMX version: MinGW does not seem to ship with
   * MMX, or it is broken.
   */
  void increment_mmx(float arr[4]) {
      float *cursor = arr;
      float *const end = arr + 4;

      while (cursor != end) {
          *cursor += 1.0f;
          ++cursor;
      }
  }
  30. #else
  31. #include<mmintrin.h>
  32. #include<cpuid.h>
  /* Reports whether MMX may be used: nonzero when it can, zero otherwise.
   *
   * Apple builds answer 1 without probing; every other build that reaches
   * this branch asks the compiler's CPU-feature builtin at run time.
   */
  int mmx_available() {
  #if defined(__APPLE__)
      return 1;
  #else
      return __builtin_cpu_supports("mmx");
  #endif
  }
  /* Adds one to each of the four floats in arr, in place, using MMX.
   *
   * Each element is converted to a signed 16-bit lane (the conversion drops
   * any fractional part), the four lanes are incremented with a single packed
   * add, and the results are written back as floats.
   *
   * arr: four values that must each fit in int16; anything outside that
   *      range is not representable in a lane and is not supported.
   */
  void increment_mmx(float arr[4]) {
      /* Super ugly but we know that values in arr are always small
       * enough to fit in int16.
       */
      __m64 packed = _mm_set_pi16(arr[3], arr[2], arr[1], arr[0]);
      __m64 incr = _mm_set1_pi16(1);
      __m64 result = _mm_add_pi16(packed, incr);
      /* Should be
       *   int64_t unpacker = _m_to_int64(result);
       * but it does not exist on 32 bit platforms for some reason.
       */
      int64_t raw = (int64_t)(result);
      /* Unpack from an unsigned copy so the right shifts below are fully
       * defined even when the top lane has its sign bit set.
       */
      uint64_t unpacker = (uint64_t)raw;
      _mm_empty();
      for (int i = 0; i < 4; i++) {
          int32_t lane = (int32_t)(unpacker & 0xFFFFu);
          /* The packed add wraps at 16 bits, so the raw lane alone cannot
           * tell -4 (0xFFFC) from 65532. arr[i] still holds the input here:
           * a negative input can only produce a result <= 0, so in that
           * case the lane is read as two's complement. Non-negative inputs
           * keep the zero-extended reading, so 32767 still yields 32768.
           */
          if (arr[i] < 0.0f && lane >= 0x8000) {
              lane -= 0x10000;
          }
          arr[i] = (float)lane;
          unpacker >>= 16;
      }
  }
  59. #endif