12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364 |
- #include<simdconfig.h>
- #include<simdfuncs.h>
- #include<stdint.h>
- #ifdef _MSC_VER
- #include<intrin.h>
/*
 * Report whether the MMX code path may be used.
 *
 * This branch unconditionally reports MMX as available; increment_mmx()
 * in this branch is plain scalar code, so no CPU feature is actually
 * required.
 *
 * Returns 1.
 */
int mmx_available(void) {
    return 1;
}
/* Contrary to MSDN documentation, MMX intrinsics
 * just plain don't work, so each of the four elements
 * is bumped by one with ordinary scalar arithmetic.
 */
void increment_mmx(float arr[4]) {
    int idx;

    for (idx = 0; idx < 4; idx++) {
        arr[idx] += 1.0f;
    }
}
- #elif defined(__MINGW32__)
/*
 * Report whether the MMX code path may be used.
 *
 * This branch unconditionally reports MMX as available; increment_mmx()
 * in this branch is plain scalar code, so no CPU feature is actually
 * required.
 *
 * Returns 1.
 */
int mmx_available(void) {
    return 1;
}
/* MinGW does not seem to ship with MMX or it is broken,
 * so the four elements are incremented with scalar code.
 */
void increment_mmx(float arr[4]) {
    float *cur = arr;
    float *const end = arr + 4;

    while (cur != end) {
        *cur += 1.0f;
        cur++;
    }
}
- #else
- #include<mmintrin.h>
- #include<cpuid.h>
- #if defined(__APPLE__)
/* Report whether the MMX code path may be used.  This branch skips
 * runtime detection and always returns 1. */
int mmx_available(void) { return 1; }
- #else
/*
 * Report whether the running CPU supports MMX.
 *
 * __builtin_cpu_supports() returns some positive integer when the feature
 * is present, not necessarily 1.  The result is normalized so this
 * function yields exactly 1 or 0, matching the other platform variants
 * that return a literal 1.
 *
 * Returns 1 if MMX is supported, 0 otherwise.
 */
int mmx_available(void) {
    return __builtin_cpu_supports("mmx") != 0;
}
- #endif
/*
 * Add 1 to each of the four floats in arr using packed 16-bit MMX adds.
 *
 * Every element is converted to a 16-bit integer (the float-to-integer
 * conversion discards any fractional part), incremented with one packed
 * add and converted back to float.
 *
 * Super ugly, but we know that values in arr are always small enough to
 * fit in int16; passing anything larger is not supported.
 *
 * arr: four values, updated in place.
 */
void increment_mmx(float arr[4]) {
    /* Gives lane-wise access to the result without casting the vector to
     * a 64-bit integer.  (_m_to_int64 would be the intrinsic for that, but
     * it does not exist on 32 bit platforms.)  x86 is little endian, so
     * lanes[0] holds the lowest 16 bits, i.e. the value built from arr[0].
     */
    union {
        __m64 vec;
        int16_t lanes[4];
    } out;
    int i;
    /* _mm_set_pi16 takes its arguments from the highest lane down. */
    __m64 packed = _mm_set_pi16((short)arr[3], (short)arr[2],
                                (short)arr[1], (short)arr[0]);
    __m64 incr = _mm_set1_pi16(1);

    out.vec = _mm_add_pi16(packed, incr);
    /* Clear the MMX state before converting back to floating point. */
    _mm_empty();
    for (i = 0; i < 4; i++) {
        /* Lanes are read as signed 16-bit values so that negative inputs
         * survive the round trip instead of coming back as 65536 + value.
         */
        arr[i] = (float)out.lanes[i];
    }
}
- #endif
|