/* simd-3.c */
  1. /* { dg-do run } */
  2. /* { dg-options "-O2" } */
  3. /* { dg-additional-options "-msse2" { target sse2_runtime } } */
  4. /* { dg-additional-options "-mavx" { target avx_runtime } } */
  5. extern void abort ();
  6. int a[1024] __attribute__((aligned (32))) = { 1 };
  7. int b[1024] __attribute__((aligned (32))) = { 1 };
  8. unsigned char c[1024] __attribute__((aligned (32))) = { 1 };
  9. int k, m;
  10. __UINTPTR_TYPE__ u, u2, u3;
  11. __attribute__((noinline, noclone)) int
  12. foo (int *p)
  13. {
  14. int i, s = 0, s2 = 0, t, t2;
  15. #pragma omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s) \
  16. lastprivate (t2)
  17. for (i = 0; i < 512; i++)
  18. {
  19. a[i] *= p[i];
  20. t2 = k + p[i];
  21. k += m + 1;
  22. s += p[i] + k;
  23. c[i]++;
  24. }
  25. #pragma omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s2) \
  26. lastprivate (t, u, u2, u3)
  27. for (i = 512; i < 1024; i++)
  28. {
  29. a[i] *= p[i];
  30. k += m + 1;
  31. t = k + p[i];
  32. u = (__UINTPTR_TYPE__) &k;
  33. u2 = (__UINTPTR_TYPE__) &s2;
  34. u3 = (__UINTPTR_TYPE__) &t;
  35. s2 += t;
  36. c[i]++;
  37. }
  38. return s + s2 + t + t2;
  39. }
  40. __attribute__((noinline, noclone)) long int
  41. bar (int *p, long int n, long int o)
  42. {
  43. long int i, s = 0, s2 = 0, t, t2;
  44. #pragma omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s) \
  45. lastprivate (t2)
  46. for (i = 0; i < n; i++)
  47. {
  48. a[i] *= p[i];
  49. t2 = k + p[i];
  50. k += m + 1;
  51. s += p[i] + k;
  52. c[i]++;
  53. }
  54. #pragma omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s2) \
  55. lastprivate (t, u, u2, u3)
  56. for (i = n; i < o; i++)
  57. {
  58. a[i] *= p[i];
  59. k += m + 1;
  60. t = k + p[i];
  61. u = (__UINTPTR_TYPE__) &k;
  62. u2 = (__UINTPTR_TYPE__) &s2;
  63. u3 = (__UINTPTR_TYPE__) &t;
  64. s2 += t;
  65. c[i]++;
  66. }
  67. return s + s2 + t + t2;
  68. }
  69. int
  70. main ()
  71. {
  72. #if __SIZEOF_INT__ >= 4
  73. int i;
  74. k = 4;
  75. m = 2;
  76. for (i = 0; i < 1024; i++)
  77. {
  78. a[i] = i - 512;
  79. b[i] = (i - 51) % 39;
  80. c[i] = (unsigned char) i;
  81. }
  82. int s = foo (b);
  83. for (i = 0; i < 1024; i++)
  84. {
  85. if (b[i] != (i - 51) % 39
  86. || a[i] != (i - 512) * b[i]
  87. || c[i] != (unsigned char) (i + 1))
  88. abort ();
  89. a[i] = i - 512;
  90. }
  91. if (k != 4 + 3 * 1024
  92. || s != 1596127 + (4 + 3 * 511 + b[511]) + (4 + 3 * 1024 + b[1023]))
  93. abort ();
  94. k = 4;
  95. s = bar (b, 512, 1024);
  96. for (i = 0; i < 1024; i++)
  97. {
  98. if (b[i] != (i - 51) % 39
  99. || a[i] != (i - 512) * b[i]
  100. || c[i] != (unsigned char) (i + 2))
  101. abort ();
  102. a[i] = i - 512;
  103. }
  104. if (k != 4 + 3 * 1024
  105. || s != 1596127 + (4 + 3 * 511 + b[511]) + (4 + 3 * 1024 + b[1023]))
  106. abort ();
  107. k = 4;
  108. s = bar (b, 511, 1021);
  109. for (i = 0; i < 1021; i++)
  110. {
  111. if (b[i] != (i - 51) % 39
  112. || a[i] != (i - 512) * b[i]
  113. || c[i] != (unsigned char) (i + 3))
  114. abort ();
  115. a[i] = i - 512;
  116. }
  117. for (i = 1021; i < 1024; i++)
  118. if (b[i] != (i - 51) % 39
  119. || a[i] != i - 512
  120. || c[i] != (unsigned char) (i + 2))
  121. abort ();
  122. if (k != 4 + 3 * 1021
  123. || s != 1586803 + (4 + 3 * 510 + b[510]) + (4 + 3 * 1021 + b[1020]))
  124. abort ();
  125. #endif
  126. return 0;
  127. }