dSFMT-common.h 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116
  1. #pragma once
  2. /**
  3. * @file dSFMT-common.h
  4. *
  5. * @brief SIMD oriented Fast Mersenne Twister(SFMT) pseudorandom
  6. * number generator with jump function. This file includes common functions
  7. * used in random number generation and jump.
  8. *
  9. * @author Mutsuo Saito (Hiroshima University)
  10. * @author Makoto Matsumoto (The University of Tokyo)
  11. *
  12. * Copyright (C) 2006, 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima
  13. * University.
  14. * Copyright (C) 2012 Mutsuo Saito, Makoto Matsumoto, Hiroshima
  15. * University and The University of Tokyo.
  16. * All rights reserved.
  17. *
  18. * The 3-clause BSD License is applied to this software, see
  19. * LICENSE.txt
  20. */
  21. #ifndef DSFMT_COMMON_H
  22. #define DSFMT_COMMON_H
  23. #include "dSFMT.h"
  24. #if defined(HAVE_SSE2)
  25. # include <emmintrin.h>
  /**
   * 128-bit integer container: the same storage can be accessed either as
   * two 64-bit unsigned words or as a single SSE2 integer vector.
   */
  union X128I_T {
      uint64_t u[2];  /* view as two 64-bit unsigned words */
      __m128i  i128;  /* view as one SSE2 integer vector */
  };
  /**
   * 128-bit floating-point container: the same storage can be accessed
   * either as two doubles or as a single SSE2 double-precision vector.
   */
  union X128D_T {
      double  d[2];  /* view as two doubles */
      __m128d d128;  /* view as one SSE2 double-precision vector */
  };
  34. /** mask data for sse2 */
  35. static const union X128I_T sse2_param_mask = {{DSFMT_MSK1, DSFMT_MSK2}};
  36. #endif
  37. #if defined(HAVE_ALTIVEC)
  38. inline static void do_recursion(w128_t *r, w128_t *a, w128_t * b,
  39. w128_t *lung) {
  40. const vector unsigned char sl1 = ALTI_SL1;
  41. const vector unsigned char sl1_perm = ALTI_SL1_PERM;
  42. const vector unsigned int sl1_msk = ALTI_SL1_MSK;
  43. const vector unsigned char sr1 = ALTI_SR;
  44. const vector unsigned char sr1_perm = ALTI_SR_PERM;
  45. const vector unsigned int sr1_msk = ALTI_SR_MSK;
  46. const vector unsigned char perm = ALTI_PERM;
  47. const vector unsigned int msk1 = ALTI_MSK;
  48. vector unsigned int w, x, y, z;
  49. z = a->s;
  50. w = lung->s;
  51. x = vec_perm(w, (vector unsigned int)perm, perm);
  52. y = vec_perm(z, (vector unsigned int)sl1_perm, sl1_perm);
  53. y = vec_sll(y, sl1);
  54. y = vec_and(y, sl1_msk);
  55. w = vec_xor(x, b->s);
  56. w = vec_xor(w, y);
  57. x = vec_perm(w, (vector unsigned int)sr1_perm, sr1_perm);
  58. x = vec_srl(x, sr1);
  59. x = vec_and(x, sr1_msk);
  60. y = vec_and(w, msk1);
  61. z = vec_xor(z, y);
  62. r->s = vec_xor(z, x);
  63. lung->s = w;
  64. }
  65. #elif defined(HAVE_SSE2)
  66. /**
  67. * This function represents the recursion formula.
  68. * @param r output 128-bit
  69. * @param a a 128-bit part of the internal state array
  70. * @param b a 128-bit part of the internal state array
  71. * @param d a 128-bit part of the internal state array (I/O)
  72. */
  73. inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *u) {
  74. __m128i v, w, x, y, z;
  75. x = a->si;
  76. z = _mm_slli_epi64(x, DSFMT_SL1);
  77. y = _mm_shuffle_epi32(u->si, SSE2_SHUFF);
  78. z = _mm_xor_si128(z, b->si);
  79. y = _mm_xor_si128(y, z);
  80. v = _mm_srli_epi64(y, DSFMT_SR);
  81. w = _mm_and_si128(y, sse2_param_mask.i128);
  82. v = _mm_xor_si128(v, x);
  83. v = _mm_xor_si128(v, w);
  84. r->si = v;
  85. u->si = y;
  86. }
  87. #else
  88. /**
  89. * This function represents the recursion formula.
  90. * @param r output 128-bit
  91. * @param a a 128-bit part of the internal state array
  92. * @param b a 128-bit part of the internal state array
  93. * @param lung a 128-bit part of the internal state array (I/O)
  94. */
  95. inline static void do_recursion(w128_t *r, w128_t *a, w128_t * b,
  96. w128_t *lung) {
  97. uint64_t t0, t1, L0, L1;
  98. t0 = a->u[0];
  99. t1 = a->u[1];
  100. L0 = lung->u[0];
  101. L1 = lung->u[1];
  102. lung->u[0] = (t0 << DSFMT_SL1) ^ (L1 >> 32) ^ (L1 << 32) ^ b->u[0];
  103. lung->u[1] = (t1 << DSFMT_SL1) ^ (L0 >> 32) ^ (L0 << 32) ^ b->u[1];
  104. r->u[0] = (lung->u[0] >> DSFMT_SR) ^ (lung->u[0] & DSFMT_MSK1) ^ t0;
  105. r->u[1] = (lung->u[1] >> DSFMT_SR) ^ (lung->u[1] & DSFMT_MSK2) ^ t1;
  106. }
  107. #endif
  108. #endif