fpu-387.h

/* FPU-related code for x86 and x86_64 processors.
   Copyright (C) 2005-2015 Free Software Foundation, Inc.
   Contributed by Francois-Xavier Coudert <coudert@clipper.ens.fr>

This file is part of the GNU Fortran 95 runtime library (libgfortran).

Libgfortran is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.

Libgfortran is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

#ifndef __SSE_MATH__
#include "cpuid.h"
#endif

static int
has_sse (void)
{
#ifndef __SSE_MATH__
  unsigned int eax, ebx, ecx, edx;

  if (!__get_cpuid (1, &eax, &ebx, &ecx, &edx))
    return 0;

  return edx & bit_SSE;
#else
  return 1;
#endif
}

/* i387 exceptions -- see linux <fpu_control.h> header file for details.  */
#define _FPU_MASK_IM  0x01
#define _FPU_MASK_DM  0x02
#define _FPU_MASK_ZM  0x04
#define _FPU_MASK_OM  0x08
#define _FPU_MASK_UM  0x10
#define _FPU_MASK_PM  0x20
#define _FPU_MASK_ALL 0x3f

#define _FPU_EX_ALL   0x3f

/* i387 rounding modes.  */
#define _FPU_RC_NEAREST 0x0
#define _FPU_RC_DOWN    0x1
#define _FPU_RC_UP      0x2
#define _FPU_RC_ZERO    0x3

#define _FPU_RC_MASK    0x3

/* Enable flush to zero mode.  */
#define MXCSR_FTZ (1 << 15)

/* This structure corresponds to the layout of the block
   written by FSTENV.  */
typedef struct
{
  unsigned short int __control_word;
  unsigned short int __unused1;
  unsigned short int __status_word;
  unsigned short int __unused2;
  unsigned short int __tags;
  unsigned short int __unused3;
  unsigned int __eip;
  unsigned short int __cs_selector;
  unsigned short int __opcode;
  unsigned int __data_offset;
  unsigned short int __data_selector;
  unsigned short int __unused5;
  unsigned int __mxcsr;
}
my_fenv_t;

/* Check we can actually store the FPU state in the allocated size.  */
_Static_assert (sizeof(my_fenv_t) <= (size_t) GFC_FPE_STATE_BUFFER_SIZE,
                "GFC_FPE_STATE_BUFFER_SIZE is too small");

/* Raise the supported floating-point exceptions from EXCEPTS.  Other
   bits in EXCEPTS are ignored.  Code originally borrowed from
   libatomic/config/x86/fenv.c.  */

static void
local_feraiseexcept (int excepts)
{
  if (excepts & _FPU_MASK_IM)
    {
      float f = 0.0f;
#ifdef __SSE_MATH__
      volatile float r __attribute__ ((unused));
      __asm__ __volatile__ ("%vdivss\t{%0, %d0|%d0, %0}" : "+x" (f));
      r = f; /* Needed to trigger exception.  */
#else
      __asm__ __volatile__ ("fdiv\t{%y0, %0|%0, %y0}" : "+t" (f));
      /* No need for fwait, exception is triggered by emitted fstp.  */
#endif
    }
  if (excepts & _FPU_MASK_DM)
    {
      my_fenv_t temp;
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_DM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_ZM)
    {
      float f = 1.0f, g = 0.0f;
#ifdef __SSE_MATH__
      volatile float r __attribute__ ((unused));
      __asm__ __volatile__ ("%vdivss\t{%1, %d0|%d0, %1}" : "+x" (f) : "xm" (g));
      r = f; /* Needed to trigger exception.  */
#else
      __asm__ __volatile__ ("fdivs\t%1" : "+t" (f) : "m" (g));
      /* No need for fwait, exception is triggered by emitted fstp.  */
#endif
    }
  if (excepts & _FPU_MASK_OM)
    {
      my_fenv_t temp;
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_OM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_UM)
    {
      my_fenv_t temp;
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_UM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_PM)
    {
      float f = 1.0f, g = 3.0f;
#ifdef __SSE_MATH__
      volatile float r __attribute__ ((unused));
      __asm__ __volatile__ ("%vdivss\t{%1, %d0|%d0, %1}" : "+x" (f) : "xm" (g));
      r = f; /* Needed to trigger exception.  */
#else
      __asm__ __volatile__ ("fdivs\t%1" : "+t" (f) : "m" (g));
      /* No need for fwait, exception is triggered by emitted fstp.  */
#endif
    }
}

/* Enable trapping for the exceptions named in TRAP (their mask bits are
   cleared) and disable it for those named in NOTRAP (their mask bits are
   set), on both the x87 and, if present, the SSE unit.  */

void
set_fpu_trap_exceptions (int trap, int notrap)
{
  int exc_set = 0, exc_clr = 0;
  unsigned short cw;

  if (trap & GFC_FPE_INVALID) exc_set |= _FPU_MASK_IM;
  if (trap & GFC_FPE_DENORMAL) exc_set |= _FPU_MASK_DM;
  if (trap & GFC_FPE_ZERO) exc_set |= _FPU_MASK_ZM;
  if (trap & GFC_FPE_OVERFLOW) exc_set |= _FPU_MASK_OM;
  if (trap & GFC_FPE_UNDERFLOW) exc_set |= _FPU_MASK_UM;
  if (trap & GFC_FPE_INEXACT) exc_set |= _FPU_MASK_PM;

  if (notrap & GFC_FPE_INVALID) exc_clr |= _FPU_MASK_IM;
  if (notrap & GFC_FPE_DENORMAL) exc_clr |= _FPU_MASK_DM;
  if (notrap & GFC_FPE_ZERO) exc_clr |= _FPU_MASK_ZM;
  if (notrap & GFC_FPE_OVERFLOW) exc_clr |= _FPU_MASK_OM;
  if (notrap & GFC_FPE_UNDERFLOW) exc_clr |= _FPU_MASK_UM;
  if (notrap & GFC_FPE_INEXACT) exc_clr |= _FPU_MASK_PM;

  __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));

  cw |= exc_clr;
  cw &= ~exc_set;

  __asm__ __volatile__ ("fnclex\n\tfldcw\t%0" : : "m" (cw));

  if (has_sse())
    {
      unsigned int cw_sse;

      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));

      /* The SSE exception masks are shifted by 7 bits.  */
      cw_sse |= (exc_clr << 7);
      cw_sse &= ~(exc_set << 7);

      /* Clear stalled exception flags.  */
      cw_sse &= ~_FPU_EX_ALL;

      __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    }
}

void
set_fpu (void)
{
  set_fpu_trap_exceptions (options.fpe, 0);
}

/* Return the exceptions that currently trap, as a GFC_FPE_* bitmask.  */

int
get_fpu_trap_exceptions (void)
{
  int res = 0;
  unsigned short cw;

  __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
  cw &= _FPU_MASK_ALL;

  if (has_sse())
    {
      unsigned int cw_sse;

      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));

      /* The SSE exception masks are shifted by 7 bits.  */
      cw = cw | ((cw_sse >> 7) & _FPU_MASK_ALL);
    }

  /* An exception traps when its mask bit is clear.  */
  if (~cw & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
  if (~cw & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
  if (~cw & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
  if (~cw & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
  if (~cw & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
  if (~cw & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;

  return res;
}

int
support_fpu_trap (int flag __attribute__((unused)))
{
  return 1;
}

/* Return the exception flags currently raised, as a GFC_FPE_* bitmask.  */

int
get_fpu_except_flags (void)
{
  unsigned short cw;
  int excepts;
  int result = 0;

  __asm__ __volatile__ ("fnstsw\t%0" : "=am" (cw));
  excepts = cw;

  if (has_sse())
    {
      unsigned int cw_sse;

      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
      excepts |= cw_sse;
    }

  excepts &= _FPU_EX_ALL;

  if (excepts & _FPU_MASK_IM) result |= GFC_FPE_INVALID;
  if (excepts & _FPU_MASK_DM) result |= GFC_FPE_DENORMAL;
  if (excepts & _FPU_MASK_ZM) result |= GFC_FPE_ZERO;
  if (excepts & _FPU_MASK_OM) result |= GFC_FPE_OVERFLOW;
  if (excepts & _FPU_MASK_UM) result |= GFC_FPE_UNDERFLOW;
  if (excepts & _FPU_MASK_PM) result |= GFC_FPE_INEXACT;

  return result;
}

void
set_fpu_except_flags (int set, int clear)
{
  my_fenv_t temp;
  int exc_set = 0, exc_clr = 0;

  /* Translate from GFC_FPE_* values to _FPU_MASK_* values.  */
  if (set & GFC_FPE_INVALID)
    exc_set |= _FPU_MASK_IM;
  if (clear & GFC_FPE_INVALID)
    exc_clr |= _FPU_MASK_IM;

  if (set & GFC_FPE_DENORMAL)
    exc_set |= _FPU_MASK_DM;
  if (clear & GFC_FPE_DENORMAL)
    exc_clr |= _FPU_MASK_DM;

  if (set & GFC_FPE_ZERO)
    exc_set |= _FPU_MASK_ZM;
  if (clear & GFC_FPE_ZERO)
    exc_clr |= _FPU_MASK_ZM;

  if (set & GFC_FPE_OVERFLOW)
    exc_set |= _FPU_MASK_OM;
  if (clear & GFC_FPE_OVERFLOW)
    exc_clr |= _FPU_MASK_OM;

  if (set & GFC_FPE_UNDERFLOW)
    exc_set |= _FPU_MASK_UM;
  if (clear & GFC_FPE_UNDERFLOW)
    exc_clr |= _FPU_MASK_UM;

  if (set & GFC_FPE_INEXACT)
    exc_set |= _FPU_MASK_PM;
  if (clear & GFC_FPE_INEXACT)
    exc_clr |= _FPU_MASK_PM;

  /* Change the flags.  This is tricky on 387 (unlike SSE), because we have
     FNSTSW but no FLDSW instruction.  */
  __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
  temp.__status_word &= ~exc_clr;
  __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));

  /* Change the flags on SSE.  */
  if (has_sse())
    {
      unsigned int cw_sse;

      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
      cw_sse &= ~exc_clr;
      __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    }

  local_feraiseexcept (exc_set);
}

int
support_fpu_flag (int flag __attribute__((unused)))
{
  return 1;
}

/* Set the rounding mode on the x87 unit and, if present, the SSE unit.  */

void
set_fpu_rounding_mode (int round)
{
  int round_mode;
  unsigned short cw;

  switch (round)
    {
    case GFC_FPE_TONEAREST:
      round_mode = _FPU_RC_NEAREST;
      break;
    case GFC_FPE_UPWARD:
      round_mode = _FPU_RC_UP;
      break;
    case GFC_FPE_DOWNWARD:
      round_mode = _FPU_RC_DOWN;
      break;
    case GFC_FPE_TOWARDZERO:
      round_mode = _FPU_RC_ZERO;
      break;
    default:
      return; /* Should be unreachable.  */
    }

  __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));

  /* The x87 round control bits are shifted by 10 bits.  */
  cw &= ~(_FPU_RC_MASK << 10);
  cw |= round_mode << 10;

  __asm__ __volatile__ ("fldcw\t%0" : : "m" (cw));

  if (has_sse())
    {
      unsigned int cw_sse;

      __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));

      /* The SSE round control bits are shifted by 13 bits.  */
      cw_sse &= ~(_FPU_RC_MASK << 13);
      cw_sse |= round_mode << 13;

      __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
    }
}

/* Return the current rounding mode as a GFC_FPE_* value.  */

int
get_fpu_rounding_mode (void)
{
  int round_mode;

#ifdef __SSE_MATH__
  unsigned int cw;

  __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw));

  /* The SSE round control bits are shifted by 13 bits.  */
  round_mode = cw >> 13;
#else
  unsigned short cw;

  __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));

  /* The x87 round control bits are shifted by 10 bits.  */
  round_mode = cw >> 10;
#endif

  round_mode &= _FPU_RC_MASK;

  switch (round_mode)
    {
    case _FPU_RC_NEAREST:
      return GFC_FPE_TONEAREST;
    case _FPU_RC_UP:
      return GFC_FPE_UPWARD;
    case _FPU_RC_DOWN:
      return GFC_FPE_DOWNWARD;
    case _FPU_RC_ZERO:
      return GFC_FPE_TOWARDZERO;
    default:
      return 0; /* Should be unreachable.  */
    }
}

int
support_fpu_rounding_mode (int mode __attribute__((unused)))
{
  return 1;
}

void
get_fpu_state (void *state)
{
  my_fenv_t *envp = state;

  __asm__ __volatile__ ("fnstenv\t%0" : "=m" (*envp));

  /* fnstenv has the side effect of masking all exceptions, so we need
     to restore the control word after that.  */
  __asm__ __volatile__ ("fldcw\t%0" : : "m" (envp->__control_word));

  if (has_sse())
    __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (envp->__mxcsr));
}

void
set_fpu_state (void *state)
{
  my_fenv_t *envp = state;

  /* glibc sources (sysdeps/x86_64/fpu/fesetenv.c) do something more
     complex than this, but I think it suffices in our case.  */
  __asm__ __volatile__ ("fldenv\t%0" : : "m" (*envp));

  if (has_sse())
    __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (envp->__mxcsr));
}

/* Underflow control (abrupt vs. gradual) is only available through the SSE
   flush-to-zero bit; without SSE, the default gradual underflow applies.  */

int
support_fpu_underflow_control (int kind)
{
  if (!has_sse())
    return 0;

  return (kind == 4 || kind == 8) ? 1 : 0;
}

int
get_fpu_underflow_mode (void)
{
  unsigned int cw_sse;

  if (!has_sse())
    return 1;

  __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));

  /* Return 0 for abrupt underflow (flush to zero), 1 for gradual underflow.  */
  return (cw_sse & MXCSR_FTZ) ? 0 : 1;
}

void
set_fpu_underflow_mode (int gradual)
{
  unsigned int cw_sse;

  if (!has_sse())
    return;

  __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));

  if (gradual)
    cw_sse &= ~MXCSR_FTZ;
  else
    cw_sse |= MXCSR_FTZ;

  __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
}
  401. }