oacc-ptx.h 14 KB


  /* Copyright (C) 2014-2015 Free Software Foundation, Inc.

     Contributed by Mentor Embedded.

     This file is part of the GNU Offloading and Multi Processing Library
     (libgomp).

     Libgomp is free software; you can redistribute it and/or modify it
     under the terms of the GNU General Public License as published by
     the Free Software Foundation; either version 3, or (at your option)
     any later version.

     Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
     WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
     FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
     more details.

     Under Section 7 of GPL version 3, you are granted additional
     permissions described in the GCC Runtime Library Exception, version
     3.1, as published by the Free Software Foundation.

     You should have received a copy of the GNU General Public License and
     a copy of the GCC Runtime Library Exception along with this program;
     see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     <http://www.gnu.org/licenses/>.  */
  /* PTX source for a module that provides two device functions, `abort' and
     `_gfortran_abort'.  Each one is first declared and then defined as a
     `.visible .func' taking no parameters; both bodies consist of a `trap'
     instruction followed by `ret'.

     The macro expands to adjacent string literals, so after the compiler's
     literal concatenation it is a single NUL-terminated C string.  The last
     line deliberately carries no line-continuation backslash, so the macro
     definition ends here regardless of what follows it in the file.  */
  #define ABORT_PTX \
    ".version 3.1\n" \
    ".target sm_30\n" \
    ".address_size 64\n" \
    /* abort: declaration, then definition.  */ \
    ".visible .func abort;\n" \
    ".visible .func abort\n" \
    "{\n" \
    "trap;\n" \
    "ret;\n" \
    "}\n" \
    /* _gfortran_abort: same body under a second symbol name.  */ \
    ".visible .func _gfortran_abort;\n" \
    ".visible .func _gfortran_abort\n" \
    "{\n" \
    "trap;\n" \
    "ret;\n" \
    "}\n"
  /* PTX source for a module that provides `acc_on_device' and
     `acc_on_device_h_'.

     Generated with:

     $ echo 'int acc_on_device(int d) { return __builtin_acc_on_device(d); } int acc_on_device_h_(int *d) { return acc_on_device(*d); }' | accel-gcc/xgcc -Baccel-gcc -x c - -o - -S -m64 -O3 -fno-builtin-acc_on_device -fno-inline

     acc_on_device (.u32 d) returns 1 when D is 4 or 5 and 0 otherwise.
     acc_on_device_h_ (.u64 p) loads a .u32 through P and returns the result
     of calling acc_on_device on it.
     NOTE(review): 4 and 5 are presumably acc_device_t enumerators; confirm
     against openacc.h before relying on their meaning.

     The macro expands to adjacent string literals (one C string after
     concatenation).  Unlike the other PTX macros in this file, the text does
     not end with a newline: the final literal is " }".  The string contents
     are kept exactly as emitted by the command above, including the uneven
     leading blanks.  */
  #define ACC_ON_DEVICE_PTX \
    " .version 3.1\n" \
    " .target sm_30\n" \
    " .address_size 64\n" \
    /* acc_on_device: declaration, then definition.  */ \
    ".visible .func (.param.u32 %out_retval)acc_on_device(.param.u32 %in_ar1);\n" \
    ".visible .func (.param.u32 %out_retval)acc_on_device(.param.u32 %in_ar1)\n" \
    "{\n" \
    " .reg.u32 %ar1;\n" \
    ".reg.u32 %retval;\n" \
    " .reg.u64 %hr10;\n" \
    " .reg.u32 %r24;\n" \
    " .reg.u32 %r25;\n" \
    " .reg.pred %r27;\n" \
    " .reg.u32 %r30;\n" \
    " ld.param.u32 %ar1, [%in_ar1];\n" \
    " mov.u32 %r24, %ar1;\n" \
    /* %r27 = (d != 4); %r25 = -(d == 5 ? ~0 : 0), i.e. 1 or 0.  */ \
    " setp.ne.u32 %r27,%r24,4;\n" \
    " set.u32.eq.u32 %r30,%r24,5;\n" \
    " neg.s32 %r25, %r30;\n" \
    /* Skip the override below unless d == 4.  */ \
    " @%r27 bra $L3;\n" \
    " mov.u32 %r25, 1;\n" \
    "$L3:\n" \
    " mov.u32 %retval, %r25;\n" \
    " st.param.u32 [%out_retval], %retval;\n" \
    " ret;\n" \
    " }\n" \
    /* acc_on_device_h_: declaration, then definition.  */ \
    ".visible .func (.param.u32 %out_retval)acc_on_device_h_(.param.u64 %in_ar1);\n" \
    ".visible .func (.param.u32 %out_retval)acc_on_device_h_(.param.u64 %in_ar1)\n" \
    "{\n" \
    " .reg.u64 %ar1;\n" \
    ".reg.u32 %retval;\n" \
    " .reg.u64 %hr10;\n" \
    " .reg.u64 %r25;\n" \
    " .reg.u32 %r26;\n" \
    " .reg.u32 %r27;\n" \
    " ld.param.u64 %ar1, [%in_ar1];\n" \
    " mov.u64 %r25, %ar1;\n" \
    /* Dereference the pointer argument, then forward the value.  */ \
    " ld.u32 %r26, [%r25];\n" \
    " {\n" \
    " .param.u32 %retval_in;\n" \
    " {\n" \
    " .param.u32 %out_arg0;\n" \
    " st.param.u32 [%out_arg0], %r26;\n" \
    " call (%retval_in), acc_on_device, (%out_arg0);\n" \
    " }\n" \
    " ld.param.u32 %r27, [%retval_in];\n" \
    "}\n" \
    " mov.u32 %retval, %r27;\n" \
    " st.param.u32 [%out_retval], %retval;\n" \
    " ret;\n" \
    " }"
  /* PTX source for a module that provides six device functions and refers to
     an external `abort':

       GOACC_tid (.u32 d), GOACC_ntid (.u32 d),
       GOACC_ctaid (.u32 d), GOACC_nctaid (.u32 d)
         Return the .x, .y or .z component of the special register %tid,
         %ntid, %ctaid or %nctaid for D equal to 0, 1 or 2 respectively.  For
         any other D the code calls abort.

       GOACC_get_num_threads
         Returns GOACC_ntid (0) * GOACC_nctaid (0).

       GOACC_get_thread_num
         Returns GOACC_ntid (0) * GOACC_ctaid (0) + GOACC_tid (0).

     The macro expands to adjacent string literals (one C string after
     concatenation).  The instruction text is kept verbatim.  */
  #define GOACC_INTERNAL_PTX \
    ".version 3.1\n" \
    ".target sm_30\n" \
    ".address_size 64\n" \
    /* Forward declarations.  */ \
    ".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1);\n" \
    ".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1);\n" \
    ".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1);\n" \
    ".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1);\n" \
    ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads;\n" \
    ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num;\n" \
    ".extern .func abort;\n" \
    /* GOACC_tid: 1 -> $L4 (%tid.y), 2 -> $L5 (%tid.z), 0 -> %tid.x,  \
       anything else -> $L8 (abort).  */ \
    ".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1)\n" \
    "{\n" \
    ".reg .u32 %ar1;\n" \
    ".reg .u32 %retval;\n" \
    ".reg .u64 %hr10;\n" \
    ".reg .u32 %r22;\n" \
    ".reg .u32 %r23;\n" \
    ".reg .u32 %r24;\n" \
    ".reg .u32 %r25;\n" \
    ".reg .u32 %r26;\n" \
    ".reg .u32 %r27;\n" \
    ".reg .u32 %r28;\n" \
    ".reg .u32 %r29;\n" \
    ".reg .pred %r30;\n" \
    ".reg .u32 %r31;\n" \
    ".reg .pred %r32;\n" \
    ".reg .u32 %r33;\n" \
    ".reg .pred %r34;\n" \
    ".local .align 8 .b8 %frame[4];\n" \
    "ld.param.u32 %ar1,[%in_ar1];\n" \
    "mov.u32 %r27,%ar1;\n" \
    "st.local.u32 [%frame],%r27;\n" \
    "ld.local.u32 %r28,[%frame];\n" \
    "mov.u32 %r29,1;\n" \
    "setp.eq.u32 %r30,%r28,%r29;\n" \
    "@%r30 bra $L4;\n" \
    "mov.u32 %r31,2;\n" \
    "setp.eq.u32 %r32,%r28,%r31;\n" \
    "@%r32 bra $L5;\n" \
    "mov.u32 %r33,0;\n" \
    "setp.eq.u32 %r34,%r28,%r33;\n" \
    "@!%r34 bra $L8;\n" \
    "mov.u32 %r23,%tid.x;\n" \
    "mov.u32 %r22,%r23;\n" \
    "bra $L7;\n" \
    "$L4:\n" \
    "mov.u32 %r24,%tid.y;\n" \
    "mov.u32 %r22,%r24;\n" \
    "bra $L7;\n" \
    "$L5:\n" \
    "mov.u32 %r25,%tid.z;\n" \
    "mov.u32 %r22,%r25;\n" \
    "bra $L7;\n" \
    "$L8:\n" \
    "{\n" \
    "{\n" \
    "call abort;\n" \
    "}\n" \
    "}\n" \
    "$L7:\n" \
    "mov.u32 %r26,%r22;\n" \
    "mov.u32 %retval,%r26;\n" \
    "st.param.u32 [%out_retval],%retval;\n" \
    "ret;\n" \
    "}\n" \
    /* GOACC_ntid: same dispatch over %ntid ($L11, $L12, $L15, $L14).  */ \
    ".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1)\n" \
    "{\n" \
    ".reg .u32 %ar1;\n" \
    ".reg .u32 %retval;\n" \
    ".reg .u64 %hr10;\n" \
    ".reg .u32 %r22;\n" \
    ".reg .u32 %r23;\n" \
    ".reg .u32 %r24;\n" \
    ".reg .u32 %r25;\n" \
    ".reg .u32 %r26;\n" \
    ".reg .u32 %r27;\n" \
    ".reg .u32 %r28;\n" \
    ".reg .u32 %r29;\n" \
    ".reg .pred %r30;\n" \
    ".reg .u32 %r31;\n" \
    ".reg .pred %r32;\n" \
    ".reg .u32 %r33;\n" \
    ".reg .pred %r34;\n" \
    ".local .align 8 .b8 %frame[4];\n" \
    "ld.param.u32 %ar1,[%in_ar1];\n" \
    "mov.u32 %r27,%ar1;\n" \
    "st.local.u32 [%frame],%r27;\n" \
    "ld.local.u32 %r28,[%frame];\n" \
    "mov.u32 %r29,1;\n" \
    "setp.eq.u32 %r30,%r28,%r29;\n" \
    "@%r30 bra $L11;\n" \
    "mov.u32 %r31,2;\n" \
    "setp.eq.u32 %r32,%r28,%r31;\n" \
    "@%r32 bra $L12;\n" \
    "mov.u32 %r33,0;\n" \
    "setp.eq.u32 %r34,%r28,%r33;\n" \
    "@!%r34 bra $L15;\n" \
    "mov.u32 %r23,%ntid.x;\n" \
    "mov.u32 %r22,%r23;\n" \
    "bra $L14;\n" \
    "$L11:\n" \
    "mov.u32 %r24,%ntid.y;\n" \
    "mov.u32 %r22,%r24;\n" \
    "bra $L14;\n" \
    "$L12:\n" \
    "mov.u32 %r25,%ntid.z;\n" \
    "mov.u32 %r22,%r25;\n" \
    "bra $L14;\n" \
    "$L15:\n" \
    "{\n" \
    "{\n" \
    "call abort;\n" \
    "}\n" \
    "}\n" \
    "$L14:\n" \
    "mov.u32 %r26,%r22;\n" \
    "mov.u32 %retval,%r26;\n" \
    "st.param.u32 [%out_retval],%retval;\n" \
    "ret;\n" \
    "}\n" \
    /* GOACC_ctaid: same dispatch over %ctaid ($L18, $L19, $L22, $L21).  */ \
    ".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1)\n" \
    "{\n" \
    ".reg .u32 %ar1;\n" \
    ".reg .u32 %retval;\n" \
    ".reg .u64 %hr10;\n" \
    ".reg .u32 %r22;\n" \
    ".reg .u32 %r23;\n" \
    ".reg .u32 %r24;\n" \
    ".reg .u32 %r25;\n" \
    ".reg .u32 %r26;\n" \
    ".reg .u32 %r27;\n" \
    ".reg .u32 %r28;\n" \
    ".reg .u32 %r29;\n" \
    ".reg .pred %r30;\n" \
    ".reg .u32 %r31;\n" \
    ".reg .pred %r32;\n" \
    ".reg .u32 %r33;\n" \
    ".reg .pred %r34;\n" \
    ".local .align 8 .b8 %frame[4];\n" \
    "ld.param.u32 %ar1,[%in_ar1];\n" \
    "mov.u32 %r27,%ar1;\n" \
    "st.local.u32 [%frame],%r27;\n" \
    "ld.local.u32 %r28,[%frame];\n" \
    "mov.u32 %r29,1;\n" \
    "setp.eq.u32 %r30,%r28,%r29;\n" \
    "@%r30 bra $L18;\n" \
    "mov.u32 %r31,2;\n" \
    "setp.eq.u32 %r32,%r28,%r31;\n" \
    "@%r32 bra $L19;\n" \
    "mov.u32 %r33,0;\n" \
    "setp.eq.u32 %r34,%r28,%r33;\n" \
    "@!%r34 bra $L22;\n" \
    "mov.u32 %r23,%ctaid.x;\n" \
    "mov.u32 %r22,%r23;\n" \
    "bra $L21;\n" \
    "$L18:\n" \
    "mov.u32 %r24,%ctaid.y;\n" \
    "mov.u32 %r22,%r24;\n" \
    "bra $L21;\n" \
    "$L19:\n" \
    "mov.u32 %r25,%ctaid.z;\n" \
    "mov.u32 %r22,%r25;\n" \
    "bra $L21;\n" \
    "$L22:\n" \
    "{\n" \
    "{\n" \
    "call abort;\n" \
    "}\n" \
    "}\n" \
    "$L21:\n" \
    "mov.u32 %r26,%r22;\n" \
    "mov.u32 %retval,%r26;\n" \
    "st.param.u32 [%out_retval],%retval;\n" \
    "ret;\n" \
    "}\n" \
    /* GOACC_nctaid: same dispatch over %nctaid ($L25, $L26, $L29, $L28).  */ \
    ".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1)\n" \
    "{\n" \
    ".reg .u32 %ar1;\n" \
    ".reg .u32 %retval;\n" \
    ".reg .u64 %hr10;\n" \
    ".reg .u32 %r22;\n" \
    ".reg .u32 %r23;\n" \
    ".reg .u32 %r24;\n" \
    ".reg .u32 %r25;\n" \
    ".reg .u32 %r26;\n" \
    ".reg .u32 %r27;\n" \
    ".reg .u32 %r28;\n" \
    ".reg .u32 %r29;\n" \
    ".reg .pred %r30;\n" \
    ".reg .u32 %r31;\n" \
    ".reg .pred %r32;\n" \
    ".reg .u32 %r33;\n" \
    ".reg .pred %r34;\n" \
    ".local .align 8 .b8 %frame[4];\n" \
    "ld.param.u32 %ar1,[%in_ar1];\n" \
    "mov.u32 %r27,%ar1;\n" \
    "st.local.u32 [%frame],%r27;\n" \
    "ld.local.u32 %r28,[%frame];\n" \
    "mov.u32 %r29,1;\n" \
    "setp.eq.u32 %r30,%r28,%r29;\n" \
    "@%r30 bra $L25;\n" \
    "mov.u32 %r31,2;\n" \
    "setp.eq.u32 %r32,%r28,%r31;\n" \
    "@%r32 bra $L26;\n" \
    "mov.u32 %r33,0;\n" \
    "setp.eq.u32 %r34,%r28,%r33;\n" \
    "@!%r34 bra $L29;\n" \
    "mov.u32 %r23,%nctaid.x;\n" \
    "mov.u32 %r22,%r23;\n" \
    "bra $L28;\n" \
    "$L25:\n" \
    "mov.u32 %r24,%nctaid.y;\n" \
    "mov.u32 %r22,%r24;\n" \
    "bra $L28;\n" \
    "$L26:\n" \
    "mov.u32 %r25,%nctaid.z;\n" \
    "mov.u32 %r22,%r25;\n" \
    "bra $L28;\n" \
    "$L29:\n" \
    "{\n" \
    "{\n" \
    "call abort;\n" \
    "}\n" \
    "}\n" \
    "$L28:\n" \
    "mov.u32 %r26,%r22;\n" \
    "mov.u32 %retval,%r26;\n" \
    "st.param.u32 [%out_retval],%retval;\n" \
    "ret;\n" \
    "}\n" \
    /* GOACC_get_num_threads: GOACC_ntid (0) * GOACC_nctaid (0).  */ \
    ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads\n" \
    "{\n" \
    ".reg .u32 %retval;\n" \
    ".reg .u64 %hr10;\n" \
    ".reg .u32 %r22;\n" \
    ".reg .u32 %r23;\n" \
    ".reg .u32 %r24;\n" \
    ".reg .u32 %r25;\n" \
    ".reg .u32 %r26;\n" \
    ".reg .u32 %r27;\n" \
    ".reg .u32 %r28;\n" \
    ".reg .u32 %r29;\n" \
    "mov.u32 %r26,0;\n" \
    "{\n" \
    ".param .u32 %retval_in;\n" \
    "{\n" \
    ".param .u32 %out_arg0;\n" \
    "st.param.u32 [%out_arg0],%r26;\n" \
    "call (%retval_in),GOACC_ntid,(%out_arg0);\n" \
    "}\n" \
    "ld.param.u32 %r27,[%retval_in];\n" \
    "}\n" \
    "mov.u32 %r22,%r27;\n" \
    "mov.u32 %r28,0;\n" \
    "{\n" \
    ".param .u32 %retval_in;\n" \
    "{\n" \
    ".param .u32 %out_arg0;\n" \
    "st.param.u32 [%out_arg0],%r28;\n" \
    "call (%retval_in),GOACC_nctaid,(%out_arg0);\n" \
    "}\n" \
    "ld.param.u32 %r29,[%retval_in];\n" \
    "}\n" \
    "mov.u32 %r23,%r29;\n" \
    "mul.lo.u32 %r24,%r22,%r23;\n" \
    "mov.u32 %r25,%r24;\n" \
    "mov.u32 %retval,%r25;\n" \
    "st.param.u32 [%out_retval],%retval;\n" \
    "ret;\n" \
    "}\n" \
    /* GOACC_get_thread_num:  \
       GOACC_ntid (0) * GOACC_ctaid (0) + GOACC_tid (0).  */ \
    ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num\n" \
    "{\n" \
    ".reg .u32 %retval;\n" \
    ".reg .u64 %hr10;\n" \
    ".reg .u32 %r22;\n" \
    ".reg .u32 %r23;\n" \
    ".reg .u32 %r24;\n" \
    ".reg .u32 %r25;\n" \
    ".reg .u32 %r26;\n" \
    ".reg .u32 %r27;\n" \
    ".reg .u32 %r28;\n" \
    ".reg .u32 %r29;\n" \
    ".reg .u32 %r30;\n" \
    ".reg .u32 %r31;\n" \
    ".reg .u32 %r32;\n" \
    ".reg .u32 %r33;\n" \
    "mov.u32 %r28,0;\n" \
    "{\n" \
    ".param .u32 %retval_in;\n" \
    "{\n" \
    ".param .u32 %out_arg0;\n" \
    "st.param.u32 [%out_arg0],%r28;\n" \
    "call (%retval_in),GOACC_ntid,(%out_arg0);\n" \
    "}\n" \
    "ld.param.u32 %r29,[%retval_in];\n" \
    "}\n" \
    "mov.u32 %r22,%r29;\n" \
    "mov.u32 %r30,0;\n" \
    "{\n" \
    ".param .u32 %retval_in;\n" \
    "{\n" \
    ".param .u32 %out_arg0;\n" \
    "st.param.u32 [%out_arg0],%r30;\n" \
    "call (%retval_in),GOACC_ctaid,(%out_arg0);\n" \
    "}\n" \
    "ld.param.u32 %r31,[%retval_in];\n" \
    "}\n" \
    "mov.u32 %r23,%r31;\n" \
    "mul.lo.u32 %r24,%r22,%r23;\n" \
    "mov.u32 %r32,0;\n" \
    "{\n" \
    ".param .u32 %retval_in;\n" \
    "{\n" \
    ".param .u32 %out_arg0;\n" \
    "st.param.u32 [%out_arg0],%r32;\n" \
    "call (%retval_in),GOACC_tid,(%out_arg0);\n" \
    "}\n" \
    "ld.param.u32 %r33,[%retval_in];\n" \
    "}\n" \
    "mov.u32 %r25,%r33;\n" \
    "add.u32 %r26,%r24,%r25;\n" \
    "mov.u32 %r27,%r26;\n" \
    "mov.u32 %retval,%r27;\n" \
    "st.param.u32 [%out_retval],%retval;\n" \
    "ret;\n" \
    "}\n"