oacc-ptx.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427
  1. /* Copyright (C) 2014-2015 Free Software Foundation, Inc.
  2. Contributed by Mentor Embedded.
  3. This file is part of the GNU Offloading and Multi Processing Library
  4. (libgomp).
  5. Libgomp is free software; you can redistribute it and/or modify it
  6. under the terms of the GNU General Public License as published by
  7. the Free Software Foundation; either version 3, or (at your option)
  8. any later version.
  9. Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
  10. WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  11. FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  12. more details.
  13. Under Section 7 of GPL version 3, you are granted additional
  14. permissions described in the GCC Runtime Library Exception, version
  15. 3.1, as published by the Free Software Foundation.
  16. You should have received a copy of the GNU General Public License and
  17. a copy of the GCC Runtime Library Exception along with this program;
  18. see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
  19. <http://www.gnu.org/licenses/>. */
  /* PTX module text providing device-side `abort' and `_gfortran_abort'.

     The module header selects PTX ISA version 3.1, target sm_30 and a 64-bit
     address size.  Each of the two functions is first declared and then
     defined as a `.visible .func' taking no parameters; both bodies consist
     of a `trap' instruction followed by `ret'.

     The last line intentionally carries no trailing backslash: the macro
     must end at its final string literal rather than being spliced onto
     whatever line happens to follow it in this header.  */
  #define ABORT_PTX \
    ".version 3.1\n" \
    ".target sm_30\n" \
    ".address_size 64\n" \
    /* abort: declaration, then definition.  */ \
    ".visible .func abort;\n" \
    ".visible .func abort\n" \
    "{\n" \
    "trap;\n" \
    "ret;\n" \
    "}\n" \
    /* _gfortran_abort: declaration, then definition (same body).  */ \
    ".visible .func _gfortran_abort;\n" \
    ".visible .func _gfortran_abort\n" \
    "{\n" \
    "trap;\n" \
    "ret;\n" \
    "}\n"
  36. /* Generated with:
  37. $ echo 'int acc_on_device(int d) { return __builtin_acc_on_device(d); } int acc_on_device_h_(int *d) { return acc_on_device(*d); }' | accel-gcc/xgcc -Baccel-gcc -x c - -o - -S -m64 -O3 -fno-builtin-acc_on_device -fno-inline
  38. */
  /* PTX module text defining two functions:

       acc_on_device (u32 d)       -> u32
         Returns 1 when d is 4; otherwise returns 1 when d is 5 and 0 for
         every other value.  (`set.u32.eq.u32' yields an all-ones word on a
         match per the PTX ISA, which `neg.s32' turns into 1.)
         NOTE(review): 4 and 5 are presumably the acc_device_not_host and
         acc_device_nvidia enumerators -- confirm against openacc.h.

       acc_on_device_h_ (u64 p)    -> u32
         Loads a u32 through the pointer p and forwards it to
         acc_on_device, returning that call's result.

     The string literals are kept exactly as they are (including the
     uneven leading blanks and the missing newline after the final brace);
     only the layout and comments of this header differ.  */
  #define ACC_ON_DEVICE_PTX \
    /* ---- module header ---- */ \
    " .version 3.1\n" \
    " .target sm_30\n" \
    " .address_size 64\n" \
    \
    /* ---- acc_on_device: declaration + definition ---- */ \
    ".visible .func (.param.u32 %out_retval)acc_on_device(.param.u32 %in_ar1);\n" \
    ".visible .func (.param.u32 %out_retval)acc_on_device(.param.u32 %in_ar1)\n" \
    "{\n" \
      " .reg.u32 %ar1;\n" \
      ".reg.u32 %retval;\n" \
      " .reg.u64 %hr10;\n" \
      " .reg.u32 %r24;\n" \
      " .reg.u32 %r25;\n" \
      " .reg.pred %r27;\n" \
      " .reg.u32 %r30;\n" \
      /* %r24 = d */ \
      " ld.param.u32 %ar1, [%in_ar1];\n" \
      " mov.u32 %r24, %ar1;\n" \
      /* %r27 = (d != 4); %r25 = (d == 5) as 0/1 */ \
      " setp.ne.u32 %r27,%r24,4;\n" \
      " set.u32.eq.u32 %r30,%r24,5;\n" \
      " neg.s32 %r25, %r30;\n" \
      /* keep %r25 unless d == 4, in which case force it to 1 */ \
      " @%r27 bra $L3;\n" \
      " mov.u32 %r25, 1;\n" \
      "$L3:\n" \
      " mov.u32 %retval, %r25;\n" \
      " st.param.u32 [%out_retval], %retval;\n" \
      " ret;\n" \
    " }\n" \
    \
    /* ---- acc_on_device_h_: declaration + definition ---- */ \
    ".visible .func (.param.u32 %out_retval)acc_on_device_h_(.param.u64 %in_ar1);\n" \
    ".visible .func (.param.u32 %out_retval)acc_on_device_h_(.param.u64 %in_ar1)\n" \
    "{\n" \
      " .reg.u64 %ar1;\n" \
      ".reg.u32 %retval;\n" \
      " .reg.u64 %hr10;\n" \
      " .reg.u64 %r25;\n" \
      " .reg.u32 %r26;\n" \
      " .reg.u32 %r27;\n" \
      /* %r26 = *p */ \
      " ld.param.u64 %ar1, [%in_ar1];\n" \
      " mov.u64 %r25, %ar1;\n" \
      " ld.u32 %r26, [%r25];\n" \
      /* %r27 = acc_on_device (%r26) */ \
      " {\n" \
        " .param.u32 %retval_in;\n" \
        " {\n" \
          " .param.u32 %out_arg0;\n" \
          " st.param.u32 [%out_arg0], %r26;\n" \
          " call (%retval_in), acc_on_device, (%out_arg0);\n" \
        " }\n" \
        " ld.param.u32 %r27, [%retval_in];\n" \
      "}\n" \
      " mov.u32 %retval, %r27;\n" \
      " st.param.u32 [%out_retval], %retval;\n" \
      " ret;\n" \
    " }"
  /* Building blocks for GOACC_INTERNAL_PTX.  These are ordinary macros and
     must remain defined wherever GOACC_INTERNAL_PTX is expanded.  Every
     argument is a string literal; the pieces are joined into the PTX text
     by adjacent-string-literal concatenation, so the expansion is a single
     string.  */

  /* Signature, without terminator, of the one-argument function
     GOACC_<NAME>.  Append ";\n" for a declaration, "\n" for a definition.  */
  #define GOACC_PTX_DIM_SIG(NAME) \
    ".visible .func (.param .u32 %out_retval) GOACC_" NAME \
    " (.param .u32 %in_ar1)"

  /* Definition of GOACC_<NAME>, which reads the special register %<NAME>.
     The u32 argument selects the component: 0 -> .x, 1 -> .y, 2 -> .z.
     Any other value branches to L_BAD, which calls abort and then falls
     through to L_OUT.  L_Y, L_Z, L_BAD and L_OUT are the label names used
     for the .y case, the .z case, the abort case and the common exit.  */
  #define GOACC_PTX_DIM_FUNC(NAME, L_Y, L_Z, L_BAD, L_OUT) \
    GOACC_PTX_DIM_SIG (NAME) "\n" \
    "{\n" \
    ".reg .u32 %ar1;\n" \
    ".reg .u32 %retval;\n" \
    ".reg .u64 %hr10;\n" \
    ".reg .u32 %r22;\n" \
    ".reg .u32 %r23;\n" \
    ".reg .u32 %r24;\n" \
    ".reg .u32 %r25;\n" \
    ".reg .u32 %r26;\n" \
    ".reg .u32 %r27;\n" \
    ".reg .u32 %r28;\n" \
    ".reg .u32 %r29;\n" \
    ".reg .pred %r30;\n" \
    ".reg .u32 %r31;\n" \
    ".reg .pred %r32;\n" \
    ".reg .u32 %r33;\n" \
    ".reg .pred %r34;\n" \
    ".local .align 8 .b8 %frame[4];\n" \
    /* %r28 = selector, stored to and reloaded from %frame.  */ \
    "ld.param.u32 %ar1,[%in_ar1];\n" \
    "mov.u32 %r27,%ar1;\n" \
    "st.local.u32 [%frame],%r27;\n" \
    "ld.local.u32 %r28,[%frame];\n" \
    /* selector == 1 -> L_Y */ \
    "mov.u32 %r29,1;\n" \
    "setp.eq.u32 %r30,%r28,%r29;\n" \
    "@%r30 bra " L_Y ";\n" \
    /* selector == 2 -> L_Z */ \
    "mov.u32 %r31,2;\n" \
    "setp.eq.u32 %r32,%r28,%r31;\n" \
    "@%r32 bra " L_Z ";\n" \
    /* selector != 0 -> L_BAD */ \
    "mov.u32 %r33,0;\n" \
    "setp.eq.u32 %r34,%r28,%r33;\n" \
    "@!%r34 bra " L_BAD ";\n" \
    /* selector == 0: .x component */ \
    "mov.u32 %r23,%" NAME ".x;\n" \
    "mov.u32 %r22,%r23;\n" \
    "bra " L_OUT ";\n" \
    L_Y ":\n" \
    "mov.u32 %r24,%" NAME ".y;\n" \
    "mov.u32 %r22,%r24;\n" \
    "bra " L_OUT ";\n" \
    L_Z ":\n" \
    "mov.u32 %r25,%" NAME ".z;\n" \
    "mov.u32 %r22,%r25;\n" \
    "bra " L_OUT ";\n" \
    L_BAD ":\n" \
    "{\n" \
    "{\n" \
    "call abort;\n" \
    "}\n" \
    "}\n" \
    /* Common exit: return %r22.  */ \
    L_OUT ":\n" \
    "mov.u32 %r26,%r22;\n" \
    "mov.u32 %retval,%r26;\n" \
    "st.param.u32 [%out_retval],%retval;\n" \
    "ret;\n" \
    "}\n"

  /* Call sequence DST = FUNC (0): ARG is the register loaded with the
     constant 0 and passed as the argument, RET receives the value read back
     from the return parameter, and DST gets a copy of RET.  */
  #define GOACC_PTX_CALL_WITH_0(FUNC, ARG, RET, DST) \
    "mov.u32 " ARG ",0;\n" \
    "{\n" \
    ".param .u32 %retval_in;\n" \
    "{\n" \
    ".param .u32 %out_arg0;\n" \
    "st.param.u32 [%out_arg0]," ARG ";\n" \
    "call (%retval_in)," FUNC ",(%out_arg0);\n" \
    "}\n" \
    "ld.param.u32 " RET ",[%retval_in];\n" \
    "}\n" \
    "mov.u32 " DST "," RET ";\n"

  /* PTX module text (ISA 3.1, sm_30, 64-bit addresses) defining:

       GOACC_tid (n), GOACC_ntid (n), GOACC_ctaid (n), GOACC_nctaid (n)
         Return the .x / .y / .z component (n = 0 / 1 / 2) of %tid, %ntid,
         %ctaid and %nctaid respectively; any other n calls abort.

       GOACC_get_num_threads ()
         Returns GOACC_ntid (0) * GOACC_nctaid (0).

       GOACC_get_thread_num ()
         Returns GOACC_ntid (0) * GOACC_ctaid (0) + GOACC_tid (0).

     `abort' is only declared here (`.extern'), not defined.  */
  #define GOACC_INTERNAL_PTX \
    ".version 3.1\n" \
    ".target sm_30\n" \
    ".address_size 64\n" \
    /* Forward declarations.  */ \
    GOACC_PTX_DIM_SIG ("tid") ";\n" \
    GOACC_PTX_DIM_SIG ("ntid") ";\n" \
    GOACC_PTX_DIM_SIG ("ctaid") ";\n" \
    GOACC_PTX_DIM_SIG ("nctaid") ";\n" \
    ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads;\n" \
    ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num;\n" \
    ".extern .func abort;\n" \
    /* Special-register accessors.  */ \
    GOACC_PTX_DIM_FUNC ("tid", "$L4", "$L5", "$L8", "$L7") \
    GOACC_PTX_DIM_FUNC ("ntid", "$L11", "$L12", "$L15", "$L14") \
    GOACC_PTX_DIM_FUNC ("ctaid", "$L18", "$L19", "$L22", "$L21") \
    GOACC_PTX_DIM_FUNC ("nctaid", "$L25", "$L26", "$L29", "$L28") \
    /* GOACC_get_num_threads.  */ \
    ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads\n" \
    "{\n" \
    ".reg .u32 %retval;\n" \
    ".reg .u64 %hr10;\n" \
    ".reg .u32 %r22;\n" \
    ".reg .u32 %r23;\n" \
    ".reg .u32 %r24;\n" \
    ".reg .u32 %r25;\n" \
    ".reg .u32 %r26;\n" \
    ".reg .u32 %r27;\n" \
    ".reg .u32 %r28;\n" \
    ".reg .u32 %r29;\n" \
    GOACC_PTX_CALL_WITH_0 ("GOACC_ntid", "%r26", "%r27", "%r22") \
    GOACC_PTX_CALL_WITH_0 ("GOACC_nctaid", "%r28", "%r29", "%r23") \
    "mul.lo.u32 %r24,%r22,%r23;\n" \
    "mov.u32 %r25,%r24;\n" \
    "mov.u32 %retval,%r25;\n" \
    "st.param.u32 [%out_retval],%retval;\n" \
    "ret;\n" \
    "}\n" \
    /* GOACC_get_thread_num.  */ \
    ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num\n" \
    "{\n" \
    ".reg .u32 %retval;\n" \
    ".reg .u64 %hr10;\n" \
    ".reg .u32 %r22;\n" \
    ".reg .u32 %r23;\n" \
    ".reg .u32 %r24;\n" \
    ".reg .u32 %r25;\n" \
    ".reg .u32 %r26;\n" \
    ".reg .u32 %r27;\n" \
    ".reg .u32 %r28;\n" \
    ".reg .u32 %r29;\n" \
    ".reg .u32 %r30;\n" \
    ".reg .u32 %r31;\n" \
    ".reg .u32 %r32;\n" \
    ".reg .u32 %r33;\n" \
    GOACC_PTX_CALL_WITH_0 ("GOACC_ntid", "%r28", "%r29", "%r22") \
    GOACC_PTX_CALL_WITH_0 ("GOACC_ctaid", "%r30", "%r31", "%r23") \
    "mul.lo.u32 %r24,%r22,%r23;\n" \
    GOACC_PTX_CALL_WITH_0 ("GOACC_tid", "%r32", "%r33", "%r25") \
    "add.u32 %r26,%r24,%r25;\n" \
    "mov.u32 %r27,%r26;\n" \
    "mov.u32 %retval,%r27;\n" \
    "st.param.u32 [%out_retval],%retval;\n" \
    "ret;\n" \
    "}\n"