spu_ovl.S 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472
  1. /* Overlay manager for SPU.
  2. Copyright (C) 2006-2015 Free Software Foundation, Inc.
  3. This file is part of the GNU Binutils.
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 3 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with this program; if not, write to the Free Software
  14. Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
  15. MA 02110-1301, USA. */
  16. /* MFC DMA defn's. */
  /* These values are written to the MFC channels by the transfer loop
     and by the tag-wait sequence further down in this file. */
  /* Command code written to $MFC_Cmd for every overlay transfer. */
  17. #define MFC_GET_CMD 0x40
  /* Upper bound on the byte count written to $MFC_Size per command;
     a larger overlay is moved with several commands. */
  18. #define MFC_MAX_DMA_SIZE 0x4000
  /* Value written to $MFC_WrTagUpdate before $MFC_RdTagStat is read. */
  19. #define MFC_TAG_UPDATE_ALL 2
  /* Tag written to $MFC_TagId; (1 << MFC_TAG_ID) is also the temporary
     tag mask installed while waiting for the transfer. */
  20. #define MFC_TAG_ID 0
  21. /* Register usage. */
  /* Each physical register has several aliases, one per role it plays at
     a different point of the overlay manager.  Aliases of the same
     register share storage, so two of them must never be live at once.
     reserved1..reserved5 ($75..$79) are used without being saved by this
     file.  save1..save4 ($74..$71) are stored below $sp before they are
     written and reloaded once their temporary use is over. */
  /* $75: stub parameter on entry (8-byte stubs), then table base
     addresses, loaded table entries and the temporary tag mask. */
  22. #define reserved1 $75
  23. #define parm $75
  24. #define tab1 reserved1
  25. #define tab2 reserved1
  26. #define vma reserved1
  27. #define oldvma reserved1
  28. #define newmask reserved1
  29. #define map reserved1
  /* $76: table offsets, "present" flags and the DMA size / carry /
     compare temporaries. */
  30. #define reserved2 $76
  31. #define off1 reserved2
  32. #define off2 reserved2
  33. #define present1 reserved2
  34. #define present2 reserved2
  35. #define sz reserved2
  36. #define cmp reserved2
  37. #define add64 reserved2
  38. #define cgbits reserved2
  39. #define off3 reserved2
  40. #define off4 reserved2
  41. #define addr4 reserved2
  42. #define off5 reserved2
  43. #define tagstat reserved2
  /* $77: rotated .size word, shuffle control for $lr, DMA address /
     command temporaries and table bases in do_load. */
  44. #define reserved3 $77
  45. #define size1 reserved3
  46. #define size2 reserved3
  47. #define rv3 reserved3
  48. #define ealo reserved3
  49. #define cmd reserved3
  50. #define off64 reserved3
  51. #define tab3 reserved3
  52. #define tab4 reserved3
  53. #define tab5 reserved3
  /* $78: overlay index (ovl / newovl); also reused for compare results,
     the carry shuffle control and interrupt-path temporaries. */
  54. #define reserved4 $78
  55. #define ovl reserved4
  56. #define rv2 reserved4
  57. #define rv5 reserved4
  58. #define cgshuf reserved4
  59. #define newovl reserved4
  60. #define irqtmp1 reserved4
  61. #define irqtmp2 reserved4
  /* $79: address that every exit path finally branches to. */
  62. #define reserved5 $79
  63. #define target reserved5
  /* $74: saved at -16($sp) while in use. */
  64. #define save1 $74
  65. #define rv4 save1
  66. #define rv7 save1
  67. #define tagid save1
  68. #define maxsize save1
  69. #define pbyte save1
  70. #define pbit save1
  /* $73: saved at -32($sp) while in use. */
  71. #define save2 $73
  72. #define cur save2
  73. #define rv6 save2
  74. #define osize save2
  75. #define zovl save2
  76. #define oldovl save2
  77. #define newvma save2
  /* $72: saved at -48($sp) while in use. */
  78. #define save3 $72
  79. #define rv1 save3
  80. #define ea64 save3
  81. #define buf3 save3
  82. #define genwi save3
  83. #define newmap save3
  84. #define oldmask save3
  /* $71: only touched when OVLY_IRQ_SAVE is defined; saved at -64($sp). */
  85. #define save4 $71
  86. #define irq_stat save4
  /* Constant shuffle patterns and the current-overlay slot.  All three
     objects are emitted into .text; note that __ovly_current is written
     at run time by the routines below. */
  87. .text
  88. .align 4
  /* shufb control used by __ovly_load to build a fresh $lr: bytes 0-3
     select word 0 of the first source, bytes 4-7 select word 0 of the
     second source, and the 0x80 control bytes produce zero bytes. */
  89. .type __rv_pattern, @object
  90. .size __rv_pattern, 16
  91. __rv_pattern:
  92. .word 0x00010203, 0x10111213, 0x80808080, 0x80808080
  /* shufb control used by the transfer loop: moves word 1 of the source
     into word 0 and zeroes the rest, i.e. carries from the low word of
     the 64-bit effective address into the high word. */
  93. .type __cg_pattern, @object
  94. .size __cg_pattern, 16
  95. __cg_pattern:
  96. .word 0x04050607, 0x80808080, 0x80808080, 0x80808080
  /* One quadword; word 0 holds the index of the overlay most recently
     requested.  Stored by __ovly_return and __ovly_load, read back by
     __ovly_load and do_load. */
  97. .type __ovly_current, @object
  98. .size __ovly_current, 16
  99. __ovly_current:
  100. .space 16
  101. /*
  102. * __ovly_return - stub for returning from overlay functions.
  103. *
  104. * On entry the four slots of $lr are:
  105. * __ovly_return, prev ovl index, caller return addr, undefined.
  106. *
  107. * Load the previous overlay and jump to the caller return address.
  108. * Updates __ovly_current.
  * 
  * The overlay index is scaled by 16 and added to (_ovly_table - 16),
  * so index 1 addresses the first table entry.  If the low bit of that
  * entry's .size word is set the overlay is treated as present and the
  * routine branches straight to the caller; otherwise it falls into
  * do_load.  save1..save3 (and save4 with OVLY_IRQ_SAVE) are stored
  * below $sp on both paths because do_load reloads them from there;
  * this routine itself never writes them.
  * NOTE(review): for index 0 the lqx reads the 16 bytes in front of
  * _ovly_table; presumably the link layout makes that look "present"
  * - confirm against the linker script.
  * NOTE(review): the trailing "# p,l c" annotations look like issue
  * pipeline, latency and issue cycle, and the commented-out nop/lnop
  * lines like empty issue slots - confirm against the SPU ISA manual.
  109. */
  110. .align 4
  111. .global __ovly_return
  112. .type __ovly_return, @function
  113. __ovly_return:
  /* tab1 = table base biased by one 16-byte entry. */
  114. ila tab1, _ovly_table - 16 # 0,2 0
  /* Shift $lr left by 4 bytes: slot 1 (prev ovl index) -> slot 0. */
  115. shlqbyi ovl, $lr, 4 # 1,4 0
  116. #nop
  /* Shift $lr left by 8 bytes: slot 2 (caller return addr) -> slot 0. */
  117. shlqbyi target, $lr, 8 # 1,4 1
  118. #nop; lnop
  119. #nop; lnop
  /* off1 = index * 16, the byte offset of the table entry. */
  120. shli off1, ovl, 4 # 0,4 4
  121. #lnop
  122. #nop
  /* Hint the indirect branch at ovly_ret9. */
  123. hbr ovly_ret9, target # 1,15 5
  124. #nop; lnop
  125. #nop; lnop
  126. #nop
  /* vma = whole table entry { vma, size, file_offset, buf }. */
  127. lqx vma, tab1, off1 # 1,6 8
  128. #ifdef OVLY_IRQ_SAVE
  129. nop
  130. stqd save4, -64($sp) # 1,6 9
  131. #else
  132. #nop; lnop
  133. #endif
  134. #nop; lnop
  135. #nop; lnop
  136. #nop; lnop
  137. #nop; lnop
  138. #nop
  /* Rotate the entry left by 4 bytes so .size sits in slot 0. */
  139. rotqbyi size1, vma, 4 # 1,4 14
  140. #nop
  141. stqd save3, -48($sp) # 1,6 15
  142. #nop
  143. stqd save2, -32($sp) # 1,6 16
  144. #nop
  145. stqd save1, -16($sp) # 1,6 17
  /* present1 = low bit of .size (the "overlay is loaded" flag). */
  146. andi present1, size1, 1 # 0,2 18
  /* Record the overlay being returned to as the current one. */
  147. stqr ovl, __ovly_current # 1,6 18
  148. #nop; lnop
  149. #nop
  /* Not present: go and fetch it; do_load ends with "bi target". */
  150. brz present1, do_load # 1,4 20
  151. ovly_ret9:
  152. #nop
  153. bi target # 1,4 21
  154. /*
  155. * __ovly_load - copy an overlay partition to local store.
  156. *
  157. * On entry $75 points to a word consisting of the overlay index in
  158. * the top 14 bits, and the target address in the bottom 18 bits.
  159. *
  160. * Sets up $lr to return via __ovly_return. If $lr is already set
  161. * to return via __ovly_return, don't change it. In that case we
  162. * have a tail call from one overlay function to another.
  163. * Updates __ovly_current.
  * 
  * The $75 convention above is what the OVL_STUB_SIZE == 8 variant
  * decodes.  The other variant never loads through $75: it reads ovl
  * ($78) and target ($79) as they are on entry, so the stub must have
  * set both.
  * 
  * The new $lr is { __ovly_return, previous __ovly_current, old $lr
  * slot 0, old $lr slot 1 }.  save1..save3 are spilled below $sp, used
  * as temporaries and reloaded before the final branch; the spilled
  * copies stay valid for do_load, which spills nothing itself.
  164. */
  165. .align 3
  166. .global __ovly_load
  167. .type __ovly_load, @function
  168. __ovly_load:
  169. #if OVL_STUB_SIZE == 8
  170. ########
  171. #nop
  /* Fetch the quadword that contains the parameter word ... */
  172. lqd target, 0(parm) # 1,6 -11
  173. #nop; lnop
  174. #nop; lnop
  175. #nop; lnop
  176. #nop; lnop
  177. #nop; lnop
  178. #nop
  /* ... and rotate by the low bits of its address so the word ends up
     in slot 0. */
  179. rotqby target, target, parm # 1,4 -5
  180. ila tab2, _ovly_table - 16 # 0,2 -4
  181. stqd save3, -48($sp) # 1,6 -4
  182. #nop
  183. stqd save2, -32($sp) # 1,6 -3
  184. #nop
  185. stqd save1, -16($sp) # 1,6 -2
  /* Logical shift right by 18: the top 14 bits are the overlay index.
     target keeps the full word. */
  186. rotmi ovl, target, -18 # 0,4 -1
  187. hbr ovly_load9, target # 1,15 -1
  188. ila rv1, __ovly_return # 0,2 0
  189. #lnop
  190. #nop; lnop
  191. #nop
  /* cur = previous current overlay; read before it is overwritten. */
  192. lqr cur, __ovly_current # 1,6 2
  193. shli off2, ovl, 4 # 0,4 3
  194. stqr ovl, __ovly_current # 1,6 3
  /* rv2 slot 0 is all ones when $lr already returns via __ovly_return. */
  195. ceq rv2, $lr, rv1 # 0,2 4
  196. lqr rv3, __rv_pattern # 1,6 4
  197. #nop; lnop
  198. #nop; lnop
  199. #nop
  200. lqx vma, tab2, off2 # 1,6 7
  201. ########
  202. #else /* OVL_STUB_SIZE == 16 */
  203. ########
  /* Same work as the branch above minus the parameter decode: ovl and
     target are taken as already set on entry. */
  204. ila tab2, _ovly_table - 16 # 0,2 0
  205. stqd save3, -48($sp) # 1,6 0
  206. ila rv1, __ovly_return # 0,2 1
  207. stqd save2, -32($sp) # 1,6 1
  208. shli off2, ovl, 4 # 0,4 2
  209. lqr cur, __ovly_current # 1,6 2
  210. nop
  211. stqr ovl, __ovly_current # 1,6 3
  212. ceq rv2, $lr, rv1 # 0,2 4
  213. lqr rv3, __rv_pattern # 1,6 4
  214. #nop
  215. hbr ovly_load9, target # 1,15 5
  216. #nop
  217. lqx vma, tab2, off2 # 1,6 6
  218. #nop
  219. stqd save1, -16($sp) # 1,6 7
  220. ########
  221. #endif
  222. #nop; lnop
  223. #nop; lnop
  224. #nop
  /* rv4 = { __ovly_return, previous overlay index, 0, 0 }. */
  225. shufb rv4, rv1, cur, rv3 # 1,4 10
  226. #nop
  /* Expand the compare result into a byte select mask: all ones when
     $lr must be kept, all zeros when it must be replaced. */
  227. fsmb rv5, rv2 # 1,4 11
  228. #nop
  /* Shift the old $lr right by 8 bytes: slots 0,1 -> slots 2,3. */
  229. rotqmbyi rv6, $lr, -8 # 1,4 12
  230. #nop
  /* .size of the requested overlay into slot 0. */
  231. rotqbyi size2, vma, 4 # 1,4 13
  232. #nop
  /* rv1 is dead from here on, so save3 can be reloaded. */
  233. lqd save3, -48($sp) # 1,6 14
  234. #nop; lnop
  /* rv7 = candidate new $lr. */
  235. or rv7, rv4, rv6 # 0,2 16
  236. lqd save2, -32($sp) # 1,6 16
  237. andi present2, size2, 1 # 0,2 17
  238. #ifdef OVLY_IRQ_SAVE
  239. stqd save4, -64($sp) # 1,6 17
  240. #else
  241. lnop # 1,0 17
  242. #endif
  /* Keep the old $lr where the mask is set, else take rv7. */
  243. selb $lr, rv7, $lr, rv5 # 0,2 18
  244. lqd save1, -16($sp) # 1,6 18
  245. #nop
  /* Low bit of .size clear: the overlay is not resident, load it. */
  246. brz present2, do_load # 1,4 19
  247. ovly_load9:
  248. #nop
  249. bi target # 1,4 20
  250. /* If we get here, we are about to load a new overlay.
  251. * "vma" contains the relevant entry from _ovly_table[].
  252. * extern struct {
  253. * u32 vma;
  254. * u32 size;
  255. * u32 file_offset;
  256. * u32 buf;
  257. * } _ovly_table[];
  * 
  * Reached from __ovly_return and __ovly_load with save1..save3 (and
  * save4 when OVLY_IRQ_SAVE is defined) already stored below $sp and
  * with target holding the address to branch to once the overlay is
  * in place.
  * 
  * This part queues the transfer: the source is the 64-bit address
  * loaded from _EAR_ plus file_offset, the destination is vma, and
  * the length is split into commands of at most MFC_MAX_DMA_SIZE
  * bytes.
  258. */
  259. .align 3
  260. .global __ovly_load_event
  261. .type __ovly_load_event, @function
  262. __ovly_load_event:
  263. do_load:
  264. #ifdef OVLY_IRQ_SAVE
  /* Remember the machine status, then branch to the next instruction
     with bid.  NOTE(review): bid is presumably "branch indirect,
     disable interrupts" - confirm against the SPU ISA manual. */
  265. ila irqtmp1, do_load10 # 0,2 -5
  266. rotqbyi sz, vma, 8 # 1,4 -5
  267. #nop
  268. rdch irq_stat, $SPU_RdMachStat # 1,6 -4
  269. #nop
  270. bid irqtmp1 # 1,4 -3
  271. do_load10:
  272. nop
  273. #else
  274. #nop
  /* sz slot 0 = file_offset; it is the first increment added to the
     effective address by the loop below. */
  275. rotqbyi sz, vma, 8 # 1,4 0
  276. #endif
  /* osize slot 0 = bytes still to transfer (.size of the entry). */
  277. rotqbyi osize, vma, 4 # 1,4 1
  278. #nop
  /* ea64 = 64-bit base address: high word in slot 0, low in slot 1. */
  279. lqa ea64, _EAR_ # 1,6 2
  280. #nop
  281. lqr cgshuf, __cg_pattern # 1,6 3
  282. /* We could predict the branch at the end of this loop by adding a few
  283. instructions, and there are plenty of free cycles to do so without
  284. impacting loop execution time. However, it doesn't make a great
  285. deal of sense since we need to wait for the dma to complete anyway. */
  286. __ovly_xfer_loop:
  287. #nop
  /* off64 = { 0, sz }: the increment as a 64-bit value.  On the first
     pass sz is file_offset, afterwards the size of the previous chunk. */
  288. rotqmbyi off64, sz, -4 # 1,4 4
  289. #nop; lnop
  290. #nop; lnop
  291. #nop; lnop
  /* 64-bit add ea64 += off64: per-word carry out ... */
  292. cg cgbits, ea64, off64 # 0,2 8
  293. #lnop
  294. #nop; lnop
  295. #nop
  /* ... low-word carry moved up into the high word ... */
  296. shufb add64, cgbits, cgbits, cgshuf # 1,4 10
  297. #nop; lnop
  298. #nop; lnop
  299. #nop; lnop
  /* ... and add with that carry in. */
  300. addx add64, ea64, off64 # 0,2 14
  301. #lnop
  302. ila maxsize, MFC_MAX_DMA_SIZE # 0,2 15
  303. lnop
  /* Keep the advanced address for the next pass. */
  304. ori ea64, add64, 0 # 0,2 16
  /* Low address word into slot 0 for the channel write. */
  305. rotqbyi ealo, add64, 4 # 1,4 16
  306. cgt cmp, osize, maxsize # 0,2 17
  307. wrch $MFC_LSA, vma # 1,6 17
  308. #nop; lnop
  /* sz = maxsize if osize > maxsize, else osize. */
  309. selb sz, osize, maxsize, cmp # 0,2 19
  310. wrch $MFC_EAH, ea64 # 1,6 19
  311. ila tagid, MFC_TAG_ID # 0,2 20
  312. wrch $MFC_EAL, ealo # 1,6 20
  313. ila cmd, MFC_GET_CMD # 0,2 21
  314. wrch $MFC_Size, sz # 1,6 21
  /* osize -= sz. */
  315. sf osize, sz, osize # 0,2 22
  316. wrch $MFC_TagId, tagid # 1,6 22
  /* Advance the local store destination by the chunk just queued. */
  317. a vma, vma, sz # 0,2 23
  /* Writing the command is the last channel write of each chunk. */
  318. wrch $MFC_Cmd, cmd # 1,6 23
  319. #nop
  320. brnz osize, __ovly_xfer_loop # 1,4 24
  321. /* Now update our data structures while waiting for DMA to complete.
  322. Low bit of .size needs to be cleared on the _ovly_table entry
  323. corresponding to the evicted overlay, and set on the entry for the
  324. newly loaded overlay. Note that no overlay may in fact be evicted
  325. as _ovly_buf_table[] starts with all zeros. Don't zap .size entry
  326. for zero index! Also of course update the _ovly_buf_table entry. */
  /* This part marks the new overlay as present and swaps its index
     into the _ovly_buf_table word selected by the entry's .buf field
     (4-byte words, addressed as (_ovly_buf_table - 4) + buf * 4).  The
     index previously stored there is kept in oldovl for the eviction
     step that follows. */
  327. #nop
  /* The index is reloaded from memory: the register that held ovl on
     entry shares storage with cgshuf and irqtmp1. */
  328. lqr newovl, __ovly_current # 1,6 25
  329. #nop; lnop
  330. #nop; lnop
  331. #nop; lnop
  332. #nop; lnop
  333. #nop; lnop
  334. shli off3, newovl, 4 # 0,4 31
  335. #lnop
  336. ila tab3, _ovly_table - 16 # 0,2 32
  337. #lnop
  338. #nop
  /* Byte mask with only byte 7 set, i.e. the low byte of slot 1 ... */
  339. fsmbi pbyte, 0x100 # 1,4 33
  340. #nop; lnop
  341. #nop
  /* Reload the entry; vma was advanced by the transfer loop. */
  342. lqx vma, tab3, off3 # 1,6 35
  343. #nop; lnop
  /* ... reduced to a single 1 bit: the low bit of the .size word. */
  344. andi pbit, pbyte, 1 # 0,2 37
  345. lnop
  346. #nop; lnop
  347. #nop; lnop
  348. #nop; lnop
  /* Entry with the "present" bit set. */
  349. or newvma, vma, pbit # 0,2 41
  /* Rotate left by 12 bytes: .buf into slot 0. */
  350. rotqbyi buf3, vma, 12 # 1,4 41
  351. #nop; lnop
  352. #nop
  353. stqx newvma, tab3, off3 # 1,6 43
  354. #nop; lnop
  355. shli off4, buf3, 2 # 1,4 45
  356. #lnop
  357. ila tab4, _ovly_buf_table - 4 # 0,2 46
  358. #lnop
  359. #nop; lnop
  360. #nop; lnop
  361. #nop
  /* Quadword that contains the buffer's table word. */
  362. lqx map, tab4, off4 # 1,6 49
  363. #nop
  /* Shuffle control that inserts one word at that address. */
  364. cwx genwi, tab4, off4 # 1,4 50
  365. a addr4, tab4, off4 # 0,2 51
  366. #lnop
  367. #nop; lnop
  368. #nop; lnop
  369. #nop; lnop
  370. #nop
  /* oldovl slot 0 = index of the overlay that occupied this buffer
     (0 if none). */
  371. rotqby oldovl, map, addr4 # 1,4 55
  372. #nop
  /* newmap = map with the buffer's word replaced by newovl. */
  373. shufb newmap, newovl, map, genwi # 0,4 56
  /* Tag mask with only the MFC_TAG_ID bit set, for the wait below. */
  374. #if MFC_TAG_ID < 16
  375. ila newmask, 1 << MFC_TAG_ID # 0,2 57
  376. #else
  377. ilhu newmask, 1 << (MFC_TAG_ID - 16) # 0,2 57
  378. #endif
  379. #lnop
  380. #nop; lnop
  381. #nop; lnop
  382. stqd newmap, 0(addr4) # 1,6 60
  383. /* Save app's tagmask, wait for DMA complete, restore mask. */
  /* After the wait, this part clears the "present" bit of the evicted
     overlay (skipped when oldovl is 0), reloads save1..save3 (and save4
     with OVLY_IRQ_SAVE) from below $sp and branches to target. */
  384. ila tagstat, MFC_TAG_UPDATE_ALL # 0,2 61
  385. rdch oldmask, $MFC_RdTagMask # 1,6 61
  386. #nop
  387. wrch $MFC_WrTagMask, newmask # 1,6 62
  388. #nop
  389. wrch $MFC_WrTagUpdate, tagstat # 1,6 63
  390. #nop
  /* Reading the tag status is the wait step; the value is not used. */
  391. rdch tagstat, $MFC_RdTagStat # 1,6 64
  392. #nop
  393. sync # 1,4 65
  394. /* Any hint prior to the sync is lost. A hint here allows the branch
  395. to complete 15 cycles after the hint. With no hint the branch will
  396. take 18 or 19 cycles. */
  397. ila tab5, _ovly_table - 16 # 0,2 66
  398. hbr do_load99, target # 1,15 66
  /* Byte offset of the evicted overlay's table entry. */
  399. shli off5, oldovl, 4 # 0,4 67
  /* Put the application's tag mask back. */
  400. wrch $MFC_WrTagMask, oldmask # 1,6 67
  /* zovl shares a register with oldovl: the index is replaced by an
     "index was zero" flag once off5 has been computed. */
  401. ceqi zovl, oldovl, 0 # 0,2 68
  402. #lnop
  403. #nop; lnop
  404. #nop
  /* Widen the flag from slot 0 to all four words. */
  405. fsm zovl, zovl # 1,4 70
  406. #nop
  407. lqx oldvma, tab5, off5 # 1,6 71
  408. #nop
  409. lqd save3, -48($sp) # 1,6 72
  410. #nop; lnop
  /* Nothing was evicted: zero pbit so the store below changes nothing. */
  411. andc pbit, pbit, zovl # 0,2 74
  412. lqd save2, -32($sp) # 1,6 74
  413. #ifdef OVLY_IRQ_SAVE
  414. ila irqtmp2, do_load90 # 0,2 75
  415. #lnop
  /* Keep only the low bit of the machine status read at do_load.
     NOTE(review): presumably the interrupt-enable flag - confirm. */
  416. andi irq_stat, irq_stat, 1 # 0,2 76
  417. #lnop
  418. #else
  419. #nop; lnop
  420. #nop; lnop
  421. #endif
  /* Clear the "present" bit in the evicted overlay's .size word. */
  422. andc oldvma, oldvma, pbit # 0,2 77
  423. lqd save1, -16($sp) # 1,6 77
  424. nop # 0,0 78
  425. #lnop
  426. #nop
  427. stqx oldvma, tab5, off5 # 1,6 79
  428. #nop
  429. #ifdef OVLY_IRQ_SAVE
  /* Taken only when the saved bit is set; otherwise execution simply
     falls through to the same label.  NOTE(review): binze is
     presumably "branch indirect if not zero, enable interrupts" -
     confirm against the SPU ISA manual. */
  430. binze irq_stat, irqtmp2 # 1,4 80
  431. do_load90:
  432. #nop
  433. lqd save4, -64($sp) # 1,6 84
  434. #else
  435. #nop; lnop
  436. #endif
  /* Global label on a single nop, reached after all bookkeeping is
     done.  NOTE(review): presumably a breakpoint hook for debuggers -
     confirm. */
  437. .global _ovly_debug_event
  438. .type _ovly_debug_event, @function
  439. _ovly_debug_event:
  440. nop
  441. /* Branch to target address. */
  442. do_load99:
  443. bi target # 1,4 81/85
  /* The symbol size of __ovly_load spans everything from its label to
     this point, including do_load. */
  444. .size __ovly_load, . - __ovly_load