copyuser_64.S
/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>

#ifdef __BIG_ENDIAN__
#define sLd sld         /* Shift towards low-numbered address. */
#define sHd srd         /* Shift towards high-numbered address. */
#else
#define sLd srd         /* Shift towards low-numbered address. */
#define sHd sld         /* Shift towards high-numbered address. */
#endif
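/*
 * sLd/sHd make the unaligned-source path below endianness-independent:
 * sLd shifts data towards the low-numbered (first) byte of a doubleword
 * as it sits in memory, sHd towards the high-numbered byte, whichever
 * machine shift that is.  A merged doubleword is built from two
 * adjacent aligned loads with
 *      sLd  rA,rX,r10 ; sHd rB,rY,r11 ; or rA,rA,rB
 * where r10 is 8 * (source misalignment) and r10 + r11 = 64.
 */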
        .align  7
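/*
 * In C terms, roughly:
 *      unsigned long __copy_tofrom_user(void __user *to,
 *                      const void __user *from, unsigned long n);
 * r3 = destination, r4 = source, r5 = byte count.  Returns 0 in r3 on
 * success, otherwise the number of bytes not copied.
 */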
_GLOBAL_TOC(__copy_tofrom_user)
BEGIN_FTR_SECTION
        nop
FTR_SECTION_ELSE
        b       __copy_tofrom_user_power7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
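/*
 * On CPUs with CPU_FTR_VMX_COPY the feature section above is patched
 * into a branch to the VMX-accelerated __copy_tofrom_user_power7;
 * otherwise the nop falls through to the generic version here.
 */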
_GLOBAL(__copy_tofrom_user_base)
        /* first check for a whole page copy on a page boundary */
        cmpldi  cr1,r5,16
        cmpdi   cr6,r5,4096
        or      r0,r3,r4
        neg     r6,r3           /* LS 3 bits = # bytes to 8-byte dest bdry */
        andi.   r0,r0,4095
        std     r3,-24(r1)
        crand   cr0*4+2,cr0*4+2,cr6*4+2
        std     r4,-16(r1)
        std     r5,-8(r1)
        dcbt    0,r4
        beq     .Lcopy_page_4K
        andi.   r6,r6,7
        PPC_MTOCRF(0x01,r5)
        blt     cr1,.Lshort_copy

/*
 * Below we want to nop out the bne if we're on a CPU that has the
 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
 * cleared.
 * At the time of writing the only CPU that has this combination of bits
 * set is Power6.
 */
BEGIN_FTR_SECTION
        nop
FTR_SECTION_ELSE
        bne     .Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
                    CPU_FTR_UNALIGNED_LD_STD)
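/*
 * Destination is 8-byte aligned (or this CPU handles unaligned stores
 * well).  The main loop below copies 32 bytes per iteration, software
 * pipelined so the loads for one pair of doublewords overlap the
 * stores of the previous pair.
 */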
.Ldst_aligned:
        addi    r3,r3,-16
BEGIN_FTR_SECTION
        andi.   r0,r4,7
        bne     .Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
        blt     cr1,.Ldo_tail   /* if < 16 bytes to copy */
        srdi    r0,r5,5
        cmpdi   cr1,r0,0
20:     ld      r7,0(r4)
220:    ld      r6,8(r4)
        addi    r4,r4,16
        mtctr   r0
        andi.   r0,r5,0x10
        beq     22f
        addi    r3,r3,16
        addi    r4,r4,-16
        mr      r9,r7
        mr      r8,r6
        beq     cr1,72f
21:     ld      r7,16(r4)
221:    ld      r6,24(r4)
        addi    r4,r4,32
70:     std     r9,0(r3)
270:    std     r8,8(r3)
22:     ld      r9,0(r4)
222:    ld      r8,8(r4)
71:     std     r7,16(r3)
271:    std     r6,24(r3)
        addi    r3,r3,32
        bdnz    21b
72:     std     r9,0(r3)
272:    std     r8,8(r3)
        andi.   r5,r5,0xf
        beq+    3f
        addi    r4,r4,16
.Ldo_tail:
        addi    r3,r3,16
        bf      cr7*4+0,246f
244:    ld      r9,0(r4)
        addi    r4,r4,8
245:    std     r9,0(r3)
        addi    r3,r3,8
246:    bf      cr7*4+1,1f
23:     lwz     r9,0(r4)
        addi    r4,r4,4
73:     stw     r9,0(r3)
        addi    r3,r3,4
1:      bf      cr7*4+2,2f
44:     lhz     r9,0(r4)
        addi    r4,r4,2
74:     sth     r9,0(r3)
        addi    r3,r3,2
2:      bf      cr7*4+3,3f
45:     lbz     r9,0(r4)
75:     stb     r9,0(r3)
3:      li      r3,0
        blr
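/*
 * Source is not 8-byte aligned: round r4 down, load aligned
 * doublewords, and merge each adjacent pair with sLd/sHd (see the
 * comment at the top) so the doubleword loads and stores stay aligned.
 */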
.Lsrc_unaligned:
        srdi    r6,r5,3
        addi    r5,r5,-16
        subf    r4,r0,r4
        srdi    r7,r5,4
        sldi    r10,r0,3
        cmpldi  cr6,r6,3
        andi.   r5,r5,7
        mtctr   r7
        subfic  r11,r10,64
        add     r5,r5,r0
        bt      cr7*4+0,28f

24:     ld      r9,0(r4)        /* 3+2n loads, 2+2n stores */
25:     ld      r0,8(r4)
        sLd     r6,r9,r10
26:     ldu     r9,16(r4)
        sHd     r7,r0,r11
        sLd     r8,r0,r10
        or      r7,r7,r6
        blt     cr6,79f
27:     ld      r0,8(r4)
        b       2f

28:     ld      r0,0(r4)        /* 4+2n loads, 3+2n stores */
29:     ldu     r9,8(r4)
        sLd     r8,r0,r10
        addi    r3,r3,-8
        blt     cr6,5f
30:     ld      r0,8(r4)
        sHd     r12,r9,r11
        sLd     r6,r9,r10
31:     ldu     r9,16(r4)
        or      r12,r8,r12
        sHd     r7,r0,r11
        sLd     r8,r0,r10
        addi    r3,r3,16
        beq     cr6,78f

1:      or      r7,r7,r6
32:     ld      r0,8(r4)
76:     std     r12,8(r3)
2:      sHd     r12,r9,r11
        sLd     r6,r9,r10
33:     ldu     r9,16(r4)
        or      r12,r8,r12
77:     stdu    r7,16(r3)
        sHd     r7,r0,r11
        sLd     r8,r0,r10
        bdnz    1b

78:     std     r12,8(r3)
        or      r7,r7,r6
79:     std     r7,16(r3)
5:      sHd     r12,r9,r11
        or      r12,r8,r12
80:     std     r12,24(r3)
        bne     6f
        li      r3,0
        blr
6:      cmpwi   cr1,r5,8
        addi    r3,r3,32
        sLd     r9,r9,r10
        ble     cr1,7f
34:     ld      r0,8(r4)
        sHd     r7,r0,r11
        or      r9,r7,r9
7:
        bf      cr7*4+1,1f
#ifdef __BIG_ENDIAN__
        rotldi  r9,r9,32
#endif
94:     stw     r9,0(r3)
#ifdef __LITTLE_ENDIAN__
        rotrdi  r9,r9,32
#endif
        addi    r3,r3,4
1:      bf      cr7*4+2,2f
#ifdef __BIG_ENDIAN__
        rotldi  r9,r9,16
#endif
95:     sth     r9,0(r3)
#ifdef __LITTLE_ENDIAN__
        rotrdi  r9,r9,16
#endif
        addi    r3,r3,2
2:      bf      cr7*4+3,3f
#ifdef __BIG_ENDIAN__
        rotldi  r9,r9,8
#endif
96:     stb     r9,0(r3)
#ifdef __LITTLE_ENDIAN__
        rotrdi  r9,r9,8
#endif
3:      li      r3,0
        blr
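/*
 * Destination is not 8-byte aligned: copy the 1-7 leading bytes by
 * hand (count in r6, flags in cr7), then rejoin the aligned path.
 */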
.Ldst_unaligned:
        PPC_MTOCRF(0x01,r6)     /* put #bytes to 8B bdry into cr7 */
        subf    r5,r6,r5
        li      r7,0
        cmpldi  cr1,r5,16
        bf      cr7*4+3,1f
35:     lbz     r0,0(r4)
81:     stb     r0,0(r3)
        addi    r7,r7,1
1:      bf      cr7*4+2,2f
36:     lhzx    r0,r7,r4
82:     sthx    r0,r7,r3
        addi    r7,r7,2
2:      bf      cr7*4+1,3f
37:     lwzx    r0,r7,r4
83:     stwx    r0,r7,r3
3:      PPC_MTOCRF(0x01,r5)
        add     r4,r6,r4
        add     r3,r6,r3
        b       .Ldst_aligned
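/*
 * Fewer than 16 bytes to copy: move 8/4/2/1 bytes as selected by the
 * low four bits of the count, already loaded into cr7.
 */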
.Lshort_copy:
        bf      cr7*4+0,1f
38:     lwz     r0,0(r4)
39:     lwz     r9,4(r4)
        addi    r4,r4,8
84:     stw     r0,0(r3)
85:     stw     r9,4(r3)
        addi    r3,r3,8
1:      bf      cr7*4+1,2f
40:     lwz     r0,0(r4)
        addi    r4,r4,4
86:     stw     r0,0(r3)
        addi    r3,r3,4
2:      bf      cr7*4+2,3f
41:     lhz     r0,0(r4)
        addi    r4,r4,2
87:     sth     r0,0(r3)
        addi    r3,r3,2
3:      bf      cr7*4+3,4f
42:     lbz     r0,0(r4)
88:     stb     r0,0(r3)
4:      li      r3,0
        blr

/*
 * exception handlers follow
 * we have to return the number of bytes not copied
 * for an exception on a load, we set the rest of the destination to 0
 */
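/*
 * Fixup labels follow the convention visible in the __ex_table below:
 * the handler for a fault at label n is at label n+100 (so 20 -> 120,
 * 220 -> 320, and so on).  Each fixup first adjusts r3 to point at the
 * first byte of the destination that was not modified.
 */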
136:
137:
        add     r3,r3,r7
        b       1f
130:
131:
        addi    r3,r3,8
120:
320:
122:
322:
124:
125:
126:
127:
128:
129:
133:
        addi    r3,r3,8
132:
        addi    r3,r3,8
121:
321:
344:
134:
135:
138:
139:
140:
141:
142:
123:
144:
145:

/*
 * here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination
 */
1:      ld      r6,-24(r1)
        ld      r4,-16(r1)
        ld      r5,-8(r1)
        subf    r6,r6,r3
        add     r4,r4,r6
        subf    r5,r6,r5        /* #bytes left to go */

/*
 * first see if we can copy any more bytes before hitting another exception
 */
        mtctr   r5
43:     lbz     r0,0(r4)
        addi    r4,r4,1
89:     stb     r0,0(r3)
        addi    r3,r3,1
        bdnz    43b
        li      r3,0            /* huh? all copied successfully this time? */
        blr

/*
 * here we have trapped again, need to clear ctr bytes starting at r3
 */
143:    mfctr   r5
        li      r0,0
        mr      r4,r3
        mr      r3,r5           /* return the number of bytes not copied */
1:      andi.   r9,r4,7
        beq     3f
90:     stb     r0,0(r4)
        addic.  r5,r5,-1
        addi    r4,r4,1
        bne     1b
        blr
3:      cmpldi  cr1,r5,8
        srdi    r9,r5,3
        andi.   r5,r5,7
        blt     cr1,93f
        mtctr   r9
91:     std     r0,0(r4)
        addi    r4,r4,8
        bdnz    91b
93:     beqlr
        mtctr   r5
92:     stb     r0,0(r4)
        addi    r4,r4,1
        bdnz    92b
        blr

/*
 * exception handlers for stores: we just need to work
 * out how many bytes weren't copied
 */
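/*
 * As above, each fixup adjusts r3 to the first unmodified destination
 * byte; the residue is then end-of-destination minus r3, computed at
 * label 1 below from the argument values saved on entry.
 */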
182:
183:
        add     r3,r3,r7
        b       1f
371:
180:
        addi    r3,r3,8
171:
177:
179:
        addi    r3,r3,8
370:
372:
176:
178:
        addi    r3,r3,4
185:
        addi    r3,r3,4
170:
172:
345:
173:
174:
175:
181:
184:
186:
187:
188:
189:
194:
195:
196:
1:
        ld      r6,-24(r1)
        ld      r5,-8(r1)
        add     r6,r6,r5
        subf    r3,r3,r6        /* #bytes not copied */
190:
191:
192:
        blr                     /* #bytes not copied in r3 */

        .section __ex_table,"a"
        .align  3
        .llong  20b,120b
        .llong  220b,320b
        .llong  21b,121b
        .llong  221b,321b
        .llong  70b,170b
        .llong  270b,370b
        .llong  22b,122b
        .llong  222b,322b
        .llong  71b,171b
        .llong  271b,371b
        .llong  72b,172b
        .llong  272b,372b
        .llong  244b,344b
        .llong  245b,345b
        .llong  23b,123b
        .llong  73b,173b
        .llong  44b,144b
        .llong  74b,174b
        .llong  45b,145b
        .llong  75b,175b
        .llong  24b,124b
        .llong  25b,125b
        .llong  26b,126b
        .llong  27b,127b
        .llong  28b,128b
        .llong  29b,129b
        .llong  30b,130b
        .llong  31b,131b
        .llong  32b,132b
        .llong  76b,176b
        .llong  33b,133b
        .llong  77b,177b
        .llong  78b,178b
        .llong  79b,179b
        .llong  80b,180b
        .llong  34b,134b
        .llong  94b,194b
        .llong  95b,195b
        .llong  96b,196b
        .llong  35b,135b
        .llong  81b,181b
        .llong  36b,136b
        .llong  82b,182b
        .llong  37b,137b
        .llong  83b,183b
        .llong  38b,138b
        .llong  39b,139b
        .llong  84b,184b
        .llong  85b,185b
        .llong  40b,140b
        .llong  86b,186b
        .llong  41b,141b
        .llong  87b,187b
        .llong  42b,142b
        .llong  88b,188b
        .llong  43b,143b
        .llong  89b,189b
        .llong  90b,190b
        .llong  91b,191b
        .llong  92b,192b
        .text

/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label).
 */
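/*
 * The loads in the inner loop are spread across six streams 128 bytes
 * apart (offsets 0, 128, 256, 384, 512 and 640 from r4), so several
 * cache-line fetches can be in flight at once; r20-r31 are saved in
 * the stack redzone below r1 to provide enough registers.
 */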
.Lcopy_page_4K:
        std     r31,-32(1)
        std     r30,-40(1)
        std     r29,-48(1)
        std     r28,-56(1)
        std     r27,-64(1)
        std     r26,-72(1)
        std     r25,-80(1)
        std     r24,-88(1)
        std     r23,-96(1)
        std     r22,-104(1)
        std     r21,-112(1)
        std     r20,-120(1)
        li      r5,4096/32 - 1
        addi    r3,r3,-8
        li      r0,5
0:      addi    r5,r5,-24
        mtctr   r0
20:     ld      r22,640(4)
21:     ld      r21,512(4)
22:     ld      r20,384(4)
23:     ld      r11,256(4)
24:     ld      r9,128(4)
25:     ld      r7,0(4)
26:     ld      r25,648(4)
27:     ld      r24,520(4)
28:     ld      r23,392(4)
29:     ld      r10,264(4)
30:     ld      r8,136(4)
31:     ldu     r6,8(4)
        cmpwi   r5,24
1:
32:     std     r22,648(3)
33:     std     r21,520(3)
34:     std     r20,392(3)
35:     std     r11,264(3)
36:     std     r9,136(3)
37:     std     r7,8(3)
38:     ld      r28,648(4)
39:     ld      r27,520(4)
40:     ld      r26,392(4)
41:     ld      r31,264(4)
42:     ld      r30,136(4)
43:     ld      r29,8(4)
44:     std     r25,656(3)
45:     std     r24,528(3)
46:     std     r23,400(3)
47:     std     r10,272(3)
48:     std     r8,144(3)
49:     std     r6,16(3)
50:     ld      r22,656(4)
51:     ld      r21,528(4)
52:     ld      r20,400(4)
53:     ld      r11,272(4)
54:     ld      r9,144(4)
55:     ld      r7,16(4)
56:     std     r28,664(3)
57:     std     r27,536(3)
58:     std     r26,408(3)
59:     std     r31,280(3)
60:     std     r30,152(3)
61:     stdu    r29,24(3)
62:     ld      r25,664(4)
63:     ld      r24,536(4)
64:     ld      r23,408(4)
65:     ld      r10,280(4)
66:     ld      r8,152(4)
67:     ldu     r6,24(4)
        bdnz    1b
68:     std     r22,648(3)
69:     std     r21,520(3)
70:     std     r20,392(3)
71:     std     r11,264(3)
72:     std     r9,136(3)
73:     std     r7,8(3)
74:     addi    r4,r4,640
75:     addi    r3,r3,648
        bge     0b
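/*
 * Fewer than 24 32-byte groups left: finish them with a simple
 * software-pipelined loop, 32 bytes per iteration.
 */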
        mtctr   r5
76:     ld      r7,0(4)
77:     ld      r8,8(4)
78:     ldu     r9,16(4)
3:
79:     ld      r10,8(4)
80:     std     r7,8(3)
81:     ld      r7,16(4)
82:     std     r8,16(3)
83:     ld      r8,24(4)
84:     std     r9,24(3)
85:     ldu     r9,32(4)
86:     stdu    r10,32(3)
        bdnz    3b
4:
87:     ld      r10,8(4)
88:     std     r7,8(3)
89:     std     r8,16(3)
90:     std     r9,24(3)
91:     std     r10,32(3)
9:      ld      r20,-120(1)
        ld      r21,-112(1)
        ld      r22,-104(1)
        ld      r23,-96(1)
        ld      r24,-88(1)
        ld      r25,-80(1)
        ld      r26,-72(1)
        ld      r27,-64(1)
        ld      r28,-56(1)
        ld      r29,-48(1)
        ld      r30,-40(1)
        ld      r31,-32(1)
        li      r3,0
        blr

/*
 * on an exception, reset to the beginning and jump back into the
 * standard __copy_tofrom_user
 */
100:    ld      r20,-120(1)
        ld      r21,-112(1)
        ld      r22,-104(1)
        ld      r23,-96(1)
        ld      r24,-88(1)
        ld      r25,-80(1)
        ld      r26,-72(1)
        ld      r27,-64(1)
        ld      r28,-56(1)
        ld      r29,-48(1)
        ld      r30,-40(1)
        ld      r31,-32(1)
        ld      r3,-24(r1)
        ld      r4,-16(r1)
        li      r5,4096
        b       .Ldst_aligned

        .section __ex_table,"a"
        .align  3
        .llong  20b,100b
        .llong  21b,100b
        .llong  22b,100b
        .llong  23b,100b
        .llong  24b,100b
        .llong  25b,100b
        .llong  26b,100b
        .llong  27b,100b
        .llong  28b,100b
        .llong  29b,100b
        .llong  30b,100b
        .llong  31b,100b
        .llong  32b,100b
        .llong  33b,100b
        .llong  34b,100b
        .llong  35b,100b
        .llong  36b,100b
        .llong  37b,100b
        .llong  38b,100b
        .llong  39b,100b
        .llong  40b,100b
        .llong  41b,100b
        .llong  42b,100b
        .llong  43b,100b
        .llong  44b,100b
        .llong  45b,100b
        .llong  46b,100b
        .llong  47b,100b
        .llong  48b,100b
        .llong  49b,100b
        .llong  50b,100b
        .llong  51b,100b
        .llong  52b,100b
        .llong  53b,100b
        .llong  54b,100b
        .llong  55b,100b
        .llong  56b,100b
        .llong  57b,100b
        .llong  58b,100b
        .llong  59b,100b
        .llong  60b,100b
        .llong  61b,100b
        .llong  62b,100b
        .llong  63b,100b
        .llong  64b,100b
        .llong  65b,100b
        .llong  66b,100b
        .llong  67b,100b
        .llong  68b,100b
        .llong  69b,100b
        .llong  70b,100b
        .llong  71b,100b
        .llong  72b,100b
        .llong  73b,100b
        .llong  74b,100b
        .llong  75b,100b
        .llong  76b,100b
        .llong  77b,100b
        .llong  78b,100b
        .llong  79b,100b
        .llong  80b,100b
        .llong  81b,100b
        .llong  82b,100b
        .llong  83b,100b
        .llong  84b,100b
        .llong  85b,100b
        .llong  86b,100b
        .llong  87b,100b
        .llong  88b,100b
        .llong  89b,100b
        .llong  90b,100b
        .llong  91b,100b
EXPORT_SYMBOL(__copy_tofrom_user)