unaligned.c

/*
 * Copyright 2013 Tilera Corporation. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation, version 2.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT. See the GNU General Public License for
 * more details.
 *
 * A code-rewriter that handles unaligned exceptions.
 */
#include <linux/smp.h>
#include <linux/ptrace.h>
#include <linux/slab.h>
#include <linux/thread_info.h>
#include <linux/uaccess.h>
#include <linux/mman.h>
#include <linux/types.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/compat.h>
#include <linux/prctl.h>
#include <asm/cacheflush.h>
#include <asm/traps.h>
#include <asm/uaccess.h>
#include <asm/unaligned.h>
#include <arch/abi.h>
#include <arch/spr_def.h>
#include <arch/opcode.h>

/*
 * This file handles unaligned exceptions for tile-Gx. The tilepro's
 * unaligned exceptions are handled in single_step.c.
 */

int unaligned_printk;

static int __init setup_unaligned_printk(char *str)
{
	long val;

	if (kstrtol(str, 0, &val) != 0)
		return 0;
	unaligned_printk = val;
	pr_info("Printk for each unaligned data access is %s\n",
		unaligned_printk ? "enabled" : "disabled");
	return 1;
}
__setup("unaligned_printk=", setup_unaligned_printk);
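
/* Count of unaligned accesses handled by this fixup code (approximate). */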
unsigned int unaligned_fixup_count;

#ifdef __tilegx__

/*
 * Unaligned data JIT fixup code fragment. Reserved space is 128 bytes.
 * The 1st 64-bit word saves the fault PC address, the 2nd word is the fault
 * instruction bundle, followed by 14 JIT bundles.
 */
struct unaligned_jit_fragment {
	unsigned long pc;
	tilegx_bundle_bits bundle;
	tilegx_bundle_bits insn[14];
};

/*
 * Check if a nop or fnop is at the bundle's pipeline X0.
 */
static bool is_bundle_x0_nop(tilegx_bundle_bits bundle)
{
	return (((get_UnaryOpcodeExtension_X0(bundle) ==
		  NOP_UNARY_OPCODE_X0) &&
		 (get_RRROpcodeExtension_X0(bundle) ==
		  UNARY_RRR_0_OPCODE_X0) &&
		 (get_Opcode_X0(bundle) ==
		  RRR_0_OPCODE_X0)) ||
		((get_UnaryOpcodeExtension_X0(bundle) ==
		  FNOP_UNARY_OPCODE_X0) &&
		 (get_RRROpcodeExtension_X0(bundle) ==
		  UNARY_RRR_0_OPCODE_X0) &&
		 (get_Opcode_X0(bundle) ==
		  RRR_0_OPCODE_X0)));
}

/*
 * Check if a nop or fnop is at the bundle's pipeline X1.
 */
static bool is_bundle_x1_nop(tilegx_bundle_bits bundle)
{
	return (((get_UnaryOpcodeExtension_X1(bundle) ==
		  NOP_UNARY_OPCODE_X1) &&
		 (get_RRROpcodeExtension_X1(bundle) ==
		  UNARY_RRR_0_OPCODE_X1) &&
		 (get_Opcode_X1(bundle) ==
		  RRR_0_OPCODE_X1)) ||
		((get_UnaryOpcodeExtension_X1(bundle) ==
		  FNOP_UNARY_OPCODE_X1) &&
		 (get_RRROpcodeExtension_X1(bundle) ==
		  UNARY_RRR_0_OPCODE_X1) &&
		 (get_Opcode_X1(bundle) ==
		  RRR_0_OPCODE_X1)));
}

/*
 * Check if a nop or fnop is at the bundle's pipeline Y0.
 */
static bool is_bundle_y0_nop(tilegx_bundle_bits bundle)
{
	return (((get_UnaryOpcodeExtension_Y0(bundle) ==
		  NOP_UNARY_OPCODE_Y0) &&
		 (get_RRROpcodeExtension_Y0(bundle) ==
		  UNARY_RRR_1_OPCODE_Y0) &&
		 (get_Opcode_Y0(bundle) ==
		  RRR_1_OPCODE_Y0)) ||
		((get_UnaryOpcodeExtension_Y0(bundle) ==
		  FNOP_UNARY_OPCODE_Y0) &&
		 (get_RRROpcodeExtension_Y0(bundle) ==
		  UNARY_RRR_1_OPCODE_Y0) &&
		 (get_Opcode_Y0(bundle) ==
		  RRR_1_OPCODE_Y0)));
}

/*
 * Check if a nop or fnop is at the bundle's pipeline Y1.
 */
static bool is_bundle_y1_nop(tilegx_bundle_bits bundle)
{
	return (((get_UnaryOpcodeExtension_Y1(bundle) ==
		  NOP_UNARY_OPCODE_Y1) &&
		 (get_RRROpcodeExtension_Y1(bundle) ==
		  UNARY_RRR_1_OPCODE_Y1) &&
		 (get_Opcode_Y1(bundle) ==
		  RRR_1_OPCODE_Y1)) ||
		((get_UnaryOpcodeExtension_Y1(bundle) ==
		  FNOP_UNARY_OPCODE_Y1) &&
		 (get_RRROpcodeExtension_Y1(bundle) ==
		  UNARY_RRR_1_OPCODE_Y1) &&
		 (get_Opcode_Y1(bundle) ==
		  RRR_1_OPCODE_Y1)));
}

/*
 * Test if a bundle's y0 and y1 pipelines are both nop or fnop.
 */
static bool is_y0_y1_nop(tilegx_bundle_bits bundle)
{
	return is_bundle_y0_nop(bundle) && is_bundle_y1_nop(bundle);
}

/*
 * Test if a bundle's x0 and x1 pipelines are both nop or fnop.
 */
static bool is_x0_x1_nop(tilegx_bundle_bits bundle)
{
	return is_bundle_x0_nop(bundle) && is_bundle_x1_nop(bundle);
}

/*
 * Find the destination and source registers of the faulting unaligned
 * access instruction at X1 or Y2. Also, allocate up to 3 scratch registers
 * clob1, clob2 and clob3, which are guaranteed to be different from any
 * register used in the fault bundle. r_alias is used to return whether
 * instructions other than the unaligned load/store share a register with
 * ra, rb or rd.
 */
static void find_regs(tilegx_bundle_bits bundle, uint64_t *rd, uint64_t *ra,
		      uint64_t *rb, uint64_t *clob1, uint64_t *clob2,
		      uint64_t *clob3, bool *r_alias)
{
	int i;
	uint64_t reg;
	uint64_t reg_map = 0, alias_reg_map = 0, map;
	bool alias = false;

	/*
	 * Parse the fault bundle, find the potentially used registers and
	 * mark the corresponding bits in reg_map and alias_map. These 2 bit
	 * maps are used to find the scratch registers and determine if there
	 * is a register alias.
	 */
	if (bundle & TILEGX_BUNDLE_MODE_MASK) {  /* Y Mode Bundle. */

		reg = get_SrcA_Y2(bundle);
		reg_map |= 1ULL << reg;
		*ra = reg;
		reg = get_SrcBDest_Y2(bundle);
		reg_map |= 1ULL << reg;

		if (rd) {
			/* Load. */
			*rd = reg;
			alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
		} else {
			/* Store. */
			*rb = reg;
			alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
		}

		if (!is_bundle_y1_nop(bundle)) {
			reg = get_SrcA_Y1(bundle);
			reg_map |= (1ULL << reg);
			map = (1ULL << reg);

			reg = get_SrcB_Y1(bundle);
			reg_map |= (1ULL << reg);
			map |= (1ULL << reg);

			reg = get_Dest_Y1(bundle);
			reg_map |= (1ULL << reg);
			map |= (1ULL << reg);

			if (map & alias_reg_map)
				alias = true;
		}

		if (!is_bundle_y0_nop(bundle)) {
			reg = get_SrcA_Y0(bundle);
			reg_map |= (1ULL << reg);
			map = (1ULL << reg);

			reg = get_SrcB_Y0(bundle);
			reg_map |= (1ULL << reg);
			map |= (1ULL << reg);

			reg = get_Dest_Y0(bundle);
			reg_map |= (1ULL << reg);
			map |= (1ULL << reg);

			if (map & alias_reg_map)
				alias = true;
		}
	} else { /* X Mode Bundle. */

		reg = get_SrcA_X1(bundle);
		reg_map |= (1ULL << reg);
		*ra = reg;
		if (rd) {
			/* Load. */
			reg = get_Dest_X1(bundle);
			reg_map |= (1ULL << reg);
			*rd = reg;
			alias_reg_map = (1ULL << *rd) | (1ULL << *ra);
		} else {
			/* Store. */
			reg = get_SrcB_X1(bundle);
			reg_map |= (1ULL << reg);
			*rb = reg;
			alias_reg_map = (1ULL << *ra) | (1ULL << *rb);
		}

		if (!is_bundle_x0_nop(bundle)) {
			reg = get_SrcA_X0(bundle);
			reg_map |= (1ULL << reg);
			map = (1ULL << reg);

			reg = get_SrcB_X0(bundle);
			reg_map |= (1ULL << reg);
			map |= (1ULL << reg);

			reg = get_Dest_X0(bundle);
			reg_map |= (1ULL << reg);
			map |= (1ULL << reg);

			if (map & alias_reg_map)
				alias = true;
		}
	}

	/*
	 * "alias" indicates if the unaligned access registers have a
	 * collision with others in the same bundle. We just test the
	 * all-register-operands (RRR) case and ignore the case with an
	 * immediate. If a bundle has no register alias, we may do the fixup
	 * in a simpler and faster manner. So if an immediate field happens
	 * to collide with a register number, we may end up falling back to
	 * the generic handling.
	 */
	*r_alias = alias;

	/* Flip bits on reg_map. */
	reg_map ^= -1ULL;

	/* Scan reg_map's lower 54 (TREG_SP) bits to find 3 set bits. */
	for (i = 0; i < TREG_SP; i++) {
		if (reg_map & (0x1ULL << i)) {
			if (*clob1 == -1) {
				*clob1 = i;
			} else if (*clob2 == -1) {
				*clob2 = i;
			} else if (*clob3 == -1) {
				*clob3 = i;
				return;
			}
		}
	}
}

/*
 * Sanity check for register ra, rb, rd, clob1/2/3. Return true if any of them
 * is unexpected.
 */
static bool check_regs(uint64_t rd, uint64_t ra, uint64_t rb,
		       uint64_t clob1, uint64_t clob2, uint64_t clob3)
{
	bool unexpected = false;

	if ((ra >= 56) && (ra != TREG_ZERO))
		unexpected = true;

	if ((clob1 >= 56) || (clob2 >= 56) || (clob3 >= 56))
		unexpected = true;

	if (rd != -1) {
		if ((rd >= 56) && (rd != TREG_ZERO))
			unexpected = true;
	} else {
		if ((rb >= 56) && (rb != TREG_ZERO))
			unexpected = true;
	}
	return unexpected;
}
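
/*
 * Bit masks that isolate each pipeline slot within a 64-bit instruction
 * bundle, so a template bundle can be masked down to a single slot and then
 * ORed with freshly created operand fields. GX_INSN_BSWAP() accounts for the
 * bundle templates being stored in (little-endian) instruction byte order,
 * which differs from the integer byte order on a big-endian kernel.
 */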
#define GX_INSN_X0_MASK ((1ULL << 31) - 1)
#define GX_INSN_X1_MASK (((1ULL << 31) - 1) << 31)
#define GX_INSN_Y0_MASK ((0xFULL << 27) | (0xFFFFFULL))
#define GX_INSN_Y1_MASK (GX_INSN_Y0_MASK << 31)
#define GX_INSN_Y2_MASK ((0x7FULL << 51) | (0x7FULL << 20))

#ifdef __LITTLE_ENDIAN
#define GX_INSN_BSWAP(_bundle_) (_bundle_)
#else
#define GX_INSN_BSWAP(_bundle_) swab64(_bundle_)
#endif /* __LITTLE_ENDIAN */

/*
 * __JIT_CODE(.) creates template bundles in the .rodata.unalign_data section.
 * The corresponding static function jit_x#_###(.) generates a partial or
 * whole bundle based on the template and the given arguments.
 */
#define __JIT_CODE(_X_) \
	asm (".pushsection .rodata.unalign_data, \"a\"\n" \
	     _X_"\n" \
	     ".popsection\n")

__JIT_CODE("__unalign_jit_x1_mtspr: {mtspr 0, r0}");
static tilegx_bundle_bits jit_x1_mtspr(int spr, int reg)
{
	extern tilegx_bundle_bits __unalign_jit_x1_mtspr;
	return (GX_INSN_BSWAP(__unalign_jit_x1_mtspr) & GX_INSN_X1_MASK) |
		create_MT_Imm14_X1(spr) | create_SrcA_X1(reg);
}

__JIT_CODE("__unalign_jit_x1_mfspr: {mfspr r0, 0}");
static tilegx_bundle_bits jit_x1_mfspr(int reg, int spr)
{
	extern tilegx_bundle_bits __unalign_jit_x1_mfspr;
	return (GX_INSN_BSWAP(__unalign_jit_x1_mfspr) & GX_INSN_X1_MASK) |
		create_MF_Imm14_X1(spr) | create_Dest_X1(reg);
}

__JIT_CODE("__unalign_jit_x0_addi: {addi r0, r0, 0; iret}");
static tilegx_bundle_bits jit_x0_addi(int rd, int ra, int imm8)
{
	extern tilegx_bundle_bits __unalign_jit_x0_addi;
	return (GX_INSN_BSWAP(__unalign_jit_x0_addi) & GX_INSN_X0_MASK) |
		create_Dest_X0(rd) | create_SrcA_X0(ra) |
		create_Imm8_X0(imm8);
}

__JIT_CODE("__unalign_jit_x1_ldna: {ldna r0, r0}");
static tilegx_bundle_bits jit_x1_ldna(int rd, int ra)
{
	extern tilegx_bundle_bits __unalign_jit_x1_ldna;
	return (GX_INSN_BSWAP(__unalign_jit_x1_ldna) & GX_INSN_X1_MASK) |
		create_Dest_X1(rd) | create_SrcA_X1(ra);
}

__JIT_CODE("__unalign_jit_x0_dblalign: {dblalign r0, r0 ,r0}");
static tilegx_bundle_bits jit_x0_dblalign(int rd, int ra, int rb)
{
	extern tilegx_bundle_bits __unalign_jit_x0_dblalign;
	return (GX_INSN_BSWAP(__unalign_jit_x0_dblalign) & GX_INSN_X0_MASK) |
		create_Dest_X0(rd) | create_SrcA_X0(ra) |
		create_SrcB_X0(rb);
}

__JIT_CODE("__unalign_jit_x1_iret: {iret}");
static tilegx_bundle_bits jit_x1_iret(void)
{
	extern tilegx_bundle_bits __unalign_jit_x1_iret;
	return GX_INSN_BSWAP(__unalign_jit_x1_iret) & GX_INSN_X1_MASK;
}

__JIT_CODE("__unalign_jit_x01_fnop: {fnop;fnop}");
static tilegx_bundle_bits jit_x0_fnop(void)
{
	extern tilegx_bundle_bits __unalign_jit_x01_fnop;
	return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X0_MASK;
}

static tilegx_bundle_bits jit_x1_fnop(void)
{
	extern tilegx_bundle_bits __unalign_jit_x01_fnop;
	return GX_INSN_BSWAP(__unalign_jit_x01_fnop) & GX_INSN_X1_MASK;
}

__JIT_CODE("__unalign_jit_y2_dummy: {fnop; fnop; ld zero, sp}");
static tilegx_bundle_bits jit_y2_dummy(void)
{
	extern tilegx_bundle_bits __unalign_jit_y2_dummy;
	return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y2_MASK;
}

static tilegx_bundle_bits jit_y1_fnop(void)
{
	extern tilegx_bundle_bits __unalign_jit_y2_dummy;
	return GX_INSN_BSWAP(__unalign_jit_y2_dummy) & GX_INSN_Y1_MASK;
}
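
/*
 * The *_add templates below encode a non-zero register (r1) in the operand
 * field being replaced, so these helpers also clear that field with
 * ~create_..._X1(-1) before ORing in the caller's register.
 */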

__JIT_CODE("__unalign_jit_x1_st1_add: {st1_add r1, r0, 0}");
static tilegx_bundle_bits jit_x1_st1_add(int ra, int rb, int imm8)
{
	extern tilegx_bundle_bits __unalign_jit_x1_st1_add;
	return (GX_INSN_BSWAP(__unalign_jit_x1_st1_add) &
		(~create_SrcA_X1(-1)) &
		GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
		create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
}

__JIT_CODE("__unalign_jit_x1_st: {crc32_8 r1, r0, r0; st r0, r0}");
static tilegx_bundle_bits jit_x1_st(int ra, int rb)
{
	extern tilegx_bundle_bits __unalign_jit_x1_st;
	return (GX_INSN_BSWAP(__unalign_jit_x1_st) & GX_INSN_X1_MASK) |
		create_SrcA_X1(ra) | create_SrcB_X1(rb);
}

__JIT_CODE("__unalign_jit_x1_st_add: {st_add r1, r0, 0}");
static tilegx_bundle_bits jit_x1_st_add(int ra, int rb, int imm8)
{
	extern tilegx_bundle_bits __unalign_jit_x1_st_add;
	return (GX_INSN_BSWAP(__unalign_jit_x1_st_add) &
		(~create_SrcA_X1(-1)) &
		GX_INSN_X1_MASK) | create_SrcA_X1(ra) |
		create_SrcB_X1(rb) | create_Dest_Imm8_X1(imm8);
}

__JIT_CODE("__unalign_jit_x1_ld: {crc32_8 r1, r0, r0; ld r0, r0}");
static tilegx_bundle_bits jit_x1_ld(int rd, int ra)
{
	extern tilegx_bundle_bits __unalign_jit_x1_ld;
	return (GX_INSN_BSWAP(__unalign_jit_x1_ld) & GX_INSN_X1_MASK) |
		create_Dest_X1(rd) | create_SrcA_X1(ra);
}

__JIT_CODE("__unalign_jit_x1_ld_add: {ld_add r1, r0, 0}");
static tilegx_bundle_bits jit_x1_ld_add(int rd, int ra, int imm8)
{
	extern tilegx_bundle_bits __unalign_jit_x1_ld_add;
	return (GX_INSN_BSWAP(__unalign_jit_x1_ld_add) &
		(~create_Dest_X1(-1)) &
		GX_INSN_X1_MASK) | create_Dest_X1(rd) |
		create_SrcA_X1(ra) | create_Imm8_X1(imm8);
}

__JIT_CODE("__unalign_jit_x0_bfexts: {bfexts r0, r0, 0, 0}");
static tilegx_bundle_bits jit_x0_bfexts(int rd, int ra, int bfs, int bfe)
{
	extern tilegx_bundle_bits __unalign_jit_x0_bfexts;
	return (GX_INSN_BSWAP(__unalign_jit_x0_bfexts) &
		GX_INSN_X0_MASK) |
		create_Dest_X0(rd) | create_SrcA_X0(ra) |
		create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
}

__JIT_CODE("__unalign_jit_x0_bfextu: {bfextu r0, r0, 0, 0}");
static tilegx_bundle_bits jit_x0_bfextu(int rd, int ra, int bfs, int bfe)
{
	extern tilegx_bundle_bits __unalign_jit_x0_bfextu;
	return (GX_INSN_BSWAP(__unalign_jit_x0_bfextu) &
		GX_INSN_X0_MASK) |
		create_Dest_X0(rd) | create_SrcA_X0(ra) |
		create_BFStart_X0(bfs) | create_BFEnd_X0(bfe);
}

__JIT_CODE("__unalign_jit_x1_addi: {bfextu r1, r1, 0, 0; addi r0, r0, 0}");
static tilegx_bundle_bits jit_x1_addi(int rd, int ra, int imm8)
{
	extern tilegx_bundle_bits __unalign_jit_x1_addi;
	return (GX_INSN_BSWAP(__unalign_jit_x1_addi) & GX_INSN_X1_MASK) |
		create_Dest_X1(rd) | create_SrcA_X1(ra) |
		create_Imm8_X1(imm8);
}

__JIT_CODE("__unalign_jit_x0_shrui: {shrui r0, r0, 0; iret}");
static tilegx_bundle_bits jit_x0_shrui(int rd, int ra, int imm6)
{
	extern tilegx_bundle_bits __unalign_jit_x0_shrui;
	return (GX_INSN_BSWAP(__unalign_jit_x0_shrui) &
		GX_INSN_X0_MASK) |
		create_Dest_X0(rd) | create_SrcA_X0(ra) |
		create_ShAmt_X0(imm6);
}

__JIT_CODE("__unalign_jit_x0_rotli: {rotli r0, r0, 0; iret}");
static tilegx_bundle_bits jit_x0_rotli(int rd, int ra, int imm6)
{
	extern tilegx_bundle_bits __unalign_jit_x0_rotli;
	return (GX_INSN_BSWAP(__unalign_jit_x0_rotli) &
		GX_INSN_X0_MASK) |
		create_Dest_X0(rd) | create_SrcA_X0(ra) |
		create_ShAmt_X0(imm6);
}

__JIT_CODE("__unalign_jit_x1_bnezt: {bnezt r0, __unalign_jit_x1_bnezt}");
static tilegx_bundle_bits jit_x1_bnezt(int ra, int broff)
{
	extern tilegx_bundle_bits __unalign_jit_x1_bnezt;
	return (GX_INSN_BSWAP(__unalign_jit_x1_bnezt) &
		GX_INSN_X1_MASK) |
		create_SrcA_X1(ra) | create_BrOff_X1(broff);
}

#undef __JIT_CODE

/*
 * This function generates the unaligned fixup JIT.
 *
 * We first find the unaligned load/store instruction's destination and
 * source registers: ra, rb and rd, plus 3 scratch registers, by calling
 * find_regs(). The 3 scratch clobbers must not alias any register used in
 * the fault bundle. Then we analyze the fault bundle to determine whether
 * it's a load or store, the operand width, and whether there is a branch or
 * address increment etc. Finally the generated JIT is copied into the JIT
 * code area in user space.
 */
static
void jit_bundle_gen(struct pt_regs *regs, tilegx_bundle_bits bundle,
		    int align_ctl)
{
	struct thread_info *info = current_thread_info();
	struct unaligned_jit_fragment frag;
	struct unaligned_jit_fragment *jit_code_area;
	tilegx_bundle_bits bundle_2 = 0;
	/* If bundle_2_enable = false, bundle_2 is a fnop/nop operation. */
	bool bundle_2_enable = true;
	uint64_t ra = -1, rb = -1, rd = -1, clob1 = -1, clob2 = -1, clob3 = -1;
	/*
	 * Indicate if the unaligned access instruction's registers collide
	 * with others in the same bundle.
	 */
	bool alias = false;
	bool load_n_store = true;
	bool load_store_signed = false;
	unsigned int load_store_size = 8;
	bool y1_br = false;  /* True for a branch in the same bundle at Y1. */
	int y1_br_reg = 0;
	/* True for a link operation, i.e. jalr or lnk at Y1. */
	bool y1_lr = false;
	int y1_lr_reg = 0;
	bool x1_add = false; /* True for a load/store ADD instruction at X1. */
	int x1_add_imm8 = 0;
	bool unexpected = false;
	int n = 0, k;

	jit_code_area =
		(struct unaligned_jit_fragment *)(info->unalign_jit_base);

	memset((void *)&frag, 0, sizeof(frag));

	/* 0: X mode, Otherwise: Y mode. */
	if (bundle & TILEGX_BUNDLE_MODE_MASK) {
		unsigned int mod, opcode;

		if (get_Opcode_Y1(bundle) == RRR_1_OPCODE_Y1 &&
		    get_RRROpcodeExtension_Y1(bundle) ==
		    UNARY_RRR_1_OPCODE_Y1) {

			opcode = get_UnaryOpcodeExtension_Y1(bundle);

			/*
			 * Test for a "jalr", "jalrp", "jr" or "jrp"
			 * instruction at the Y1 pipeline.
			 */
			switch (opcode) {
			case JALR_UNARY_OPCODE_Y1:
			case JALRP_UNARY_OPCODE_Y1:
				y1_lr = true;
				y1_lr_reg = 55; /* Link register. */
				/* FALLTHROUGH */
			case JR_UNARY_OPCODE_Y1:
			case JRP_UNARY_OPCODE_Y1:
				y1_br = true;
				y1_br_reg = get_SrcA_Y1(bundle);
				break;
			case LNK_UNARY_OPCODE_Y1:
				/* "lnk" at Y1 pipeline. */
				y1_lr = true;
				y1_lr_reg = get_Dest_Y1(bundle);
				break;
			}
		}

		opcode = get_Opcode_Y2(bundle);
		mod = get_Mode(bundle);

		/*
		 * bundle_2 is the bundle after making Y2 into a dummy
		 * operation - ld zero, sp.
		 */
		bundle_2 = (bundle & (~GX_INSN_Y2_MASK)) | jit_y2_dummy();

		/* Make Y1 a fnop if Y1 is a branch or lnk operation. */
		if (y1_br || y1_lr) {
			bundle_2 &= ~(GX_INSN_Y1_MASK);
			bundle_2 |= jit_y1_fnop();
		}

		if (is_y0_y1_nop(bundle_2))
			bundle_2_enable = false;

		if (mod == MODE_OPCODE_YC2) {
			/* Store. */
			load_n_store = false;
			load_store_size = 1 << opcode;
			load_store_signed = false;
			find_regs(bundle, 0, &ra, &rb, &clob1, &clob2,
				  &clob3, &alias);
			if (load_store_size > 8)
				unexpected = true;
		} else {
			/* Load. */
			load_n_store = true;
			if (mod == MODE_OPCODE_YB2) {
				switch (opcode) {
				case LD_OPCODE_Y2:
					load_store_signed = false;
					load_store_size = 8;
					break;
				case LD4S_OPCODE_Y2:
					load_store_signed = true;
					load_store_size = 4;
					break;
				case LD4U_OPCODE_Y2:
					load_store_signed = false;
					load_store_size = 4;
					break;
				default:
					unexpected = true;
				}
			} else if (mod == MODE_OPCODE_YA2) {
				if (opcode == LD2S_OPCODE_Y2) {
					load_store_signed = true;
					load_store_size = 2;
				} else if (opcode == LD2U_OPCODE_Y2) {
					load_store_signed = false;
					load_store_size = 2;
				} else
					unexpected = true;
			} else
				unexpected = true;
			find_regs(bundle, &rd, &ra, &rb, &clob1, &clob2,
				  &clob3, &alias);
		}
	} else {
		unsigned int opcode;

		/* bundle_2 is the bundle after making X1 a "fnop". */
		bundle_2 = (bundle & (~GX_INSN_X1_MASK)) | jit_x1_fnop();

		if (is_x0_x1_nop(bundle_2))
			bundle_2_enable = false;

		if (get_Opcode_X1(bundle) == RRR_0_OPCODE_X1) {
			opcode = get_UnaryOpcodeExtension_X1(bundle);

			if (get_RRROpcodeExtension_X1(bundle) ==
			    UNARY_RRR_0_OPCODE_X1) {
				load_n_store = true;
				find_regs(bundle, &rd, &ra, &rb, &clob1,
					  &clob2, &clob3, &alias);

				switch (opcode) {
				case LD_UNARY_OPCODE_X1:
					load_store_signed = false;
					load_store_size = 8;
					break;
				case LD4S_UNARY_OPCODE_X1:
					load_store_signed = true;
					/* FALLTHROUGH */
				case LD4U_UNARY_OPCODE_X1:
					load_store_size = 4;
					break;
				case LD2S_UNARY_OPCODE_X1:
					load_store_signed = true;
					/* FALLTHROUGH */
				case LD2U_UNARY_OPCODE_X1:
					load_store_size = 2;
					break;
				default:
					unexpected = true;
				}
			} else {
				load_n_store = false;
				load_store_signed = false;
				find_regs(bundle, 0, &ra, &rb,
					  &clob1, &clob2, &clob3,
					  &alias);

				opcode = get_RRROpcodeExtension_X1(bundle);
				switch (opcode) {
				case ST_RRR_0_OPCODE_X1:
					load_store_size = 8;
					break;
				case ST4_RRR_0_OPCODE_X1:
					load_store_size = 4;
					break;
				case ST2_RRR_0_OPCODE_X1:
					load_store_size = 2;
					break;
				default:
					unexpected = true;
				}
			}
		} else if (get_Opcode_X1(bundle) == IMM8_OPCODE_X1) {
			load_n_store = true;
			opcode = get_Imm8OpcodeExtension_X1(bundle);
			switch (opcode) {
			case LD_ADD_IMM8_OPCODE_X1:
				load_store_size = 8;
				break;
			case LD4S_ADD_IMM8_OPCODE_X1:
				load_store_signed = true;
				/* FALLTHROUGH */
			case LD4U_ADD_IMM8_OPCODE_X1:
				load_store_size = 4;
				break;
			case LD2S_ADD_IMM8_OPCODE_X1:
				load_store_signed = true;
				/* FALLTHROUGH */
			case LD2U_ADD_IMM8_OPCODE_X1:
				load_store_size = 2;
				break;
			case ST_ADD_IMM8_OPCODE_X1:
				load_n_store = false;
				load_store_size = 8;
				break;
			case ST4_ADD_IMM8_OPCODE_X1:
				load_n_store = false;
				load_store_size = 4;
				break;
			case ST2_ADD_IMM8_OPCODE_X1:
				load_n_store = false;
				load_store_size = 2;
				break;
			default:
				unexpected = true;
			}

			if (!unexpected) {
				x1_add = true;
				if (load_n_store)
					x1_add_imm8 = get_Imm8_X1(bundle);
				else
					x1_add_imm8 = get_Dest_Imm8_X1(bundle);
			}

			find_regs(bundle, load_n_store ? (&rd) : NULL,
				  &ra, &rb, &clob1, &clob2, &clob3, &alias);
		} else
			unexpected = true;
	}

	/*
	 * Some sanity checks for the register numbers extracted from the
	 * fault bundle.
	 */
	if (check_regs(rd, ra, rb, clob1, clob2, clob3))
		unexpected = true;

	/* Give a warning if register ra holds an already-aligned address. */
	if (!unexpected)
		WARN_ON(!((load_store_size - 1) & (regs->regs[ra])));

	/*
	 * If the fault came from kernel space, we only need to take care of
	 * the unaligned "get_user/put_user" macros defined in "uaccess.h".
	 * Basically, we will handle a bundle like this:
	 * {ld/2u/4s rd, ra; movei rx, 0} or {st/2/4 ra, rb; movei rx, 0}
	 * (Refer to file "arch/tile/include/asm/uaccess.h" for details.)
	 * For either a load or a store, a byte-wise operation is performed by
	 * calling get_user() or put_user(). If the macro returns a non-zero
	 * value, that value is stored in rx, otherwise rx is set to zero.
	 * Finally make pc point to the next bundle and return.
	 */
	if (EX1_PL(regs->ex1) != USER_PL) {

		unsigned long rx = 0;
		unsigned long x = 0, ret = 0;

		if (y1_br || y1_lr || x1_add ||
		    (load_store_signed !=
		     (load_n_store && load_store_size == 4))) {
			/* No branch, link, wrong sign-ext or load/store add. */
			unexpected = true;
		} else if (!unexpected) {
			if (bundle & TILEGX_BUNDLE_MODE_MASK) {
				/*
				 * Fault bundle is Y mode.
				 * Check if the Y1 and Y0 is the form of
				 * { movei rx, 0; nop/fnop }, if yes,
				 * find the rx.
				 */

				if ((get_Opcode_Y1(bundle) == ADDI_OPCODE_Y1)
				    && (get_SrcA_Y1(bundle) == TREG_ZERO) &&
				    (get_Imm8_Y1(bundle) == 0) &&
				    is_bundle_y0_nop(bundle)) {
					rx = get_Dest_Y1(bundle);
				} else if ((get_Opcode_Y0(bundle) ==
					    ADDI_OPCODE_Y0) &&
					   (get_SrcA_Y0(bundle) == TREG_ZERO) &&
					   (get_Imm8_Y0(bundle) == 0) &&
					   is_bundle_y1_nop(bundle)) {
					rx = get_Dest_Y0(bundle);
				} else {
					unexpected = true;
				}
			} else {
				/*
				 * Fault bundle is X mode.
				 * Check if the X0 is 'movei rx, 0',
				 * if yes, find the rx.
				 */

				if ((get_Opcode_X0(bundle) == IMM8_OPCODE_X0)
				    && (get_Imm8OpcodeExtension_X0(bundle) ==
					ADDI_IMM8_OPCODE_X0) &&
				    (get_SrcA_X0(bundle) == TREG_ZERO) &&
				    (get_Imm8_X0(bundle) == 0)) {
					rx = get_Dest_X0(bundle);
				} else {
					unexpected = true;
				}
			}

			/* rx should be less than 56. */
			if (!unexpected && (rx >= 56))
				unexpected = true;
		}

		if (!search_exception_tables(regs->pc)) {
			/* No fixup in the exception tables for the pc. */
			unexpected = true;
		}

		if (unexpected) {
			/* Unexpected unaligned kernel fault. */
			struct task_struct *tsk = validate_current();

			bust_spinlocks(1);

			show_regs(regs);

			if (unlikely(tsk->pid < 2)) {
				panic("Kernel unalign fault running %s!",
				      tsk->pid ? "init" : "the idle task");
			}
#ifdef SUPPORT_DIE
			die("Oops", regs);
#endif
			bust_spinlocks(1);

			do_group_exit(SIGKILL);
		} else {
			unsigned long i, b = 0;
			unsigned char *ptr =
				(unsigned char *)regs->regs[ra];
			if (load_n_store) {
				/* Handle get_user(x, ptr). */
				for (i = 0; i < load_store_size; i++) {
					ret = get_user(b, ptr++);
					if (!ret) {
						/* Success! Update x. */
#ifdef __LITTLE_ENDIAN
						x |= (b << (8 * i));
#else
						x <<= 8;
						x |= b;
#endif /* __LITTLE_ENDIAN */
					} else {
						x = 0;
						break;
					}
				}

				/* Sign-extend 4-byte loads. */
				if (load_store_size == 4)
					x = (long)(int)x;

				/* Set register rd. */
				regs->regs[rd] = x;

				/* Set register rx. */
				regs->regs[rx] = ret;

				/* Bump pc. */
				regs->pc += 8;
			} else {
				/* Handle put_user(x, ptr). */
				x = regs->regs[rb];
#ifdef __LITTLE_ENDIAN
				b = x;
#else
				/*
				 * Swap x in order to store x from low
				 * to high memory, same as in the
				 * little-endian case.
				 */
				switch (load_store_size) {
				case 8:
					b = swab64(x);
					break;
				case 4:
					b = swab32(x);
					break;
				case 2:
					b = swab16(x);
					break;
				}
#endif /* __LITTLE_ENDIAN */
				for (i = 0; i < load_store_size; i++) {
					ret = put_user(b, ptr++);
					if (ret)
						break;
					/* Success! Shift 1 byte. */
					b >>= 8;
				}

				/* Set register rx. */
				regs->regs[rx] = ret;

				/* Bump pc. */
				regs->pc += 8;
			}
		}

		unaligned_fixup_count++;

		if (unaligned_printk) {
			pr_info("%s/%d - Unalign fixup for kernel access to userspace %lx\n",
				current->comm, current->pid, regs->regs[ra]);
		}

		/* Done! Return to the exception handler. */
		return;
	}

	if ((align_ctl == 0) || unexpected) {
		siginfo_t info = {
			.si_signo = SIGBUS,
			.si_code = BUS_ADRALN,
			.si_addr = (unsigned char __user *)0
		};
		if (unaligned_printk)
			pr_info("Unalign bundle: unexp @%llx, %llx\n",
				(unsigned long long)regs->pc,
				(unsigned long long)bundle);

		if (ra < 56) {
			unsigned long uaa = (unsigned long)regs->regs[ra];
			/* Set bus Address. */
			info.si_addr = (unsigned char __user *)uaa;
		}

		unaligned_fixup_count++;

		trace_unhandled_signal("unaligned fixup trap", regs,
				       (unsigned long)info.si_addr, SIGBUS);
		force_sig_info(info.si_signo, &info, current);
		return;
	}

#ifdef __LITTLE_ENDIAN
#define UA_FIXUP_ADDR_DELTA          1
#define UA_FIXUP_BFEXT_START(_B_)    0
#define UA_FIXUP_BFEXT_END(_B_)      (8 * (_B_) - 1)
#else /* __BIG_ENDIAN */
#define UA_FIXUP_ADDR_DELTA          -1
#define UA_FIXUP_BFEXT_START(_B_)    (64 - 8 * (_B_))
#define UA_FIXUP_BFEXT_END(_B_)      63
#endif /* __LITTLE_ENDIAN */
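
/*
 * UA_FIXUP_ADDR_DELTA is the per-byte address step used by the byte-wise
 * store loops below; UA_FIXUP_BFEXT_START/END select the bits of the
 * assembled doubleword that hold a 2- or 4-byte value, so it can be zero-
 * or sign-extended into rd.
 */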

	if ((ra != rb) && (rd != TREG_SP) && !alias &&
	    !y1_br && !y1_lr && !x1_add) {
		/*
		 * Simple case: ra != rb and no register alias found,
		 * and no branch or link. This will be the majority.
		 * We can do a little better for this simple case than the
		 * generic scheme below.
		 */
		if (!load_n_store) {
			/*
			 * Simple store: ra != rb, no need for a scratch
			 * register. Just store and rotate right bytewise.
			 */
#ifdef __BIG_ENDIAN
			frag.insn[n++] =
				jit_x0_addi(ra, ra, load_store_size - 1) |
				jit_x1_fnop();
#endif /* __BIG_ENDIAN */
			for (k = 0; k < load_store_size; k++) {
				/* Store a byte. */
				frag.insn[n++] =
					jit_x0_rotli(rb, rb, 56) |
					jit_x1_st1_add(ra, rb,
						       UA_FIXUP_ADDR_DELTA);
			}
#ifdef __BIG_ENDIAN
			frag.insn[n] = jit_x1_addi(ra, ra, 1);
#else
			frag.insn[n] = jit_x1_addi(ra, ra,
						   -1 * load_store_size);
#endif /* __LITTLE_ENDIAN */

			if (load_store_size == 8) {
				frag.insn[n] |= jit_x0_fnop();
			} else if (load_store_size == 4) {
				frag.insn[n] |= jit_x0_rotli(rb, rb, 32);
			} else { /* = 2 */
				frag.insn[n] |= jit_x0_rotli(rb, rb, 16);
			}
			n++;
			if (bundle_2_enable)
				frag.insn[n++] = bundle_2;
			frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
		} else {
			if (rd == ra) {
				/* Use two clobber registers: clob1/2. */
				frag.insn[n++] =
					jit_x0_addi(TREG_SP, TREG_SP, -16) |
					jit_x1_fnop();
				frag.insn[n++] =
					jit_x0_addi(clob1, ra, 7) |
					jit_x1_st_add(TREG_SP, clob1, -8);
				frag.insn[n++] =
					jit_x0_addi(clob2, ra, 0) |
					jit_x1_st(TREG_SP, clob2);
				frag.insn[n++] =
					jit_x0_fnop() |
					jit_x1_ldna(rd, ra);
				frag.insn[n++] =
					jit_x0_fnop() |
					jit_x1_ldna(clob1, clob1);
				/*
				 * Note: we must make sure that rd is not
				 * sp. Recover clob1/2 from the stack.
				 */
				frag.insn[n++] =
					jit_x0_dblalign(rd, clob1, clob2) |
					jit_x1_ld_add(clob2, TREG_SP, 8);
				frag.insn[n++] =
					jit_x0_fnop() |
					jit_x1_ld_add(clob1, TREG_SP, 16);
			} else {
				/* Use one clobber register: clob1 only. */
				frag.insn[n++] =
					jit_x0_addi(TREG_SP, TREG_SP, -16) |
					jit_x1_fnop();
				frag.insn[n++] =
					jit_x0_addi(clob1, ra, 7) |
					jit_x1_st(TREG_SP, clob1);
				frag.insn[n++] =
					jit_x0_fnop() |
					jit_x1_ldna(rd, ra);
				frag.insn[n++] =
					jit_x0_fnop() |
					jit_x1_ldna(clob1, clob1);
				/*
				 * Note: we must make sure that rd is not
				 * sp. Recover clob1 from the stack.
				 */
				frag.insn[n++] =
					jit_x0_dblalign(rd, clob1, ra) |
					jit_x1_ld_add(clob1, TREG_SP, 16);
			}

			if (bundle_2_enable)
				frag.insn[n++] = bundle_2;
			/*
			 * For a non-8-byte load, extract the corresponding
			 * bytes and sign- or zero-extend.
			 */
			if (load_store_size == 4) {
				if (load_store_signed)
					frag.insn[n++] =
						jit_x0_bfexts(
							rd, rd,
							UA_FIXUP_BFEXT_START(4),
							UA_FIXUP_BFEXT_END(4)) |
						jit_x1_fnop();
				else
					frag.insn[n++] =
						jit_x0_bfextu(
							rd, rd,
							UA_FIXUP_BFEXT_START(4),
							UA_FIXUP_BFEXT_END(4)) |
						jit_x1_fnop();
			} else if (load_store_size == 2) {
				if (load_store_signed)
					frag.insn[n++] =
						jit_x0_bfexts(
							rd, rd,
							UA_FIXUP_BFEXT_START(2),
							UA_FIXUP_BFEXT_END(2)) |
						jit_x1_fnop();
				else
					frag.insn[n++] =
						jit_x0_bfextu(
							rd, rd,
							UA_FIXUP_BFEXT_START(2),
							UA_FIXUP_BFEXT_END(2)) |
						jit_x1_fnop();
			}

			frag.insn[n++] =
				jit_x0_fnop() |
				jit_x1_iret();
		}
	} else if (!load_n_store) {

		/*
		 * Generic memory store cases: use 3 clobber registers.
		 *
		 * Allocate space for saving clob2, 1, 3 on the user's stack.
		 * Register clob3 points to where clob2 is saved, followed by
		 * clob1 and 3 from high to low memory.
		 */
		frag.insn[n++] =
			jit_x0_addi(TREG_SP, TREG_SP, -32) |
			jit_x1_fnop();
		frag.insn[n++] =
			jit_x0_addi(clob3, TREG_SP, 16) |
			jit_x1_st_add(TREG_SP, clob3, 8);
#ifdef __LITTLE_ENDIAN
		frag.insn[n++] =
			jit_x0_addi(clob1, ra, 0) |
			jit_x1_st_add(TREG_SP, clob1, 8);
#else
		frag.insn[n++] =
			jit_x0_addi(clob1, ra, load_store_size - 1) |
			jit_x1_st_add(TREG_SP, clob1, 8);
#endif
		if (load_store_size == 8) {
			/*
			 * We store one byte at a time, not for speed but for
			 * compact code. After each store, the data source
			 * register is rotated right by one byte, so it is
			 * unchanged after 8 stores.
			 */
			frag.insn[n++] =
				jit_x0_addi(clob2, TREG_ZERO, 7) |
				jit_x1_st_add(TREG_SP, clob2, 16);
			frag.insn[n++] =
				jit_x0_rotli(rb, rb, 56) |
				jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
			frag.insn[n++] =
				jit_x0_addi(clob2, clob2, -1) |
				jit_x1_bnezt(clob2, -1);
			frag.insn[n++] =
				jit_x0_fnop() |
				jit_x1_addi(clob2, y1_br_reg, 0);
		} else if (load_store_size == 4) {
			frag.insn[n++] =
				jit_x0_addi(clob2, TREG_ZERO, 3) |
				jit_x1_st_add(TREG_SP, clob2, 16);
			frag.insn[n++] =
				jit_x0_rotli(rb, rb, 56) |
				jit_x1_st1_add(clob1, rb, UA_FIXUP_ADDR_DELTA);
			frag.insn[n++] =
				jit_x0_addi(clob2, clob2, -1) |
				jit_x1_bnezt(clob2, -1);
			/*
			 * Same as the 8-byte case, but rotate another 4
			 * bytes to recover rb for the 4-byte store.
			 */
			frag.insn[n++] = jit_x0_rotli(rb, rb, 32) |
				jit_x1_addi(clob2, y1_br_reg, 0);
		} else { /* = 2 */
			frag.insn[n++] =
				jit_x0_addi(clob2, rb, 0) |
				jit_x1_st_add(TREG_SP, clob2, 16);
			for (k = 0; k < 2; k++) {
				frag.insn[n++] =
					jit_x0_shrui(rb, rb, 8) |
					jit_x1_st1_add(clob1, rb,
						       UA_FIXUP_ADDR_DELTA);
			}
			frag.insn[n++] =
				jit_x0_addi(rb, clob2, 0) |
				jit_x1_addi(clob2, y1_br_reg, 0);
		}

		if (bundle_2_enable)
			frag.insn[n++] = bundle_2;

		if (y1_lr) {
			frag.insn[n++] =
				jit_x0_fnop() |
				jit_x1_mfspr(y1_lr_reg,
					     SPR_EX_CONTEXT_0_0);
		}
		if (y1_br) {
			frag.insn[n++] =
				jit_x0_fnop() |
				jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
					     clob2);
		}
		if (x1_add) {
			frag.insn[n++] =
				jit_x0_addi(ra, ra, x1_add_imm8) |
				jit_x1_ld_add(clob2, clob3, -8);
		} else {
			frag.insn[n++] =
				jit_x0_fnop() |
				jit_x1_ld_add(clob2, clob3, -8);
		}
		frag.insn[n++] =
			jit_x0_fnop() |
			jit_x1_ld_add(clob1, clob3, -8);
		frag.insn[n++] = jit_x0_fnop() | jit_x1_ld(clob3, clob3);
		frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
	} else {
		/*
		 * Generic memory load cases.
		 *
		 * Allocate space for saving clob1, 2, 3 on the user's stack.
		 * Register clob3 points to where clob1 is saved, followed
		 * by clob2 and 3 from high to low memory.
		 */
		frag.insn[n++] =
			jit_x0_addi(TREG_SP, TREG_SP, -32) |
			jit_x1_fnop();
		frag.insn[n++] =
			jit_x0_addi(clob3, TREG_SP, 16) |
			jit_x1_st_add(TREG_SP, clob3, 8);
		frag.insn[n++] =
			jit_x0_addi(clob2, ra, 0) |
			jit_x1_st_add(TREG_SP, clob2, 8);

		if (y1_br) {
			frag.insn[n++] =
				jit_x0_addi(clob1, y1_br_reg, 0) |
				jit_x1_st_add(TREG_SP, clob1, 16);
		} else {
			frag.insn[n++] =
				jit_x0_fnop() |
				jit_x1_st_add(TREG_SP, clob1, 16);
		}

		if (bundle_2_enable)
			frag.insn[n++] = bundle_2;

		if (y1_lr) {
			frag.insn[n++] =
				jit_x0_fnop() |
				jit_x1_mfspr(y1_lr_reg,
					     SPR_EX_CONTEXT_0_0);
		}
		if (y1_br) {
			frag.insn[n++] =
				jit_x0_fnop() |
				jit_x1_mtspr(SPR_EX_CONTEXT_0_0,
					     clob1);
		}

		frag.insn[n++] =
			jit_x0_addi(clob1, clob2, 7) |
			jit_x1_ldna(rd, clob2);
		frag.insn[n++] =
			jit_x0_fnop() |
			jit_x1_ldna(clob1, clob1);
		frag.insn[n++] =
			jit_x0_dblalign(rd, clob1, clob2) |
			jit_x1_ld_add(clob1, clob3, -8);
		if (x1_add) {
			frag.insn[n++] =
				jit_x0_addi(ra, ra, x1_add_imm8) |
				jit_x1_ld_add(clob2, clob3, -8);
		} else {
			frag.insn[n++] =
				jit_x0_fnop() |
				jit_x1_ld_add(clob2, clob3, -8);
		}

		frag.insn[n++] =
			jit_x0_fnop() |
			jit_x1_ld(clob3, clob3);

		if (load_store_size == 4) {
			if (load_store_signed)
				frag.insn[n++] =
					jit_x0_bfexts(
						rd, rd,
						UA_FIXUP_BFEXT_START(4),
						UA_FIXUP_BFEXT_END(4)) |
					jit_x1_fnop();
			else
				frag.insn[n++] =
					jit_x0_bfextu(
						rd, rd,
						UA_FIXUP_BFEXT_START(4),
						UA_FIXUP_BFEXT_END(4)) |
					jit_x1_fnop();
		} else if (load_store_size == 2) {
			if (load_store_signed)
				frag.insn[n++] =
					jit_x0_bfexts(
						rd, rd,
						UA_FIXUP_BFEXT_START(2),
						UA_FIXUP_BFEXT_END(2)) |
					jit_x1_fnop();
			else
				frag.insn[n++] =
					jit_x0_bfextu(
						rd, rd,
						UA_FIXUP_BFEXT_START(2),
						UA_FIXUP_BFEXT_END(2)) |
					jit_x1_fnop();
		}

		frag.insn[n++] = jit_x0_fnop() | jit_x1_iret();
	}

	/* Max JIT bundle count is 14. */
	WARN_ON(n > 14);

	if (!unexpected) {
		int status = 0;
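		/*
		 * Each fault pc gets a 128-byte fragment slot within the
		 * one-page JIT area; the slot index is derived from the pc
		 * so that different fault sites tend to use different slots.
		 */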
		int idx = (regs->pc >> 3) &
			((1ULL << (PAGE_SHIFT - UNALIGN_JIT_SHIFT)) - 1);

		frag.pc = regs->pc;
		frag.bundle = bundle;

		if (unaligned_printk) {
			pr_info("%s/%d, Unalign fixup: pc=%lx bundle=%lx %d %d %d %d %d %d %d %d\n",
				current->comm, current->pid,
				(unsigned long)frag.pc,
				(unsigned long)frag.bundle,
				(int)alias, (int)rd, (int)ra,
				(int)rb, (int)bundle_2_enable,
				(int)y1_lr, (int)y1_br, (int)x1_add);

			for (k = 0; k < n; k += 2)
				pr_info("[%d] %016llx %016llx\n",
					k, (unsigned long long)frag.insn[k],
					(unsigned long long)frag.insn[k+1]);
		}

		/* Swap bundle byte order for big-endian systems. */
#ifdef __BIG_ENDIAN
		frag.bundle = GX_INSN_BSWAP(frag.bundle);
		for (k = 0; k < n; k++)
			frag.insn[k] = GX_INSN_BSWAP(frag.insn[k]);
#endif /* __BIG_ENDIAN */

		status = copy_to_user((void __user *)&jit_code_area[idx],
				      &frag, sizeof(frag));
		if (status) {
			/* Failed to copy the JIT into userland; send SIGSEGV. */
			siginfo_t info = {
				.si_signo = SIGSEGV,
				.si_code = SEGV_MAPERR,
				.si_addr = (void __user *)&jit_code_area[idx]
			};

			pr_warn("Unalign fixup: pid=%d %s jit_code_area=%llx\n",
				current->pid, current->comm,
				(unsigned long long)&jit_code_area[idx]);

			trace_unhandled_signal("segfault in unalign fixup",
					       regs,
					       (unsigned long)info.si_addr,
					       SIGSEGV);
			force_sig_info(info.si_signo, &info, current);
			return;
		}

		/* Do a cheaper, not entirely accurate, increment. */
		unaligned_fixup_count++;
		__flush_icache_range((unsigned long)&jit_code_area[idx],
				     (unsigned long)&jit_code_area[idx] +
				     sizeof(frag));

		/* Setup SPR_EX_CONTEXT_0_0/1 for returning to the user program. */
		__insn_mtspr(SPR_EX_CONTEXT_0_0, regs->pc + 8);
		__insn_mtspr(SPR_EX_CONTEXT_0_1, PL_ICS_EX1(USER_PL, 0));

		/* Point pc at the start of the new JIT. */
		regs->pc = (unsigned long)&jit_code_area[idx].insn[0];

		/* Set ICS in SPR_EX_CONTEXT_K_1. */
		regs->ex1 = PL_ICS_EX1(USER_PL, 1);
	}
}

/*
 * C function to generate the unaligned data JIT. Called from the unaligned
 * data interrupt handler.
 *
 * First check if the unaligned fixup is disabled, or the exception did not
 * come from user space, or the sp register points to an unaligned address;
 * if so, generate a SIGBUS. Then map a page into user space as the JIT area
 * if it is not mapped yet. Generate the JIT code by calling jit_bundle_gen().
 * After that, return to the exception handler.
 *
 * The exception handler will "iret" to the newly generated JIT code after
 * restoring caller-saved registers. In theory, the JIT code will perform
 * another "iret" to resume the user's program.
 */
void do_unaligned(struct pt_regs *regs, int vecnum)
{
	tilegx_bundle_bits __user *pc;
	tilegx_bundle_bits bundle;
	struct thread_info *info = current_thread_info();
	int align_ctl;

	/* Check the per-process unaligned JIT flags. */
	align_ctl = unaligned_fixup;
	switch (task_thread_info(current)->align_ctl) {
	case PR_UNALIGN_NOPRINT:
		align_ctl = 1;
		break;
	case PR_UNALIGN_SIGBUS:
		align_ctl = 0;
		break;
	}
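
	/*
	 * At this point align_ctl reflects the per-task prctl(PR_SET_UNALIGN)
	 * setting, if one was made, and otherwise the global unaligned_fixup
	 * boot-time default.
	 */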

	/* Enable interrupts in order to access userland. */
	local_irq_enable();

	/*
	 * If the fault came from kernel space, there are two choices:
	 * (a) unaligned_fixup < 1, we will first call the get/put_user fixup
	 *     to return -EFAULT. If there is no fixup, simply panic the
	 *     kernel.
	 * (b) unaligned_fixup >= 1, we will try to fix the unaligned access
	 *     if it was triggered by the get_user/put_user() macros. Panic
	 *     the kernel if it is not fixable.
	 */
	if (EX1_PL(regs->ex1) != USER_PL) {

		if (align_ctl < 1) {
			unaligned_fixup_count++;
			/* If the exception came from the kernel, try to fix it up. */
			if (fixup_exception(regs)) {
				if (unaligned_printk)
					pr_info("Unalign fixup: %d %llx @%llx\n",
						(int)unaligned_fixup,
						(unsigned long long)regs->ex1,
						(unsigned long long)regs->pc);
			} else {
				/* Not fixable. Go panic. */
				panic("Unalign exception in Kernel. pc=%lx",
				      regs->pc);
			}
		} else {
			/*
			 * Try to fix the exception. If we can't, panic the
			 * kernel.
			 */
			bundle = GX_INSN_BSWAP(
				*((tilegx_bundle_bits *)(regs->pc)));
			jit_bundle_gen(regs, bundle, align_ctl);
		}
		return;
	}

	/*
	 * If the fault came from user space with ICS set, or the stack is not
	 * aligned, trigger SIGBUS.
	 */
	if ((regs->sp & 0x7) || (regs->ex1) || (align_ctl < 0)) {
		siginfo_t info = {
			.si_signo = SIGBUS,
			.si_code = BUS_ADRALN,
			.si_addr = (unsigned char __user *)0
		};

		if (unaligned_printk)
			pr_info("Unalign fixup: %d %llx @%llx\n",
				(int)unaligned_fixup,
				(unsigned long long)regs->ex1,
				(unsigned long long)regs->pc);

		unaligned_fixup_count++;
		trace_unhandled_signal("unaligned fixup trap", regs, 0, SIGBUS);
		force_sig_info(info.si_signo, &info, current);
		return;
	}

	/* Read the bundle that caused the exception. */
	pc = (tilegx_bundle_bits __user *)(regs->pc);
	if (get_user(bundle, pc) != 0) {
		/* We should never get here, since pc is a valid user address. */
		siginfo_t info = {
			.si_signo = SIGSEGV,
			.si_code = SEGV_MAPERR,
			.si_addr = (void __user *)pc
		};
		pr_err("Couldn't read instruction at %p trying to step\n", pc);
		trace_unhandled_signal("segfault in unalign fixup", regs,
				       (unsigned long)info.si_addr, SIGSEGV);
		force_sig_info(info.si_signo, &info, current);
		return;
	}

	if (!info->unalign_jit_base) {
		void __user *user_page;

		/*
		 * Allocate a page in userland.
		 * For 64-bit processes we try to place the mapping far
		 * from anything else that might be going on (specifically
		 * 64 GB below the top of the user address space). If it
		 * happens not to be possible to put it there, it's OK;
		 * the kernel will choose another location and we'll
		 * remember it for later.
		 */
		if (is_compat_task())
			user_page = NULL;
		else
			user_page = (void __user *)(TASK_SIZE - (1UL << 36)) +
				(current->pid << PAGE_SHIFT);

		user_page = (void __user *) vm_mmap(NULL,
						    (unsigned long)user_page,
						    PAGE_SIZE,
						    PROT_EXEC | PROT_READ |
						    PROT_WRITE,
#ifdef CONFIG_HOMECACHE
						    MAP_CACHE_HOME_TASK |
#endif
						    MAP_PRIVATE |
						    MAP_ANONYMOUS,
						    0);

		if (IS_ERR((void __force *)user_page)) {
			pr_err("Out of kernel pages trying do_mmap\n");
			return;
		}

		/* Save the address in the thread_info struct. */
		info->unalign_jit_base = user_page;
		if (unaligned_printk)
			pr_info("Unalign bundle: %d:%d, allocate page @%llx\n",
				raw_smp_processor_id(), current->pid,
				(unsigned long long)user_page);
	}

	/* Generate the unaligned JIT. */
	jit_bundle_gen(regs, GX_INSN_BSWAP(bundle), align_ctl);
}

#endif /* __tilegx__ */