recov_avx512.c
/*
 * Copyright (C) 2016 Intel Corporation
 *
 * Author: Gayatri Kammela <gayatri.kammela@intel.com>
 * Author: Megha Dey <megha.dey@linux.intel.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */

#ifdef CONFIG_AS_AVX512

#include <linux/raid/pq.h>
#include "x86.h"

static int raid6_has_avx512(void)
{
        return boot_cpu_has(X86_FEATURE_AVX2) &&
               boot_cpu_has(X86_FEATURE_AVX) &&
               boot_cpu_has(X86_FEATURE_AVX512F) &&
               boot_cpu_has(X86_FEATURE_AVX512BW) &&
               boot_cpu_has(X86_FEATURE_AVX512VL) &&
               boot_cpu_has(X86_FEATURE_AVX512DQ);
}
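
/*
 * Recovering two data blocks D_a and D_b (a = faila < b = failb) uses the
 * standard RAID-6 identities over GF(2^8).  With Pxy/Qxy the syndromes
 * recomputed with the failed blocks zeroed:
 *
 *      Pd = P ^ Pxy = D_a ^ D_b
 *      Qd = Q ^ Qxy = g^a*D_a ^ g^b*D_b
 *
 * which solves to
 *
 *      D_b = Qd/(g^a ^ g^b) ^ Pd * g^a/(g^a ^ g^b)
 *      D_a = D_b ^ Pd
 *
 * A scalar sketch of the vector loop below (cf. lib/raid6/recov.c):
 *
 *      px  = *p ^ *dp;              // Pd
 *      qx  = qmul[*q ^ *dq];        // Qd / (g^a ^ g^b)
 *      *dq = db = pbmul[px] ^ qx;   // reconstructed D_b
 *      *dp = db ^ px;               // reconstructed D_a
 */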
static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila,
                                     int failb, void **ptrs)
{
        u8 *p, *q, *dp, *dq;
        const u8 *pbmul;        /* P multiplier table for B data */
        const u8 *qmul;         /* Q multiplier table (for both) */
        const u8 x0f = 0x0f;

        p = (u8 *)ptrs[disks-2];
        q = (u8 *)ptrs[disks-1];

        /*
         * Compute syndrome with zero for the missing data pages
         * Use the dead data pages as temporary storage for
         * delta p and delta q
         */

        dp = (u8 *)ptrs[faila];
        ptrs[faila] = (void *)raid6_empty_zero_page;
        ptrs[disks-2] = dp;
        dq = (u8 *)ptrs[failb];
        ptrs[failb] = (void *)raid6_empty_zero_page;
        ptrs[disks-1] = dq;

        raid6_call.gen_syndrome(disks, bytes, ptrs);

        /* Restore pointer table */
        ptrs[faila] = dp;
        ptrs[failb] = dq;
        ptrs[disks-2] = p;
        ptrs[disks-1] = q;

        /* Now, pick the proper data tables */
        pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
        qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
                raid6_gfexp[failb]]];
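
        /*
         * qmul is the multiply-by-1/(g^a ^ g^b) table and pbmul the
         * multiply-by-g^a/(g^a ^ g^b) = 1/(1 ^ g^(b-a)) table, matching
         * the formulas above.  Each raid6_vgfmul entry is 32 bytes: a
         * 16-byte lookup for the low nibble followed by one for the
         * high nibble.
         */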

        kernel_fpu_begin();

        /* zmm7 = 0x0f repeated in all 64 bytes */
        asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));
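
        /*
         * The GF(2^8) multiplies below use the usual nibble-split
         * vpshufb trick: for a multiplier table T,
         * T[x] = T_lo[x & 0x0f] ^ T_hi[x >> 4], so one 64-byte vector
         * of products costs two shuffles and an xor.  zmm7 holds the
         * 0x0f mask; vpsraw followed by the same mask extracts the
         * high nibbles (there is no byte-granular shift, so the bits
         * that bleed in from the neighbouring byte are masked off).
         */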

        while (bytes) {
#ifdef CONFIG_X86_64
                asm volatile("vmovdqa64 %0, %%zmm1\n\t"
                             "vmovdqa64 %1, %%zmm9\n\t"
                             "vmovdqa64 %2, %%zmm0\n\t"
                             "vmovdqa64 %3, %%zmm8\n\t"
                             "vpxorq %4, %%zmm1, %%zmm1\n\t"
                             "vpxorq %5, %%zmm9, %%zmm9\n\t"
                             "vpxorq %6, %%zmm0, %%zmm0\n\t"
                             "vpxorq %7, %%zmm8, %%zmm8"
                             :
                             : "m" (q[0]), "m" (q[64]), "m" (p[0]),
                               "m" (p[64]), "m" (dq[0]), "m" (dq[64]),
                               "m" (dp[0]), "m" (dp[64]));
                /*
                 * 1 = dq[0]  ^ q[0]
                 * 9 = dq[64] ^ q[64]
                 * 0 = dp[0]  ^ p[0]
                 * 8 = dp[64] ^ p[64]
                 */
                asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
                             "vbroadcasti64x2 %1, %%zmm5"
                             :
                             : "m" (qmul[0]), "m" (qmul[16]));
                asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
                             "vpsraw $4, %%zmm9, %%zmm12\n\t"
                             "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
                             "vpandq %%zmm7, %%zmm9, %%zmm9\n\t"
                             "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
                             "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
                             "vpshufb %%zmm9, %%zmm4, %%zmm14\n\t"
                             "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
                             "vpshufb %%zmm12, %%zmm5, %%zmm15\n\t"
                             "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
                             "vpxorq %%zmm14, %%zmm15, %%zmm15\n\t"
                             "vpxorq %%zmm4, %%zmm5, %%zmm5"
                             :
                             : );
                /*
                 * 5  = qx[0]
                 * 15 = qx[64]
                 */
                asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
                             "vbroadcasti64x2 %1, %%zmm1\n\t"
                             "vpsraw $4, %%zmm0, %%zmm2\n\t"
                             "vpsraw $4, %%zmm8, %%zmm6\n\t"
                             "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
                             "vpandq %%zmm7, %%zmm8, %%zmm14\n\t"
                             "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
                             "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
                             "vpshufb %%zmm14, %%zmm4, %%zmm12\n\t"
                             "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
                             "vpshufb %%zmm6, %%zmm1, %%zmm13\n\t"
                             "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
                             "vpxorq %%zmm4, %%zmm1, %%zmm1\n\t"
                             "vpxorq %%zmm12, %%zmm13, %%zmm13"
                             :
                             : "m" (pbmul[0]), "m" (pbmul[16]));
                /*
                 * 1  = pbmul[px[0]]
                 * 13 = pbmul[px[64]]
                 */
                asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
                             "vpxorq %%zmm15, %%zmm13, %%zmm13"
                             :
                             : );
                /*
                 * 1  = db = DQ
                 * 13 = db[64] = DQ[64]
                 */
                asm volatile("vmovdqa64 %%zmm1, %0\n\t"
                             "vmovdqa64 %%zmm13, %1\n\t"
                             "vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
                             "vpxorq %%zmm13, %%zmm8, %%zmm8"
                             :
                             : "m" (dq[0]), "m" (dq[64]));
                asm volatile("vmovdqa64 %%zmm0, %0\n\t"
                             "vmovdqa64 %%zmm8, %1"
                             :
                             : "m" (dp[0]), "m" (dp[64]));

                bytes -= 128;
                p += 128;
                q += 128;
                dp += 128;
                dq += 128;
#else
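                /*
                 * Outside 64-bit mode only zmm0-zmm7 are encodable, so
                 * this variant runs the same algorithm on one 64-byte
                 * lane per iteration instead of two.
                 */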
                asm volatile("vmovdqa64 %0, %%zmm1\n\t"
                             "vmovdqa64 %1, %%zmm0\n\t"
                             "vpxorq %2, %%zmm1, %%zmm1\n\t"
                             "vpxorq %3, %%zmm0, %%zmm0"
                             :
                             : "m" (*q), "m" (*p), "m" (*dq), "m" (*dp));
                /* 1 = dq ^ q; 0 = dp ^ p */
                asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
                             "vbroadcasti64x2 %1, %%zmm5"
                             :
                             : "m" (qmul[0]), "m" (qmul[16]));
                /*
                 * 1 = dq ^ q
                 * 3 = (dq ^ q) >> 4
                 */
                asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
                             "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
                             "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
                             "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
                             "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
                             "vpxorq %%zmm4, %%zmm5, %%zmm5"
                             :
                             : );
                /* 5 = qx */
                asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
                             "vbroadcasti64x2 %1, %%zmm1"
                             :
                             : "m" (pbmul[0]), "m" (pbmul[16]));
                asm volatile("vpsraw $4, %%zmm0, %%zmm2\n\t"
                             "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
                             "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
                             "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
                             "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
                             "vpxorq %%zmm4, %%zmm1, %%zmm1"
                             :
                             : );
                /* 1 = pbmul[px] */
                asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
                             /* 1 = db = DQ */
                             "vmovdqa64 %%zmm1, %0\n\t"
                             :
                             : "m" (dq[0]));
                asm volatile("vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
                             "vmovdqa64 %%zmm0, %0"
                             :
                             : "m" (dp[0]));

                bytes -= 64;
                p += 64;
                q += 64;
                dp += 64;
                dq += 64;
#endif
        }

        kernel_fpu_end();
}
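
/*
 * raid6_datap_recov_avx512() handles the loss of one data block D_a
 * together with P.  Q alone determines D_a:
 *
 *      Qd = Q ^ Qxy = g^a*D_a  =>  D_a = g^(-a) * Qd
 *
 * gen_syndrome() recomputes Pxy (P with D_a zeroed) in place over the
 * lost P page, and the full P is then Pxy ^ D_a.
 */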
static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila,
                                     void **ptrs)
{
        u8 *p, *q, *dq;
        const u8 *qmul;         /* Q multiplier table */
        const u8 x0f = 0x0f;

        p = (u8 *)ptrs[disks-2];
        q = (u8 *)ptrs[disks-1];

        /*
         * Compute syndrome with zero for the missing data page
         * Use the dead data page as temporary storage for delta q
         */

        dq = (u8 *)ptrs[faila];
        ptrs[faila] = (void *)raid6_empty_zero_page;
        ptrs[disks-1] = dq;

        raid6_call.gen_syndrome(disks, bytes, ptrs);

        /* Restore pointer table */
        ptrs[faila] = dq;
        ptrs[disks-1] = q;

        /* Now, pick the proper data tables */
        qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
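        /* qmul multiplies by g^(-faila), cancelling D_a's Q coefficient */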

        kernel_fpu_begin();

        asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));

        while (bytes) {
#ifdef CONFIG_X86_64
                asm volatile("vmovdqa64 %0, %%zmm3\n\t"
                             "vmovdqa64 %1, %%zmm8\n\t"
                             "vpxorq %2, %%zmm3, %%zmm3\n\t"
                             "vpxorq %3, %%zmm8, %%zmm8"
                             :
                             : "m" (dq[0]), "m" (dq[64]), "m" (q[0]),
                               "m" (q[64]));
                /*
                 * 3 = q[0]  ^ dq[0]
                 * 8 = q[64] ^ dq[64]
                 */
                asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
                             "vmovapd %%zmm0, %%zmm13\n\t"
                             "vbroadcasti64x2 %1, %%zmm1\n\t"
                             "vmovapd %%zmm1, %%zmm14"
                             :
                             : "m" (qmul[0]), "m" (qmul[16]));
                asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
                             "vpsraw $4, %%zmm8, %%zmm12\n\t"
                             "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
                             "vpandq %%zmm7, %%zmm8, %%zmm8\n\t"
                             "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
                             "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
                             "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
                             "vpshufb %%zmm8, %%zmm13, %%zmm13\n\t"
                             "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
                             "vpshufb %%zmm12, %%zmm14, %%zmm14\n\t"
                             "vpxorq %%zmm0, %%zmm1, %%zmm1\n\t"
                             "vpxorq %%zmm13, %%zmm14, %%zmm14"
                             :
                             : );
                /*
                 * 1  = qmul[q[0]  ^ dq[0]]
                 * 14 = qmul[q[64] ^ dq[64]]
                 */
                asm volatile("vmovdqa64 %0, %%zmm2\n\t"
                             "vmovdqa64 %1, %%zmm12\n\t"
                             "vpxorq %%zmm1, %%zmm2, %%zmm2\n\t"
                             "vpxorq %%zmm14, %%zmm12, %%zmm12"
                             :
                             : "m" (p[0]), "m" (p[64]));
                /*
                 * 2  = p[0]  ^ qmul[q[0]  ^ dq[0]]
                 * 12 = p[64] ^ qmul[q[64] ^ dq[64]]
                 */
                asm volatile("vmovdqa64 %%zmm1, %0\n\t"
                             "vmovdqa64 %%zmm14, %1\n\t"
                             "vmovdqa64 %%zmm2, %2\n\t"
                             "vmovdqa64 %%zmm12, %3"
                             :
                             : "m" (dq[0]), "m" (dq[64]), "m" (p[0]),
                               "m" (p[64]));

                bytes -= 128;
                p += 128;
                q += 128;
                dq += 128;
#else
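                /* Same algorithm, one 64-byte lane (zmm0-zmm7 only) */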
                asm volatile("vmovdqa64 %0, %%zmm3\n\t"
                             "vpxorq %1, %%zmm3, %%zmm3"
                             :
                             : "m" (dq[0]), "m" (q[0]));
                /* 3 = q ^ dq */
                asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
                             "vbroadcasti64x2 %1, %%zmm1"
                             :
                             : "m" (qmul[0]), "m" (qmul[16]));
                asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
                             "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
                             "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
                             "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
                             "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
                             "vpxorq %%zmm0, %%zmm1, %%zmm1"
                             :
                             : );
                /* 1 = qmul[q ^ dq] */
                asm volatile("vmovdqa64 %0, %%zmm2\n\t"
                             "vpxorq %%zmm1, %%zmm2, %%zmm2"
                             :
                             : "m" (p[0]));
                /* 2 = p ^ qmul[q ^ dq] */
                asm volatile("vmovdqa64 %%zmm1, %0\n\t"
                             "vmovdqa64 %%zmm2, %1"
                             :
                             : "m" (dq[0]), "m" (p[0]));

                bytes -= 64;
                p += 64;
                q += 64;
                dq += 64;
#endif
        }

        kernel_fpu_end();
}

const struct raid6_recov_calls raid6_recov_avx512 = {
        .data2 = raid6_2data_recov_avx512,
        .datap = raid6_datap_recov_avx512,
        .valid = raid6_has_avx512,
#ifdef CONFIG_X86_64
        .name = "avx512x2",
#else
        .name = "avx512x1",
#endif
        .priority = 3,
};
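
/*
 * The recov_calls entries are probed at init; the highest-priority one
 * whose ->valid() passes is chosen, so priority 3 ranks this above the
 * AVX2 and SSSE3 recovery routines.
 */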

#else
#warning "your version of binutils lacks AVX512 support"
#endif