s390vx.uc 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. /*
  2. * raid6_vx$#.c
  3. *
  4. * $#-way unrolled RAID6 gen/xor functions for s390
  5. * based on the vector facility
  6. *
  7. * Copyright IBM Corp. 2016
  8. * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
  9. *
  10. * This file is postprocessed using unroll.awk.
  11. */
  12. #include <linux/raid/pq.h>
  13. #include <asm/fpu/api.h>
  14. asm(".include \"asm/vx-insn.h\"\n");
  15. #define NSIZE 16
  16. static inline void LOAD_CONST(void)
  17. {
  18. asm volatile("VREPIB %v24,7");
  19. asm volatile("VREPIB %v25,0x1d");
  20. }
  21. /*
  22. * The SHLBYTE() operation shifts each of the 16 bytes in
  23. * vector register y left by 1 bit and stores the result in
  24. * vector register x.
  25. */
  26. static inline void SHLBYTE(int x, int y)
  27. {
  28. asm volatile ("VAB %0,%1,%1" : : "i" (x), "i" (y));
  29. }
  30. /*
  31. * For each of the 16 bytes in the vector register y the MASK()
  32. * operation returns 0xFF if the high bit of the byte is 1,
  33. * or 0x00 if the high bit is 0. The result is stored in vector
  34. * register x.
  35. */
  36. static inline void MASK(int x, int y)
  37. {
  38. asm volatile ("VESRAVB %0,%1,24" : : "i" (x), "i" (y));
  39. }
  40. static inline void AND(int x, int y, int z)
  41. {
  42. asm volatile ("VN %0,%1,%2" : : "i" (x), "i" (y), "i" (z));
  43. }
  44. static inline void XOR(int x, int y, int z)
  45. {
  46. asm volatile ("VX %0,%1,%2" : : "i" (x), "i" (y), "i" (z));
  47. }
  48. static inline void LOAD_DATA(int x, int n, u8 *ptr)
  49. {
  50. typedef struct { u8 _[16*n]; } addrtype;
  51. register addrtype *__ptr asm("1") = (addrtype *) ptr;
  52. asm volatile ("VLM %2,%3,0,%r1"
  53. : : "m" (*__ptr), "a" (__ptr), "i" (x), "i" (x + n - 1));
  54. }
  55. static inline void STORE_DATA(int x, int n, u8 *ptr)
  56. {
  57. typedef struct { u8 _[16*n]; } addrtype;
  58. register addrtype *__ptr asm("1") = (addrtype *) ptr;
  59. asm volatile ("VSTM %2,%3,0,1"
  60. : "=m" (*__ptr) : "a" (__ptr), "i" (x), "i" (x + n - 1));
  61. }
  62. static inline void COPY_VEC(int x, int y)
  63. {
  64. asm volatile ("VLR %0,%1" : : "i" (x), "i" (y));
  65. }
  66. static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
  67. {
  68. struct kernel_fpu vxstate;
  69. u8 **dptr, *p, *q;
  70. int d, z, z0;
  71. kernel_fpu_begin(&vxstate, KERNEL_VXR);
  72. LOAD_CONST();
  73. dptr = (u8 **) ptrs;
  74. z0 = disks - 3; /* Highest data disk */
  75. p = dptr[z0 + 1]; /* XOR parity */
  76. q = dptr[z0 + 2]; /* RS syndrome */
  77. for (d = 0; d < bytes; d += $#*NSIZE) {
  78. LOAD_DATA(0,$#,&dptr[z0][d]);
  79. COPY_VEC(8+$$,0+$$);
  80. for (z = z0 - 1; z >= 0; z--) {
  81. MASK(16+$$,8+$$);
  82. AND(16+$$,16+$$,25);
  83. SHLBYTE(8+$$,8+$$);
  84. XOR(8+$$,8+$$,16+$$);
  85. LOAD_DATA(16,$#,&dptr[z][d]);
  86. XOR(0+$$,0+$$,16+$$);
  87. XOR(8+$$,8+$$,16+$$);
  88. }
  89. STORE_DATA(0,$#,&p[d]);
  90. STORE_DATA(8,$#,&q[d]);
  91. }
  92. kernel_fpu_end(&vxstate, KERNEL_VXR);
  93. }
  94. static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop,
  95. size_t bytes, void **ptrs)
  96. {
  97. struct kernel_fpu vxstate;
  98. u8 **dptr, *p, *q;
  99. int d, z, z0;
  100. dptr = (u8 **) ptrs;
  101. z0 = stop; /* P/Q right side optimization */
  102. p = dptr[disks - 2]; /* XOR parity */
  103. q = dptr[disks - 1]; /* RS syndrome */
  104. kernel_fpu_begin(&vxstate, KERNEL_VXR);
  105. LOAD_CONST();
  106. for (d = 0; d < bytes; d += $#*NSIZE) {
  107. /* P/Q data pages */
  108. LOAD_DATA(0,$#,&dptr[z0][d]);
  109. COPY_VEC(8+$$,0+$$);
  110. for (z = z0 - 1; z >= start; z--) {
  111. MASK(16+$$,8+$$);
  112. AND(16+$$,16+$$,25);
  113. SHLBYTE(8+$$,8+$$);
  114. XOR(8+$$,8+$$,16+$$);
  115. LOAD_DATA(16,$#,&dptr[z][d]);
  116. XOR(0+$$,0+$$,16+$$);
  117. XOR(8+$$,8+$$,16+$$);
  118. }
  119. /* P/Q left side optimization */
  120. for (z = start - 1; z >= 0; z--) {
  121. MASK(16+$$,8+$$);
  122. AND(16+$$,16+$$,25);
  123. SHLBYTE(8+$$,8+$$);
  124. XOR(8+$$,8+$$,16+$$);
  125. }
  126. LOAD_DATA(16,$#,&p[d]);
  127. XOR(16+$$,16+$$,0+$$);
  128. STORE_DATA(16,$#,&p[d]);
  129. LOAD_DATA(16,$#,&q[d]);
  130. XOR(16+$$,16+$$,8+$$);
  131. STORE_DATA(16,$#,&q[d]);
  132. }
  133. kernel_fpu_end(&vxstate, KERNEL_VXR);
  134. }
  135. static int raid6_s390vx$#_valid(void)
  136. {
  137. return MACHINE_HAS_VX;
  138. }
  139. const struct raid6_calls raid6_s390vx$# = {
  140. raid6_s390vx$#_gen_syndrome,
  141. raid6_s390vx$#_xor_syndrome,
  142. raid6_s390vx$#_valid,
  143. "vx128x$#",
  144. 1
  145. };