/* $OpenBSD: softraid_raid6.c,v 1.69 2015/07/21 03:30:51 krw Exp $ */
/*
 * Copyright (c) 2009 Marco Peereboom <marco@peereboom.us>
 * Copyright (c) 2009 Jordan Hargrave <jordan@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#include "bio.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/device.h>
#include <sys/ioctl.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/disk.h>
#include <sys/rwlock.h>
#include <sys/queue.h>
#include <sys/fcntl.h>
#include <sys/mount.h>
#include <sys/sensors.h>
#include <sys/stat.h>
#include <sys/task.h>
#include <sys/conf.h>
#include <sys/uio.h>

#include <scsi/scsi_all.h>
#include <scsi/scsiconf.h>
#include <scsi/scsi_disk.h>

#include <dev/softraidvar.h>
uint8_t *gf_map[256];
uint8_t gf_pow[768];
int	gf_log[256];

/* RAID 6 functions. */
int	sr_raid6_create(struct sr_discipline *, struct bioc_createraid *,
	    int, int64_t);
int	sr_raid6_assemble(struct sr_discipline *, struct bioc_createraid *,
	    int, void *);
int	sr_raid6_init(struct sr_discipline *);
int	sr_raid6_rw(struct sr_workunit *);
int	sr_raid6_openings(struct sr_discipline *);
void	sr_raid6_intr(struct buf *);
int	sr_raid6_wu_done(struct sr_workunit *);
void	sr_raid6_set_chunk_state(struct sr_discipline *, int, int);
void	sr_raid6_set_vol_state(struct sr_discipline *);

void	sr_raid6_xorp(void *, void *, int);
void	sr_raid6_xorq(void *, void *, int, int);
int	sr_raid6_addio(struct sr_workunit *wu, int, daddr_t, long,
	    void *, int, int, void *, void *, int);
void	sr_raid6_scrub(struct sr_discipline *);
int	sr_failio(struct sr_workunit *);

void	gf_init(void);
uint8_t	gf_inv(uint8_t);
int	gf_premul(uint8_t);
uint8_t	gf_mul(uint8_t, uint8_t);

#define SR_NOFAIL	0x00
#define SR_FAILX	(1L << 0)
#define SR_FAILY	(1L << 1)
#define SR_FAILP	(1L << 2)
#define SR_FAILQ	(1L << 3)
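
/*
 * Per-read context handed to sr_raid6_intr() via ccb_opaque: on completion
 * the read data is XORed into pbuf (P parity accumulator) and, multiplied
 * by the GF(256) coefficient gn, into qbuf (Q parity accumulator).
 */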
struct sr_raid6_opaque {
	int	gn;
	void	*pbuf;
	void	*qbuf;
};
/* discipline initialisation. */
void
sr_raid6_discipline_init(struct sr_discipline *sd)
{
	/* Initialize GF256 tables. */
	gf_init();

	/* Fill out discipline members. */
	sd->sd_type = SR_MD_RAID6;
	strlcpy(sd->sd_name, "RAID 6", sizeof(sd->sd_name));
	sd->sd_capabilities = SR_CAP_SYSTEM_DISK | SR_CAP_AUTO_ASSEMBLE |
	    SR_CAP_REDUNDANT;
	sd->sd_max_wu = SR_RAID6_NOWU;

	/* Setup discipline specific function pointers. */
	sd->sd_assemble = sr_raid6_assemble;
	sd->sd_create = sr_raid6_create;
	sd->sd_openings = sr_raid6_openings;
	sd->sd_scsi_rw = sr_raid6_rw;
	sd->sd_scsi_intr = sr_raid6_intr;
	sd->sd_scsi_wu_done = sr_raid6_wu_done;
	sd->sd_set_chunk_state = sr_raid6_set_chunk_state;
	sd->sd_set_vol_state = sr_raid6_set_vol_state;
}
int
sr_raid6_create(struct sr_discipline *sd, struct bioc_createraid *bc,
    int no_chunk, int64_t coerced_size)
{
	if (no_chunk < 4) {
		sr_error(sd->sd_sc, "%s requires four or more chunks",
		    sd->sd_name);
		return EINVAL;
	}

	/*
	 * XXX add variable strip size later even though MAXPHYS is really
	 * the clever value, users like to tinker with that type of stuff.
	 */
	sd->sd_meta->ssdi.ssd_strip_size = MAXPHYS;
	sd->sd_meta->ssdi.ssd_size = (coerced_size &
	    ~(((u_int64_t)sd->sd_meta->ssdi.ssd_strip_size >>
	    DEV_BSHIFT) - 1)) * (no_chunk - 2);

	return sr_raid6_init(sd);
}
int
sr_raid6_assemble(struct sr_discipline *sd, struct bioc_createraid *bc,
    int no_chunk, void *data)
{
	return sr_raid6_init(sd);
}

int
sr_raid6_init(struct sr_discipline *sd)
{
	/* Initialise runtime values. */
	sd->mds.mdd_raid6.sr6_strip_bits =
	    sr_validate_stripsize(sd->sd_meta->ssdi.ssd_strip_size);
	if (sd->mds.mdd_raid6.sr6_strip_bits == -1) {
		sr_error(sd->sd_sc, "invalid strip size");
		return EINVAL;
	}

	/* only if stripsize <= MAXPHYS */
	sd->sd_max_ccb_per_wu = max(6, 2 * sd->sd_meta->ssdi.ssd_chunk_no);

	return 0;
}
int
sr_raid6_openings(struct sr_discipline *sd)
{
	return (sd->sd_max_wu >> 1); /* 2 wu's per IO */
}
void
sr_raid6_set_chunk_state(struct sr_discipline *sd, int c, int new_state)
{
	int			old_state, s;

	/* XXX this is for RAID 0 */
	DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n",
	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
	    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state);

	/* ok to go to splbio since this only happens in error path */
	s = splbio();
	old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status;

	/* multiple IOs to the same chunk that fail will come through here */
	if (old_state == new_state)
		goto done;

	switch (old_state) {
	case BIOC_SDONLINE:
		switch (new_state) {
		case BIOC_SDOFFLINE:
		case BIOC_SDSCRUB:
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SDOFFLINE:
		if (new_state == BIOC_SDREBUILD) {
			;
		} else
			goto die;
		break;

	case BIOC_SDSCRUB:
		switch (new_state) {
		case BIOC_SDONLINE:
		case BIOC_SDOFFLINE:
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SDREBUILD:
		switch (new_state) {
		case BIOC_SDONLINE:
		case BIOC_SDOFFLINE:
			break;
		default:
			goto die;
		}
		break;

	default:
die:
		splx(s); /* XXX */
		panic("%s: %s: %s: invalid chunk state transition "
		    "%d -> %d", DEVNAME(sd->sd_sc),
		    sd->sd_meta->ssd_devname,
		    sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname,
		    old_state, new_state);
		/* NOTREACHED */
	}

	sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state;
	sd->sd_set_vol_state(sd);

	sd->sd_must_flush = 1;
	task_add(systq, &sd->sd_meta_save_task);
done:
	splx(s);
}
void
sr_raid6_set_vol_state(struct sr_discipline *sd)
{
	int			states[SR_MAX_STATES];
	int			new_state, i, s, nd;
	int			old_state = sd->sd_vol_status;

	/* XXX this is for RAID 0 */
	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n",
	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);

	nd = sd->sd_meta->ssdi.ssd_chunk_no;

	for (i = 0; i < SR_MAX_STATES; i++)
		states[i] = 0;

	for (i = 0; i < nd; i++) {
		s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status;
		if (s >= SR_MAX_STATES)
			panic("%s: %s: %s: invalid chunk state",
			    DEVNAME(sd->sd_sc),
			    sd->sd_meta->ssd_devname,
			    sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname);
		states[s]++;
	}

	if (states[BIOC_SDONLINE] == nd)
		new_state = BIOC_SVONLINE;
	else if (states[BIOC_SDONLINE] < nd - 2)
		new_state = BIOC_SVOFFLINE;
	else if (states[BIOC_SDSCRUB] != 0)
		new_state = BIOC_SVSCRUB;
	else if (states[BIOC_SDREBUILD] != 0)
		new_state = BIOC_SVREBUILD;
	else if (states[BIOC_SDONLINE] < nd)
		new_state = BIOC_SVDEGRADED;
	else {
		printf("old_state = %d, ", old_state);
		for (i = 0; i < nd; i++)
			printf("%d = %d, ", i,
			    sd->sd_vol.sv_chunks[i]->src_meta.scm_status);
		panic("invalid new_state");
	}

	DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state %d -> %d\n",
	    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
	    old_state, new_state);

	switch (old_state) {
	case BIOC_SVONLINE:
		switch (new_state) {
		case BIOC_SVONLINE: /* can go to same state */
		case BIOC_SVOFFLINE:
		case BIOC_SVDEGRADED:
		case BIOC_SVREBUILD: /* happens on boot */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVOFFLINE:
		/* XXX this might be a little too much */
		goto die;

	case BIOC_SVDEGRADED:
		switch (new_state) {
		case BIOC_SVOFFLINE:
		case BIOC_SVREBUILD:
		case BIOC_SVDEGRADED: /* can go to the same state */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVBUILDING:
		switch (new_state) {
		case BIOC_SVONLINE:
		case BIOC_SVOFFLINE:
		case BIOC_SVBUILDING: /* can go to the same state */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVSCRUB:
		switch (new_state) {
		case BIOC_SVONLINE:
		case BIOC_SVOFFLINE:
		case BIOC_SVDEGRADED:
		case BIOC_SVSCRUB: /* can go to same state */
			break;
		default:
			goto die;
		}
		break;

	case BIOC_SVREBUILD:
		switch (new_state) {
		case BIOC_SVONLINE:
		case BIOC_SVOFFLINE:
		case BIOC_SVDEGRADED:
		case BIOC_SVREBUILD: /* can go to the same state */
			break;
		default:
			goto die;
		}
		break;

	default:
die:
		panic("%s: %s: invalid volume state transition %d -> %d",
		    DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
		    old_state, new_state);
		/* NOTREACHED */
	}

	sd->sd_vol_status = new_state;
}
/* modes:
 *   readq: sr_raid6_addio(i, lba, length, NULL, SCSI_DATA_IN,
 *		0, qbuf, NULL, 0);
 *   readp: sr_raid6_addio(i, lba, length, NULL, SCSI_DATA_IN,
 *		0, pbuf, NULL, 0);
 *   readx: sr_raid6_addio(i, lba, length, NULL, SCSI_DATA_IN,
 *		0, pbuf, qbuf, gf_pow[i]);
 */
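
/*
 * Parity relations assumed throughout sr_raid6_rw() (the usual RAID 6
 * P+Q scheme, with gn = gf_pow[n] as drive n's generator and all
 * arithmetic in GF(2^8) with polynomial 0x11D; '^' is XOR, which is
 * also GF addition):
 *
 *	P = D0 ^ D1 ^ ... ^ Dn-1
 *	Q = g0*D0 ^ g1*D1 ^ ... ^ gn-1*Dn-1
 *
 * One lost data strip can be rebuilt from P (RAID 5 style) or from Q plus
 * the surviving data; two lost data strips need both P and Q.
 */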
int
sr_raid6_rw(struct sr_workunit *wu)
{
	struct sr_workunit	*wu_r = NULL;
	struct sr_discipline	*sd = wu->swu_dis;
	struct scsi_xfer	*xs = wu->swu_xs;
	struct sr_chunk		*scp;
	int			s, fail, i, gxinv, pxinv;
	daddr_t			blkno, lba;
	int64_t			chunk_offs, lbaoffs, offset, strip_offs;
	int64_t			strip_no, strip_size, strip_bits, row_size;
	int64_t			fchunk, no_chunk, chunk, qchunk, pchunk;
	long			length, datalen;
	void			*pbuf, *data, *qbuf;

	/* blkno and scsi error will be handled by sr_validate_io */
	if (sr_validate_io(wu, &blkno, "sr_raid6_rw"))
		goto bad;

	strip_size = sd->sd_meta->ssdi.ssd_strip_size;
	strip_bits = sd->mds.mdd_raid6.sr6_strip_bits;
	no_chunk = sd->sd_meta->ssdi.ssd_chunk_no - 2;
	row_size = (no_chunk << strip_bits) >> DEV_BSHIFT;

	data = xs->data;
	datalen = xs->datalen;
	lbaoffs = blkno << DEV_BSHIFT;

	if (xs->flags & SCSI_DATA_OUT) {
		if ((wu_r = sr_scsi_wu_get(sd, SCSI_NOSLEEP)) == NULL) {
			printf("%s: can't get wu_r", DEVNAME(sd->sd_sc));
			goto bad;
		}
		wu_r->swu_state = SR_WU_INPROGRESS;
		wu_r->swu_flags |= SR_WUF_DISCIPLINE;
	}

	wu->swu_blk_start = 0;
	while (datalen != 0) {
		strip_no = lbaoffs >> strip_bits;
		strip_offs = lbaoffs & (strip_size - 1);
		chunk_offs = (strip_no / no_chunk) << strip_bits;
		offset = chunk_offs + strip_offs;

		/* get size remaining in this stripe */
		length = MIN(strip_size - strip_offs, datalen);

		/* map disk offset to parity/data drive */
		chunk = strip_no % no_chunk;

		qchunk = (no_chunk + 1) - ((strip_no / no_chunk) % (no_chunk+2));
		if (qchunk == 0)
			pchunk = no_chunk + 1;
		else
			pchunk = qchunk - 1;
		if (chunk >= pchunk)
			chunk++;
		if (chunk >= qchunk)
			chunk++;

		lba = offset >> DEV_BSHIFT;

		/* XXX big hammer.. exclude I/O from entire stripe */
		if (wu->swu_blk_start == 0)
			wu->swu_blk_start = (strip_no / no_chunk) * row_size;
		wu->swu_blk_end = (strip_no / no_chunk) * row_size +
		    (row_size - 1);

		fail = 0;
		fchunk = -1;

		/* Get disk-fail flags */
		for (i=0; i< no_chunk+2; i++) {
			scp = sd->sd_vol.sv_chunks[i];
			switch (scp->src_meta.scm_status) {
			case BIOC_SDOFFLINE:
			case BIOC_SDREBUILD:
			case BIOC_SDHOTSPARE:
				if (i == qchunk)
					fail |= SR_FAILQ;
				else if (i == pchunk)
					fail |= SR_FAILP;
				else if (i == chunk)
					fail |= SR_FAILX;
				else {
					/* dual data-disk failure */
					fail |= SR_FAILY;
					fchunk = i;
				}
				break;
			}
		}

		if (xs->flags & SCSI_DATA_IN) {
			if (!(fail & SR_FAILX)) {
				/* drive is good. issue single read request */
				if (sr_raid6_addio(wu, chunk, lba, length,
				    data, xs->flags, 0, NULL, NULL, 0))
					goto bad;
			} else if (fail & SR_FAILP) {
				/* Dx, P failed */
				printf("Disk %llx offline, "
				    "regenerating Dx+P\n", chunk);

				gxinv = gf_inv(gf_pow[chunk]);

				/* Calculate: Dx = (Q^Dz*gz)*inv(gx) */
				memset(data, 0, length);
				if (sr_raid6_addio(wu, qchunk, lba, length,
				    NULL, SCSI_DATA_IN, 0, NULL, data, gxinv))
					goto bad;

				/* Read Dz * gz * inv(gx) */
				for (i = 0; i < no_chunk+2; i++) {
					if (i == qchunk || i == pchunk ||
					    i == chunk)
						continue;

					if (sr_raid6_addio(wu, i, lba, length,
					    NULL, SCSI_DATA_IN, 0, NULL, data,
					    gf_mul(gf_pow[i], gxinv)))
						goto bad;
				}

				/* data will contain correct value on completion */
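				/*
				 * Why this works: Q is the GF(256) sum of
				 * gz*Dz over every data strip, so with P
				 * unavailable the lost strip follows from Q
				 * alone:
				 *
				 *	gx*Dx = Q ^ (sum of gz*Dz, z != x)
				 *	Dx = inv(gx) * (Q ^ sum gz*Dz)
				 *
				 * which is what the reads queued above
				 * accumulate into 'data'.
				 */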
			} else if (fail & SR_FAILY) {
				/* Dx, Dy failed */
				printf("Disk %llx & %llx offline, "
				    "regenerating Dx+Dy\n", chunk, fchunk);

				gxinv = gf_inv(gf_pow[chunk] ^ gf_pow[fchunk]);
				pxinv = gf_mul(gf_pow[fchunk], gxinv);

				/* read Q * inv(gx + gy) */
				memset(data, 0, length);
				if (sr_raid6_addio(wu, qchunk, lba, length,
				    NULL, SCSI_DATA_IN, 0, NULL, data, gxinv))
					goto bad;

				/* read P * gy * inv(gx + gy) */
				if (sr_raid6_addio(wu, pchunk, lba, length,
				    NULL, SCSI_DATA_IN, 0, NULL, data, pxinv))
					goto bad;

				/* Calculate: Dx*gx^Dy*gy = Q^(Dz*gz) ; Dx^Dy = P^Dz
				 *   Q:  sr_raid6_xorp(qbuf, --, length);
				 *   P:  sr_raid6_xorp(pbuf, --, length);
				 *   Dz: sr_raid6_xorp(pbuf, --, length);
				 *	 sr_raid6_xorq(qbuf, --, length, gf_pow[i]);
				 */
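				/*
				 * Solving those two equations for Dx:
				 *
				 *	Q ^ gy*P = (gx^gy)*Dx ^ sum (gz^gy)*Dz
				 *	Dx = inv(gx^gy) *
				 *	    (Q ^ gy*P ^ sum (gz^gy)*Dz)
				 *
				 * hence each surviving strip below is read
				 * with coefficient pxinv ^ gz*gxinv and
				 * accumulated into 'data'.
				 */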
				for (i = 0; i < no_chunk+2; i++) {
					if (i == qchunk || i == pchunk ||
					    i == chunk || i == fchunk)
						continue;

					/* read Dz * (gz + gy) * inv(gx + gy) */
					if (sr_raid6_addio(wu, i, lba, length,
					    NULL, SCSI_DATA_IN, 0, NULL, data,
					    pxinv ^ gf_mul(gf_pow[i], gxinv)))
						goto bad;
				}
			} else {
				/* Two cases: single disk (Dx) or (Dx+Q)
				 *   Dx = Dz ^ P (same as RAID5)
				 */
				printf("Disk %llx offline, "
				    "regenerating Dx%s\n", chunk,
				    fail & SR_FAILQ ? "+Q" : " single");

				/* Calculate: Dx = P^Dz
				 *   P:  sr_raid6_xorp(data, ---, length);
				 *   Dz: sr_raid6_xorp(data, ---, length);
				 */
				memset(data, 0, length);
				for (i = 0; i < no_chunk+2; i++) {
					if (i != chunk && i != qchunk) {
						/* Read Dz */
						if (sr_raid6_addio(wu, i, lba,
						    length, NULL, SCSI_DATA_IN,
						    0, data, NULL, 0))
							goto bad;
					}
				}

				/* data will contain correct value on completion */
			}
		} else {
			/* XXX handle writes to failed/offline disk? */
			if (fail & (SR_FAILX|SR_FAILQ|SR_FAILP))
				goto bad;

			/*
			 * initialize pbuf with contents of new data to be
			 * written. This will be XORed with old data and old
			 * parity in the intr routine. The result in pbuf
			 * is the new parity data.
			 */
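			/*
			 * Read-modify-write parity update: with Dn the new
			 * data and Dn' the old on-disk data,
			 *
			 *	Pnew = Pold ^ Dn' ^ Dn
			 *	Qnew = Qold ^ gn*Dn' ^ gn*Dn
			 *
			 * The three reads go on wu_r and the three writes on
			 * wu; the collider set up after the loop defers the
			 * writes until the reads have completed.
			 */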
			qbuf = sr_block_get(sd, length);
			if (qbuf == NULL)
				goto bad;

			pbuf = sr_block_get(sd, length);
			if (pbuf == NULL)
				goto bad;

			/* Calculate P = Dn; Q = gn * Dn */
			if (gf_premul(gf_pow[chunk]))
				goto bad;
			sr_raid6_xorp(pbuf, data, length);
			sr_raid6_xorq(qbuf, data, length, gf_pow[chunk]);

			/* Read old data: P ^= Dn' ; Q ^= (gn * Dn') */
			if (sr_raid6_addio(wu_r, chunk, lba, length, NULL,
			    SCSI_DATA_IN, 0, pbuf, qbuf, gf_pow[chunk]))
				goto bad;

			/* Read old xor-parity: P ^= P' */
			if (sr_raid6_addio(wu_r, pchunk, lba, length, NULL,
			    SCSI_DATA_IN, 0, pbuf, NULL, 0))
				goto bad;

			/* Read old q-parity: Q ^= Q' */
			if (sr_raid6_addio(wu_r, qchunk, lba, length, NULL,
			    SCSI_DATA_IN, 0, qbuf, NULL, 0))
				goto bad;

			/* write new data */
			if (sr_raid6_addio(wu, chunk, lba, length, data,
			    xs->flags, 0, NULL, NULL, 0))
				goto bad;

			/* write new xor-parity */
			if (sr_raid6_addio(wu, pchunk, lba, length, pbuf,
			    xs->flags, SR_CCBF_FREEBUF, NULL, NULL, 0))
				goto bad;

			/* write new q-parity */
			if (sr_raid6_addio(wu, qchunk, lba, length, qbuf,
			    xs->flags, SR_CCBF_FREEBUF, NULL, NULL, 0))
				goto bad;
		}
		/* advance to next block */
		lbaoffs += length;
		datalen -= length;
		data += length;
	}

	s = splbio();
	if (wu_r) {
		/* collide write request with reads */
		wu_r->swu_blk_start = wu->swu_blk_start;
		wu_r->swu_blk_end = wu->swu_blk_end;

		wu->swu_state = SR_WU_DEFERRED;
		wu_r->swu_collider = wu;
		TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu, swu_link);

		wu = wu_r;
	}
	splx(s);

	sr_schedule_wu(wu);

	return (0);

bad:
	/* XXX - can leak pbuf/qbuf on error. */
	/* wu is unwound by sr_wu_put */
	if (wu_r)
		sr_scsi_wu_put(sd, wu_r);
	return (1);
}
/* Handle failure I/O completion */
int
sr_failio(struct sr_workunit *wu)
{
	struct sr_discipline	*sd = wu->swu_dis;
	struct sr_ccb		*ccb;

	if (!(wu->swu_flags & SR_WUF_FAIL))
		return (0);

	/* Wu is a 'fake'.. don't do real I/O just intr */
	TAILQ_INSERT_TAIL(&sd->sd_wu_pendq, wu, swu_link);
	TAILQ_FOREACH(ccb, &wu->swu_ccb, ccb_link)
		sr_raid6_intr(&ccb->ccb_buf);
	return (1);
}
void
sr_raid6_intr(struct buf *bp)
{
	struct sr_ccb		*ccb = (struct sr_ccb *)bp;
	struct sr_workunit	*wu = ccb->ccb_wu;
	struct sr_discipline	*sd = wu->swu_dis;
	struct sr_raid6_opaque	*pq = ccb->ccb_opaque;
	int			s;

	DNPRINTF(SR_D_INTR, "%s: sr_raid6_intr bp %p xs %p\n",
	    DEVNAME(sd->sd_sc), bp, wu->swu_xs);

	s = splbio();
	sr_ccb_done(ccb);

	/* XOR data to result. */
	if (ccb->ccb_state == SR_CCB_OK && pq) {
		if (pq->pbuf)
			/* Calculate xor-parity */
			sr_raid6_xorp(pq->pbuf, ccb->ccb_buf.b_data,
			    ccb->ccb_buf.b_bcount);
		if (pq->qbuf)
			/* Calculate q-parity */
			sr_raid6_xorq(pq->qbuf, ccb->ccb_buf.b_data,
			    ccb->ccb_buf.b_bcount, pq->gn);
		free(pq, M_DEVBUF, 0);
		ccb->ccb_opaque = NULL;
	}

	/* Free allocated data buffer. */
	if (ccb->ccb_flags & SR_CCBF_FREEBUF) {
		sr_block_put(sd, ccb->ccb_buf.b_data, ccb->ccb_buf.b_bcount);
		ccb->ccb_buf.b_data = NULL;
	}

	sr_wu_done(wu);
	splx(s);
}
int
sr_raid6_wu_done(struct sr_workunit *wu)
{
	struct sr_discipline	*sd = wu->swu_dis;
	struct scsi_xfer	*xs = wu->swu_xs;

	/* XXX - we have no way of propagating errors... */
	if (wu->swu_flags & SR_WUF_DISCIPLINE)
		return SR_WU_OK;

	/* XXX - This is insufficient for RAID 6. */
	if (wu->swu_ios_succeeded > 0) {
		xs->error = XS_NOERROR;
		return SR_WU_OK;
	}

	if (xs->flags & SCSI_DATA_IN) {
		printf("%s: retrying read on block %lld\n",
		    sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start);
		sr_wu_release_ccbs(wu);
		wu->swu_state = SR_WU_RESTART;
		if (sd->sd_scsi_rw(wu) == 0)
			return SR_WU_RESTART;
	} else {
		printf("%s: permanently fail write on block %lld\n",
		    sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start);
	}

	wu->swu_state = SR_WU_FAILED;
	xs->error = XS_DRIVER_STUFFUP;

	return SR_WU_FAILED;
}
int
sr_raid6_addio(struct sr_workunit *wu, int chunk, daddr_t blkno,
    long len, void *data, int xsflags, int ccbflags, void *pbuf,
    void *qbuf, int gn)
{
	struct sr_discipline	*sd = wu->swu_dis;
	struct sr_ccb		*ccb;
	struct sr_raid6_opaque	*pqbuf;

	DNPRINTF(SR_D_DIS, "sr_raid6_addio: %s %d.%lld %ld %p:%p\n",
	    (xsflags & SCSI_DATA_IN) ? "read" : "write", chunk,
	    (long long)blkno, len, pbuf, qbuf);

	/* Allocate temporary buffer. */
	if (data == NULL) {
		data = sr_block_get(sd, len);
		if (data == NULL)
			return (-1);
		ccbflags |= SR_CCBF_FREEBUF;
	}

	ccb = sr_ccb_rw(sd, chunk, blkno, len, data, xsflags, ccbflags);
	if (ccb == NULL) {
		if (ccbflags & SR_CCBF_FREEBUF)
			sr_block_put(sd, data, len);
		return (-1);
	}
	if (pbuf || qbuf) {
		/* XXX - can leak data and ccb on failure. */
		if (qbuf && gf_premul(gn))
			return (-1);

		/* XXX - should be preallocated? */
		pqbuf = malloc(sizeof(struct sr_raid6_opaque),
		    M_DEVBUF, M_ZERO | M_NOWAIT);
		if (pqbuf == NULL) {
			sr_ccb_put(ccb);
			return (-1);
		}

		pqbuf->pbuf = pbuf;
		pqbuf->qbuf = qbuf;
		pqbuf->gn = gn;
		ccb->ccb_opaque = pqbuf;
	}

	sr_wu_enqueue_ccb(wu, ccb);

	return (0);
}
/* Perform RAID6 parity calculation.
 *   P=xor parity, Q=GF256 parity, D=data, gn=disk# */
void
sr_raid6_xorp(void *p, void *d, int len)
{
	uint32_t	*pbuf = p, *data = d;

	len >>= 2;
	while (len--)
		*pbuf++ ^= *data++;
}

void
sr_raid6_xorq(void *q, void *d, int len, int gn)
{
	uint32_t	*qbuf = q, *data = d, x;
	uint8_t		*gn_map = gf_map[gn];

	len >>= 2;
	while (len--) {
		x = *data++;
		*qbuf++ ^= (((uint32_t)gn_map[x & 0xff]) |
		    ((uint32_t)gn_map[(x >> 8) & 0xff] << 8) |
		    ((uint32_t)gn_map[(x >> 16) & 0xff] << 16) |
		    ((uint32_t)gn_map[(x >> 24) & 0xff] << 24));
	}
}
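
/*
 * sr_raid6_xorq() computes qbuf ^= gn * data in GF(2^8): gf_map[gn], built
 * by gf_premul(), maps each byte b to gn*b, and the loop applies it to the
 * four bytes of every 32-bit word.  Both routines assume len is a multiple
 * of four, which holds here since all transfers are sector-sized.
 */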

/* Create GF256 log/pow tables: polynomial = 0x11D */
void
gf_init(void)
{
	int	 i;
	uint8_t	 p = 1;

	/* use 2N pow table to avoid using % in multiply */
	for (i=0; i<256; i++) {
		gf_log[p] = i;
		gf_pow[i] = gf_pow[i+255] = p;
		p = ((p << 1) ^ ((p & 0x80) ? 0x1D : 0x00));
	}
	gf_log[0] = 512;
}
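
/*
 * Table layout: gf_pow[i] = 2^i in GF(2^8), so gf_pow[0] = 1, gf_pow[1] = 2
 * and gf_pow[8] = 0x1D (x^8 reduced by the polynomial 0x11D), repeating
 * with period 255.  Doubling the table lets gf_mul() index
 * gf_pow[gf_log[a] + gf_log[b]] without a modulo, and gf_log[0] = 512
 * steers any multiplication by zero into the zero-filled tail of gf_pow[],
 * so the product correctly comes out as zero.
 */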
uint8_t
gf_inv(uint8_t a)
{
	return gf_pow[255 - gf_log[a]];
}

uint8_t
gf_mul(uint8_t a, uint8_t b)
{
	return gf_pow[gf_log[a] + gf_log[b]];
}
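
/*
 * Both helpers rely on the generator identity g^255 = 1: a*b equals
 * g^(log a + log b), read straight from the doubled pow table, and
 * inv(a) = g^(255 - log a) since a * g^(255 - log a) = g^255 = 1.
 * gf_inv() is only ever called with nonzero arguments (a gf_pow[] entry,
 * or the XOR of two distinct ones), which is just as well since zero has
 * no inverse.
 */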

/* Precalculate multiplication tables for drive gn */
int
gf_premul(uint8_t gn)
{
	int	i;

	if (gf_map[gn] != NULL)
		return (0);

	if ((gf_map[gn] = malloc(256, M_DEVBUF, M_ZERO | M_NOWAIT)) == NULL)
		return (-1);

	for (i=0; i<256; i++)
		gf_map[gn][i] = gf_pow[gf_log[i] + gf_log[gn]];

	return (0);
}