softraid_raid1.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442
  1. /* $OpenBSD: softraid_raid1.c,v 1.63 2015/07/21 03:30:51 krw Exp $ */
  2. /*
  3. * Copyright (c) 2007 Marco Peereboom <marco@peereboom.us>
  4. *
  5. * Permission to use, copy, modify, and distribute this software for any
  6. * purpose with or without fee is hereby granted, provided that the above
  7. * copyright notice and this permission notice appear in all copies.
  8. *
  9. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  10. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  11. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  12. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  13. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  14. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  15. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  16. */
  17. #include "bio.h"
  18. #include <sys/param.h>
  19. #include <sys/systm.h>
  20. #include <sys/buf.h>
  21. #include <sys/device.h>
  22. #include <sys/ioctl.h>
  23. #include <sys/malloc.h>
  24. #include <sys/kernel.h>
  25. #include <sys/disk.h>
  26. #include <sys/rwlock.h>
  27. #include <sys/queue.h>
  28. #include <sys/fcntl.h>
  29. #include <sys/mount.h>
  30. #include <sys/sensors.h>
  31. #include <sys/stat.h>
  32. #include <sys/task.h>
  33. #include <sys/conf.h>
  34. #include <sys/uio.h>
  35. #include <scsi/scsi_all.h>
  36. #include <scsi/scsiconf.h>
  37. #include <scsi/scsi_disk.h>
  38. #include <dev/softraidvar.h>
  /*
   * RAID 1 discipline routines.
   *
   * All of these except sr_raid1_init() are installed into the discipline's
   * function pointers by sr_raid1_discipline_init(); sr_raid1_init() is the
   * common tail of the create and assemble paths.
   */
  int     sr_raid1_create(struct sr_discipline *sd,
              struct bioc_createraid *bc, int no_chunk, int64_t coerced_size);
  int     sr_raid1_assemble(struct sr_discipline *sd,
              struct bioc_createraid *bc, int no_chunk, void *data);
  int     sr_raid1_init(struct sr_discipline *sd);
  int     sr_raid1_rw(struct sr_workunit *wu);
  int     sr_raid1_wu_done(struct sr_workunit *wu);
  void    sr_raid1_set_chunk_state(struct sr_discipline *sd, int c,
              int new_state);
  void    sr_raid1_set_vol_state(struct sr_discipline *sd);
  49. /* Discipline initialisation. */
  50. void
  51. sr_raid1_discipline_init(struct sr_discipline *sd)
  52. {
  53. /* Fill out discipline members. */
  54. sd->sd_type = SR_MD_RAID1;
  55. strlcpy(sd->sd_name, "RAID 1", sizeof(sd->sd_name));
  56. sd->sd_capabilities = SR_CAP_SYSTEM_DISK | SR_CAP_AUTO_ASSEMBLE |
  57. SR_CAP_REBUILD | SR_CAP_REDUNDANT;
  58. sd->sd_max_wu = SR_RAID1_NOWU;
  59. /* Setup discipline specific function pointers. */
  60. sd->sd_assemble = sr_raid1_assemble;
  61. sd->sd_create = sr_raid1_create;
  62. sd->sd_scsi_rw = sr_raid1_rw;
  63. sd->sd_scsi_wu_done = sr_raid1_wu_done;
  64. sd->sd_set_chunk_state = sr_raid1_set_chunk_state;
  65. sd->sd_set_vol_state = sr_raid1_set_vol_state;
  66. }
  67. int
  68. sr_raid1_create(struct sr_discipline *sd, struct bioc_createraid *bc,
  69. int no_chunk, int64_t coerced_size)
  70. {
  71. if (no_chunk < 2) {
  72. sr_error(sd->sd_sc, "%s requires two or more chunks",
  73. sd->sd_name);
  74. return EINVAL;
  75. }
  76. sd->sd_meta->ssdi.ssd_size = coerced_size;
  77. return sr_raid1_init(sd);
  78. }
  /*
   * Assemble an existing RAID 1 volume.
   *
   * No assemble-specific work is done: bc, no_chunk and data are unused and
   * the call is forwarded to sr_raid1_init(), whose result is returned.
   */
  int
  sr_raid1_assemble(struct sr_discipline *sd, struct bioc_createraid *bc,
      int no_chunk, void *data)
  {
      int rv;

      rv = sr_raid1_init(sd);

      return rv;
  }
  85. int
  86. sr_raid1_init(struct sr_discipline *sd)
  87. {
  88. sd->sd_max_ccb_per_wu = sd->sd_meta->ssdi.ssd_chunk_no;
  89. return 0;
  90. }
  91. void
  92. sr_raid1_set_chunk_state(struct sr_discipline *sd, int c, int new_state)
  93. {
  94. int old_state, s;
  95. DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n",
  96. DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
  97. sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state);
  98. /* ok to go to splbio since this only happens in error path */
  99. s = splbio();
  100. old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status;
  101. /* multiple IOs to the same chunk that fail will come through here */
  102. if (old_state == new_state)
  103. goto done;
  104. switch (old_state) {
  105. case BIOC_SDONLINE:
  106. switch (new_state) {
  107. case BIOC_SDOFFLINE:
  108. case BIOC_SDSCRUB:
  109. break;
  110. default:
  111. goto die;
  112. }
  113. break;
  114. case BIOC_SDOFFLINE:
  115. switch (new_state) {
  116. case BIOC_SDREBUILD:
  117. case BIOC_SDHOTSPARE:
  118. break;
  119. default:
  120. goto die;
  121. }
  122. break;
  123. case BIOC_SDSCRUB:
  124. if (new_state == BIOC_SDONLINE) {
  125. ;
  126. } else
  127. goto die;
  128. break;
  129. case BIOC_SDREBUILD:
  130. switch (new_state) {
  131. case BIOC_SDONLINE:
  132. break;
  133. case BIOC_SDOFFLINE:
  134. /* Abort rebuild since the rebuild chunk disappeared. */
  135. sd->sd_reb_abort = 1;
  136. break;
  137. default:
  138. goto die;
  139. }
  140. break;
  141. case BIOC_SDHOTSPARE:
  142. switch (new_state) {
  143. case BIOC_SDOFFLINE:
  144. case BIOC_SDREBUILD:
  145. break;
  146. default:
  147. goto die;
  148. }
  149. break;
  150. default:
  151. die:
  152. splx(s); /* XXX */
  153. panic("%s: %s: %s: invalid chunk state transition "
  154. "%d -> %d\n", DEVNAME(sd->sd_sc),
  155. sd->sd_meta->ssd_devname,
  156. sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname,
  157. old_state, new_state);
  158. /* NOTREACHED */
  159. }
  160. sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state;
  161. sd->sd_set_vol_state(sd);
  162. sd->sd_must_flush = 1;
  163. task_add(systq, &sd->sd_meta_save_task);
  164. done:
  165. splx(s);
  166. }
  167. void
  168. sr_raid1_set_vol_state(struct sr_discipline *sd)
  169. {
  170. int states[SR_MAX_STATES];
  171. int new_state, i, s, nd;
  172. int old_state = sd->sd_vol_status;
  173. DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n",
  174. DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);
  175. nd = sd->sd_meta->ssdi.ssd_chunk_no;
  176. #ifdef SR_DEBUG
  177. for (i = 0; i < nd; i++)
  178. DNPRINTF(SR_D_STATE, "%s: chunk %d status = %u\n",
  179. DEVNAME(sd->sd_sc), i,
  180. sd->sd_vol.sv_chunks[i]->src_meta.scm_status);
  181. #endif
  182. for (i = 0; i < SR_MAX_STATES; i++)
  183. states[i] = 0;
  184. for (i = 0; i < nd; i++) {
  185. s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status;
  186. if (s >= SR_MAX_STATES)
  187. panic("%s: %s: %s: invalid chunk state",
  188. DEVNAME(sd->sd_sc),
  189. sd->sd_meta->ssd_devname,
  190. sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname);
  191. states[s]++;
  192. }
  193. if (states[BIOC_SDONLINE] == nd)
  194. new_state = BIOC_SVONLINE;
  195. else if (states[BIOC_SDONLINE] == 0)
  196. new_state = BIOC_SVOFFLINE;
  197. else if (states[BIOC_SDSCRUB] != 0)
  198. new_state = BIOC_SVSCRUB;
  199. else if (states[BIOC_SDREBUILD] != 0)
  200. new_state = BIOC_SVREBUILD;
  201. else if (states[BIOC_SDOFFLINE] != 0)
  202. new_state = BIOC_SVDEGRADED;
  203. else {
  204. DNPRINTF(SR_D_STATE, "%s: invalid volume state, old state "
  205. "was %d\n", DEVNAME(sd->sd_sc), old_state);
  206. panic("invalid volume state");
  207. }
  208. DNPRINTF(SR_D_STATE, "%s: %s: sr_raid1_set_vol_state %d -> %d\n",
  209. DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
  210. old_state, new_state);
  211. switch (old_state) {
  212. case BIOC_SVONLINE:
  213. switch (new_state) {
  214. case BIOC_SVONLINE: /* can go to same state */
  215. case BIOC_SVOFFLINE:
  216. case BIOC_SVDEGRADED:
  217. case BIOC_SVREBUILD: /* happens on boot */
  218. break;
  219. default:
  220. goto die;
  221. }
  222. break;
  223. case BIOC_SVOFFLINE:
  224. /* XXX this might be a little too much */
  225. goto die;
  226. case BIOC_SVDEGRADED:
  227. switch (new_state) {
  228. case BIOC_SVOFFLINE:
  229. case BIOC_SVREBUILD:
  230. case BIOC_SVDEGRADED: /* can go to the same state */
  231. break;
  232. default:
  233. goto die;
  234. }
  235. break;
  236. case BIOC_SVBUILDING:
  237. switch (new_state) {
  238. case BIOC_SVONLINE:
  239. case BIOC_SVOFFLINE:
  240. case BIOC_SVBUILDING: /* can go to the same state */
  241. break;
  242. default:
  243. goto die;
  244. }
  245. break;
  246. case BIOC_SVSCRUB:
  247. switch (new_state) {
  248. case BIOC_SVONLINE:
  249. case BIOC_SVOFFLINE:
  250. case BIOC_SVDEGRADED:
  251. case BIOC_SVSCRUB: /* can go to same state */
  252. break;
  253. default:
  254. goto die;
  255. }
  256. break;
  257. case BIOC_SVREBUILD:
  258. switch (new_state) {
  259. case BIOC_SVONLINE:
  260. case BIOC_SVOFFLINE:
  261. case BIOC_SVDEGRADED:
  262. case BIOC_SVREBUILD: /* can go to the same state */
  263. break;
  264. default:
  265. goto die;
  266. }
  267. break;
  268. default:
  269. die:
  270. panic("%s: %s: invalid volume state transition "
  271. "%d -> %d\n", DEVNAME(sd->sd_sc),
  272. sd->sd_meta->ssd_devname,
  273. old_state, new_state);
  274. /* NOTREACHED */
  275. }
  276. sd->sd_vol_status = new_state;
  277. /* If we have just become degraded, look for a hotspare. */
  278. if (new_state == BIOC_SVDEGRADED)
  279. task_add(systq, &sd->sd_hotspare_rebuild_task);
  280. }
  281. int
  282. sr_raid1_rw(struct sr_workunit *wu)
  283. {
  284. struct sr_discipline *sd = wu->swu_dis;
  285. struct scsi_xfer *xs = wu->swu_xs;
  286. struct sr_ccb *ccb;
  287. struct sr_chunk *scp;
  288. int ios, chunk, i, rt;
  289. daddr_t blkno;
  290. /* blkno and scsi error will be handled by sr_validate_io */
  291. if (sr_validate_io(wu, &blkno, "sr_raid1_rw"))
  292. goto bad;
  293. if (xs->flags & SCSI_DATA_IN)
  294. ios = 1;
  295. else
  296. ios = sd->sd_meta->ssdi.ssd_chunk_no;
  297. for (i = 0; i < ios; i++) {
  298. if (xs->flags & SCSI_DATA_IN) {
  299. rt = 0;
  300. ragain:
  301. /* interleave reads */
  302. chunk = sd->mds.mdd_raid1.sr1_counter++ %
  303. sd->sd_meta->ssdi.ssd_chunk_no;
  304. scp = sd->sd_vol.sv_chunks[chunk];
  305. switch (scp->src_meta.scm_status) {
  306. case BIOC_SDONLINE:
  307. case BIOC_SDSCRUB:
  308. break;
  309. case BIOC_SDOFFLINE:
  310. case BIOC_SDREBUILD:
  311. case BIOC_SDHOTSPARE:
  312. if (rt++ < sd->sd_meta->ssdi.ssd_chunk_no)
  313. goto ragain;
  314. /* FALLTHROUGH */
  315. default:
  316. /* volume offline */
  317. printf("%s: is offline, cannot read\n",
  318. DEVNAME(sd->sd_sc));
  319. goto bad;
  320. }
  321. } else {
  322. /* writes go on all working disks */
  323. chunk = i;
  324. scp = sd->sd_vol.sv_chunks[chunk];
  325. switch (scp->src_meta.scm_status) {
  326. case BIOC_SDONLINE:
  327. case BIOC_SDSCRUB:
  328. case BIOC_SDREBUILD:
  329. break;
  330. case BIOC_SDHOTSPARE: /* should never happen */
  331. case BIOC_SDOFFLINE:
  332. continue;
  333. default:
  334. goto bad;
  335. }
  336. }
  337. ccb = sr_ccb_rw(sd, chunk, blkno, xs->datalen, xs->data,
  338. xs->flags, 0);
  339. if (!ccb) {
  340. /* should never happen but handle more gracefully */
  341. printf("%s: %s: too many ccbs queued\n",
  342. DEVNAME(sd->sd_sc),
  343. sd->sd_meta->ssd_devname);
  344. goto bad;
  345. }
  346. sr_wu_enqueue_ccb(wu, ccb);
  347. }
  348. sr_schedule_wu(wu);
  349. return (0);
  350. bad:
  351. /* wu is unwound by sr_wu_put */
  352. return (1);
  353. }
  354. int
  355. sr_raid1_wu_done(struct sr_workunit *wu)
  356. {
  357. struct sr_discipline *sd = wu->swu_dis;
  358. struct scsi_xfer *xs = wu->swu_xs;
  359. /* If at least one I/O succeeded, we are okay. */
  360. if (wu->swu_ios_succeeded > 0) {
  361. xs->error = XS_NOERROR;
  362. return SR_WU_OK;
  363. }
  364. /* If all I/O failed, retry reads and give up on writes. */
  365. if (xs->flags & SCSI_DATA_IN) {
  366. printf("%s: retrying read on block %lld\n",
  367. sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start);
  368. if (wu->swu_cb_active == 1)
  369. panic("%s: sr_raid1_intr_cb",
  370. DEVNAME(sd->sd_sc));
  371. sr_wu_release_ccbs(wu);
  372. wu->swu_state = SR_WU_RESTART;
  373. if (sd->sd_scsi_rw(wu) == 0)
  374. return SR_WU_RESTART;
  375. } else {
  376. printf("%s: permanently failing write on block %lld\n",
  377. sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start);
  378. }
  379. wu->swu_state = SR_WU_FAILED;
  380. xs->error = XS_DRIVER_STUFFUP;
  381. return SR_WU_FAILED;
  382. }