tr_raid0.c 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336
  1. /*-
  2. * SPDX-License-Identifier: BSD-2-Clause
  3. *
  4. * Copyright (c) 2010 Alexander Motin <mav@FreeBSD.org>
  5. * All rights reserved.
  6. *
  7. * Redistribution and use in source and binary forms, with or without
  8. * modification, are permitted provided that the following conditions
  9. * are met:
  10. * 1. Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * 2. Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in the
  14. * documentation and/or other materials provided with the distribution.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
  17. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
  20. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  22. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  23. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  24. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  25. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  26. * SUCH DAMAGE.
  27. */
  28. #include <sys/param.h>
  29. #include <sys/bio.h>
  30. #include <sys/endian.h>
  31. #include <sys/kernel.h>
  32. #include <sys/kobj.h>
  33. #include <sys/lock.h>
  34. #include <sys/malloc.h>
  35. #include <sys/mutex.h>
  36. #include <sys/systm.h>
  37. #include <geom/geom.h>
  38. #include <geom/geom_dbg.h>
  39. #include "geom/raid/g_raid.h"
  40. #include "g_raid_tr_if.h"
  /*
   * Malloc type for this module's allocations.  No code visible in this file
   * allocates from it.
   */
  static MALLOC_DEFINE(M_TR_RAID0, "tr_raid0_data", "GEOM_RAID RAID0 data");
  /*
   * State of one RAID0 transformation instance.  The generic object is the
   * first member; the methods in this file cast a struct g_raid_tr_object
   * pointer directly to this type.
   */
  struct g_raid_tr_raid0_object {
      struct g_raid_tr_object trso_base;  /* Generic transformation object. */
      int trso_starting;  /* Set by taste; cleared by start and by stop. */
      int trso_stopped;   /* Set by stop; the volume then reports STOPPED. */
  };
  /*
   * Forward declarations of the transformation methods implemented in this
   * file, so that the method table below can reference them.
   */
  static g_raid_tr_taste_t g_raid_tr_taste_raid0;
  static g_raid_tr_event_t g_raid_tr_event_raid0;
  static g_raid_tr_start_t g_raid_tr_start_raid0;
  static g_raid_tr_stop_t g_raid_tr_stop_raid0;
  static g_raid_tr_iostart_t g_raid_tr_iostart_raid0;
  static g_raid_tr_iodone_t g_raid_tr_iodone_raid0;
  static g_raid_tr_kerneldump_t g_raid_tr_kerneldump_raid0;
  static g_raid_tr_free_t g_raid_tr_free_raid0;
  55. static kobj_method_t g_raid_tr_raid0_methods[] = {
  56. KOBJMETHOD(g_raid_tr_taste, g_raid_tr_taste_raid0),
  57. KOBJMETHOD(g_raid_tr_event, g_raid_tr_event_raid0),
  58. KOBJMETHOD(g_raid_tr_start, g_raid_tr_start_raid0),
  59. KOBJMETHOD(g_raid_tr_stop, g_raid_tr_stop_raid0),
  60. KOBJMETHOD(g_raid_tr_iostart, g_raid_tr_iostart_raid0),
  61. KOBJMETHOD(g_raid_tr_iodone, g_raid_tr_iodone_raid0),
  62. KOBJMETHOD(g_raid_tr_kerneldump, g_raid_tr_kerneldump_raid0),
  63. KOBJMETHOD(g_raid_tr_free, g_raid_tr_free_raid0),
  64. { 0, 0 }
  65. };
  /*
   * Class descriptor for the RAID0 transformation.  The three positional
   * initializers supply the string "RAID0", the method table above and the
   * size of struct g_raid_tr_raid0_object; the designated ones set the
   * g_raid-specific fields.
   * NOTE(review): the precise meaning of trc_enable, trc_priority and
   * trc_accept_unmapped is defined in g_raid.h, which is not visible here --
   * confirm there before changing these values.
   */
  static struct g_raid_tr_class g_raid_tr_raid0_class = {
      "RAID0",
      g_raid_tr_raid0_methods,
      sizeof(struct g_raid_tr_raid0_object),
      .trc_enable = 1,
      .trc_priority = 100,
      .trc_accept_unmapped = 1
  };
  74. static int
  75. g_raid_tr_taste_raid0(struct g_raid_tr_object *tr, struct g_raid_volume *volume)
  76. {
  77. struct g_raid_tr_raid0_object *trs;
  78. trs = (struct g_raid_tr_raid0_object *)tr;
  79. if (tr->tro_volume->v_raid_level != G_RAID_VOLUME_RL_RAID0 ||
  80. tr->tro_volume->v_raid_level_qualifier != G_RAID_VOLUME_RLQ_NONE)
  81. return (G_RAID_TR_TASTE_FAIL);
  82. trs->trso_starting = 1;
  83. return (G_RAID_TR_TASTE_SUCCEED);
  84. }
  85. static int
  86. g_raid_tr_update_state_raid0(struct g_raid_volume *vol)
  87. {
  88. struct g_raid_tr_raid0_object *trs;
  89. struct g_raid_softc *sc;
  90. u_int s;
  91. int n, f;
  92. sc = vol->v_softc;
  93. trs = (struct g_raid_tr_raid0_object *)vol->v_tr;
  94. if (trs->trso_stopped)
  95. s = G_RAID_VOLUME_S_STOPPED;
  96. else if (trs->trso_starting)
  97. s = G_RAID_VOLUME_S_STARTING;
  98. else {
  99. n = g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_ACTIVE);
  100. f = g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_FAILED);
  101. if (n + f == vol->v_disks_count) {
  102. if (f == 0)
  103. s = G_RAID_VOLUME_S_OPTIMAL;
  104. else
  105. s = G_RAID_VOLUME_S_SUBOPTIMAL;
  106. } else
  107. s = G_RAID_VOLUME_S_BROKEN;
  108. }
  109. if (s != vol->v_state) {
  110. g_raid_event_send(vol, G_RAID_VOLUME_S_ALIVE(s) ?
  111. G_RAID_VOLUME_E_UP : G_RAID_VOLUME_E_DOWN,
  112. G_RAID_EVENT_VOLUME);
  113. g_raid_change_volume_state(vol, s);
  114. if (!trs->trso_starting && !trs->trso_stopped)
  115. g_raid_write_metadata(sc, vol, NULL, NULL);
  116. }
  117. return (0);
  118. }
  119. static int
  120. g_raid_tr_event_raid0(struct g_raid_tr_object *tr,
  121. struct g_raid_subdisk *sd, u_int event)
  122. {
  123. struct g_raid_tr_raid0_object *trs;
  124. struct g_raid_softc *sc;
  125. struct g_raid_volume *vol;
  126. int state;
  127. trs = (struct g_raid_tr_raid0_object *)tr;
  128. vol = tr->tro_volume;
  129. sc = vol->v_softc;
  130. state = sd->sd_state;
  131. if (state != G_RAID_SUBDISK_S_NONE &&
  132. state != G_RAID_SUBDISK_S_FAILED &&
  133. state != G_RAID_SUBDISK_S_ACTIVE) {
  134. G_RAID_DEBUG1(1, sc,
  135. "Promote subdisk %s:%d from %s to ACTIVE.",
  136. vol->v_name, sd->sd_pos,
  137. g_raid_subdisk_state2str(sd->sd_state));
  138. g_raid_change_subdisk_state(sd, G_RAID_SUBDISK_S_ACTIVE);
  139. }
  140. if (state != sd->sd_state &&
  141. !trs->trso_starting && !trs->trso_stopped)
  142. g_raid_write_metadata(sc, vol, sd, NULL);
  143. g_raid_tr_update_state_raid0(vol);
  144. return (0);
  145. }
  146. static int
  147. g_raid_tr_start_raid0(struct g_raid_tr_object *tr)
  148. {
  149. struct g_raid_tr_raid0_object *trs;
  150. struct g_raid_volume *vol;
  151. trs = (struct g_raid_tr_raid0_object *)tr;
  152. vol = tr->tro_volume;
  153. trs->trso_starting = 0;
  154. g_raid_tr_update_state_raid0(vol);
  155. return (0);
  156. }
  157. static int
  158. g_raid_tr_stop_raid0(struct g_raid_tr_object *tr)
  159. {
  160. struct g_raid_tr_raid0_object *trs;
  161. struct g_raid_volume *vol;
  162. trs = (struct g_raid_tr_raid0_object *)tr;
  163. vol = tr->tro_volume;
  164. trs->trso_starting = 0;
  165. trs->trso_stopped = 1;
  166. g_raid_tr_update_state_raid0(vol);
  167. return (0);
  168. }
  169. static void
  170. g_raid_tr_iostart_raid0(struct g_raid_tr_object *tr, struct bio *bp)
  171. {
  172. struct g_raid_volume *vol;
  173. struct g_raid_subdisk *sd;
  174. struct bio_queue_head queue;
  175. struct bio *cbp;
  176. char *addr;
  177. off_t offset, start, length, nstripe, remain;
  178. u_int no, strip_size;
  179. vol = tr->tro_volume;
  180. if (vol->v_state != G_RAID_VOLUME_S_OPTIMAL &&
  181. vol->v_state != G_RAID_VOLUME_S_SUBOPTIMAL) {
  182. g_raid_iodone(bp, EIO);
  183. return;
  184. }
  185. if (bp->bio_cmd == BIO_FLUSH || bp->bio_cmd == BIO_SPEEDUP) {
  186. g_raid_tr_flush_common(tr, bp);
  187. return;
  188. }
  189. if ((bp->bio_flags & BIO_UNMAPPED) != 0)
  190. addr = NULL;
  191. else
  192. addr = bp->bio_data;
  193. strip_size = vol->v_strip_size;
  194. /* Stripe number. */
  195. nstripe = bp->bio_offset / strip_size;
  196. /* Start position in stripe. */
  197. start = bp->bio_offset % strip_size;
  198. /* Disk number. */
  199. no = nstripe % vol->v_disks_count;
  200. /* Stripe start position in disk. */
  201. offset = (nstripe / vol->v_disks_count) * strip_size;
  202. /* Length of data to operate. */
  203. remain = bp->bio_length;
  204. bioq_init(&queue);
  205. do {
  206. length = MIN(strip_size - start, remain);
  207. cbp = g_clone_bio(bp);
  208. if (cbp == NULL)
  209. goto failure;
  210. cbp->bio_offset = offset + start;
  211. cbp->bio_length = length;
  212. if ((bp->bio_flags & BIO_UNMAPPED) != 0 &&
  213. bp->bio_cmd != BIO_DELETE) {
  214. cbp->bio_ma_offset += (uintptr_t)addr;
  215. cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE;
  216. cbp->bio_ma_offset %= PAGE_SIZE;
  217. cbp->bio_ma_n = round_page(cbp->bio_ma_offset +
  218. cbp->bio_length) / PAGE_SIZE;
  219. } else
  220. cbp->bio_data = addr;
  221. cbp->bio_caller1 = &vol->v_subdisks[no];
  222. bioq_insert_tail(&queue, cbp);
  223. if (++no >= vol->v_disks_count) {
  224. no = 0;
  225. offset += strip_size;
  226. }
  227. remain -= length;
  228. if (bp->bio_cmd != BIO_DELETE)
  229. addr += length;
  230. start = 0;
  231. } while (remain > 0);
  232. while ((cbp = bioq_takefirst(&queue)) != NULL) {
  233. sd = cbp->bio_caller1;
  234. cbp->bio_caller1 = NULL;
  235. g_raid_subdisk_iostart(sd, cbp);
  236. }
  237. return;
  238. failure:
  239. while ((cbp = bioq_takefirst(&queue)) != NULL)
  240. g_destroy_bio(cbp);
  241. if (bp->bio_error == 0)
  242. bp->bio_error = ENOMEM;
  243. g_raid_iodone(bp, bp->bio_error);
  244. }
  245. static int
  246. g_raid_tr_kerneldump_raid0(struct g_raid_tr_object *tr,
  247. void *virtual, off_t boffset, size_t blength)
  248. {
  249. struct g_raid_volume *vol;
  250. char *addr;
  251. off_t offset, start, length, nstripe, remain;
  252. u_int no, strip_size;
  253. int error;
  254. vol = tr->tro_volume;
  255. if (vol->v_state != G_RAID_VOLUME_S_OPTIMAL)
  256. return (ENXIO);
  257. addr = virtual;
  258. strip_size = vol->v_strip_size;
  259. /* Stripe number. */
  260. nstripe = boffset / strip_size;
  261. /* Start position in stripe. */
  262. start = boffset % strip_size;
  263. /* Disk number. */
  264. no = nstripe % vol->v_disks_count;
  265. /* Stripe tart position in disk. */
  266. offset = (nstripe / vol->v_disks_count) * strip_size;
  267. /* Length of data to operate. */
  268. remain = blength;
  269. do {
  270. length = MIN(strip_size - start, remain);
  271. error = g_raid_subdisk_kerneldump(&vol->v_subdisks[no], addr,
  272. offset + start, length);
  273. if (error != 0)
  274. return (error);
  275. if (++no >= vol->v_disks_count) {
  276. no = 0;
  277. offset += strip_size;
  278. }
  279. remain -= length;
  280. addr += length;
  281. start = 0;
  282. } while (remain > 0);
  283. return (0);
  284. }
  285. static void
  286. g_raid_tr_iodone_raid0(struct g_raid_tr_object *tr,
  287. struct g_raid_subdisk *sd,struct bio *bp)
  288. {
  289. struct bio *pbp;
  290. pbp = bp->bio_parent;
  291. if (pbp->bio_error == 0)
  292. pbp->bio_error = bp->bio_error;
  293. g_destroy_bio(bp);
  294. pbp->bio_inbed++;
  295. if (pbp->bio_children == pbp->bio_inbed) {
  296. pbp->bio_completed = pbp->bio_length;
  297. g_raid_iodone(pbp, pbp->bio_error);
  298. }
  299. }
  /*
   * Free method: this module holds no resources of its own, so there is
   * nothing to release.  Always returns 0.
   */
  static int
  g_raid_tr_free_raid0(struct g_raid_tr_object *tr)
  {
      return (0);
  }
  /*
   * NOTE(review): G_RAID_TR_DECLARE is defined outside this file; presumably
   * it registers g_raid_tr_raid0_class with GEOM_RAID under the name
   * "RAID0" -- confirm against g_raid.h.
   */
  G_RAID_TR_DECLARE(raid0, "RAID0");