12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967 |
- /* $OpenBSD: softraid_raid5.c,v 1.23 2015/07/21 03:30:51 krw Exp $ */
- /*
- * Copyright (c) 2014 Joel Sing <jsing@openbsd.org>
- * Copyright (c) 2009 Marco Peereboom <marco@peereboom.us>
- * Copyright (c) 2009 Jordan Hargrave <jordan@openbsd.org>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
- #include "bio.h"
- #include <sys/param.h>
- #include <sys/systm.h>
- #include <sys/buf.h>
- #include <sys/device.h>
- #include <sys/ioctl.h>
- #include <sys/malloc.h>
- #include <sys/kernel.h>
- #include <sys/disk.h>
- #include <sys/rwlock.h>
- #include <sys/queue.h>
- #include <sys/fcntl.h>
- #include <sys/mount.h>
- #include <sys/sensors.h>
- #include <sys/stat.h>
- #include <sys/task.h>
- #include <sys/pool.h>
- #include <sys/conf.h>
- #include <sys/uio.h>
- #include <scsi/scsi_all.h>
- #include <scsi/scsiconf.h>
- #include <scsi/scsi_disk.h>
- #include <dev/softraidvar.h>
- /* RAID 5 functions. */
- int sr_raid5_create(struct sr_discipline *, struct bioc_createraid *,
- int, int64_t);
- int sr_raid5_assemble(struct sr_discipline *, struct bioc_createraid *,
- int, void *);
- int sr_raid5_init(struct sr_discipline *);
- int sr_raid5_rw(struct sr_workunit *);
- int sr_raid5_openings(struct sr_discipline *);
- void sr_raid5_intr(struct buf *);
- int sr_raid5_wu_done(struct sr_workunit *);
- void sr_raid5_set_chunk_state(struct sr_discipline *, int, int);
- void sr_raid5_set_vol_state(struct sr_discipline *);
- int sr_raid5_addio(struct sr_workunit *wu, int, daddr_t, long,
- void *, int, int, void *);
- int sr_raid5_regenerate(struct sr_workunit *, int, daddr_t, long,
- void *);
- int sr_raid5_write(struct sr_workunit *, struct sr_workunit *, int, int,
- daddr_t, long, void *, int, int);
- void sr_raid5_xor(void *, void *, int);
- void sr_raid5_rebuild(struct sr_discipline *);
- void sr_raid5_scrub(struct sr_discipline *);
- /* discipline initialisation. */
- void
- sr_raid5_discipline_init(struct sr_discipline *sd)
- {
- /* Fill out discipline members. */
- sd->sd_type = SR_MD_RAID5;
- strlcpy(sd->sd_name, "RAID 5", sizeof(sd->sd_name));
- sd->sd_capabilities = SR_CAP_SYSTEM_DISK | SR_CAP_AUTO_ASSEMBLE |
- SR_CAP_REBUILD | SR_CAP_REDUNDANT;
- sd->sd_max_ccb_per_wu = 4; /* only if stripsize <= MAXPHYS */
- sd->sd_max_wu = SR_RAID5_NOWU + 2; /* Two for scrub/rebuild. */
- /* Setup discipline specific function pointers. */
- sd->sd_assemble = sr_raid5_assemble;
- sd->sd_create = sr_raid5_create;
- sd->sd_openings = sr_raid5_openings;
- sd->sd_rebuild = sr_raid5_rebuild;
- sd->sd_scsi_rw = sr_raid5_rw;
- sd->sd_scsi_intr = sr_raid5_intr;
- sd->sd_scsi_wu_done = sr_raid5_wu_done;
- sd->sd_set_chunk_state = sr_raid5_set_chunk_state;
- sd->sd_set_vol_state = sr_raid5_set_vol_state;
- }
- int
- sr_raid5_create(struct sr_discipline *sd, struct bioc_createraid *bc,
- int no_chunk, int64_t coerced_size)
- {
- if (no_chunk < 3) {
- sr_error(sd->sd_sc, "%s requires three or more chunks",
- sd->sd_name);
- return EINVAL;
- }
- /*
- * XXX add variable strip size later even though MAXPHYS is really
- * the clever value, users like to tinker with that type of stuff.
- */
- sd->sd_meta->ssdi.ssd_strip_size = MAXPHYS;
- sd->sd_meta->ssdi.ssd_size = (coerced_size &
- ~(((u_int64_t)sd->sd_meta->ssdi.ssd_strip_size >>
- DEV_BSHIFT) - 1)) * (no_chunk - 1);
- return sr_raid5_init(sd);
- }
/*
 * Assemble an existing RAID 5 volume.  Nothing beyond the common runtime
 * initialisation is done here; bc, no_chunk and data are not used.
 */
int
sr_raid5_assemble(struct sr_discipline *sd, struct bioc_createraid *bc,
    int no_chunk, void *data)
{
	int			error;

	error = sr_raid5_init(sd);

	return error;
}
- int
- sr_raid5_init(struct sr_discipline *sd)
- {
- /* Initialise runtime values. */
- sd->mds.mdd_raid5.sr5_strip_bits =
- sr_validate_stripsize(sd->sd_meta->ssdi.ssd_strip_size);
- if (sd->mds.mdd_raid5.sr5_strip_bits == -1) {
- sr_error(sd->sd_sc, "invalid strip size");
- return EINVAL;
- }
- return 0;
- }
- int
- sr_raid5_openings(struct sr_discipline *sd)
- {
- /* Two work units per I/O, two for rebuild/scrub. */
- return ((sd->sd_max_wu - 2) >> 1);
- }
- void
- sr_raid5_set_chunk_state(struct sr_discipline *sd, int c, int new_state)
- {
- int old_state, s;
- DNPRINTF(SR_D_STATE, "%s: %s: %s: sr_raid_set_chunk_state %d -> %d\n",
- DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
- sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname, c, new_state);
- /* ok to go to splbio since this only happens in error path */
- s = splbio();
- old_state = sd->sd_vol.sv_chunks[c]->src_meta.scm_status;
- /* multiple IOs to the same chunk that fail will come through here */
- if (old_state == new_state)
- goto done;
- switch (old_state) {
- case BIOC_SDONLINE:
- switch (new_state) {
- case BIOC_SDOFFLINE:
- case BIOC_SDSCRUB:
- break;
- default:
- goto die;
- }
- break;
- case BIOC_SDOFFLINE:
- if (new_state == BIOC_SDREBUILD) {
- ;
- } else
- goto die;
- break;
- case BIOC_SDSCRUB:
- switch (new_state) {
- case BIOC_SDONLINE:
- case BIOC_SDOFFLINE:
- break;
- default:
- goto die;
- }
- break;
- case BIOC_SDREBUILD:
- switch (new_state) {
- case BIOC_SDONLINE:
- case BIOC_SDOFFLINE:
- break;
- default:
- goto die;
- }
- break;
- default:
- die:
- splx(s); /* XXX */
- panic("%s: %s: %s: invalid chunk state transition "
- "%d -> %d", DEVNAME(sd->sd_sc),
- sd->sd_meta->ssd_devname,
- sd->sd_vol.sv_chunks[c]->src_meta.scmi.scm_devname,
- old_state, new_state);
- /* NOTREACHED */
- }
- sd->sd_vol.sv_chunks[c]->src_meta.scm_status = new_state;
- sd->sd_set_vol_state(sd);
- sd->sd_must_flush = 1;
- task_add(systq, &sd->sd_meta_save_task);
- done:
- splx(s);
- }
- void
- sr_raid5_set_vol_state(struct sr_discipline *sd)
- {
- int states[SR_MAX_STATES];
- int new_state, i, s, nd;
- int old_state = sd->sd_vol_status;
- DNPRINTF(SR_D_STATE, "%s: %s: sr_raid_set_vol_state\n",
- DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);
- nd = sd->sd_meta->ssdi.ssd_chunk_no;
- for (i = 0; i < SR_MAX_STATES; i++)
- states[i] = 0;
- for (i = 0; i < nd; i++) {
- s = sd->sd_vol.sv_chunks[i]->src_meta.scm_status;
- if (s >= SR_MAX_STATES)
- panic("%s: %s: %s: invalid chunk state",
- DEVNAME(sd->sd_sc),
- sd->sd_meta->ssd_devname,
- sd->sd_vol.sv_chunks[i]->src_meta.scmi.scm_devname);
- states[s]++;
- }
- if (states[BIOC_SDONLINE] == nd)
- new_state = BIOC_SVONLINE;
- else if (states[BIOC_SDONLINE] < nd - 1)
- new_state = BIOC_SVOFFLINE;
- else if (states[BIOC_SDSCRUB] != 0)
- new_state = BIOC_SVSCRUB;
- else if (states[BIOC_SDREBUILD] != 0)
- new_state = BIOC_SVREBUILD;
- else if (states[BIOC_SDONLINE] == nd - 1)
- new_state = BIOC_SVDEGRADED;
- else {
- #ifdef SR_DEBUG
- DNPRINTF(SR_D_STATE, "%s: invalid volume state, old state "
- "was %d\n", DEVNAME(sd->sd_sc), old_state);
- for (i = 0; i < nd; i++)
- DNPRINTF(SR_D_STATE, "%s: chunk %d status = %d\n",
- DEVNAME(sd->sd_sc), i,
- sd->sd_vol.sv_chunks[i]->src_meta.scm_status);
- #endif
- panic("invalid volume state");
- }
- DNPRINTF(SR_D_STATE, "%s: %s: sr_raid5_set_vol_state %d -> %d\n",
- DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
- old_state, new_state);
- switch (old_state) {
- case BIOC_SVONLINE:
- switch (new_state) {
- case BIOC_SVONLINE: /* can go to same state */
- case BIOC_SVOFFLINE:
- case BIOC_SVDEGRADED:
- case BIOC_SVREBUILD: /* happens on boot */
- break;
- default:
- goto die;
- }
- break;
- case BIOC_SVOFFLINE:
- /* XXX this might be a little too much */
- goto die;
- case BIOC_SVDEGRADED:
- switch (new_state) {
- case BIOC_SVOFFLINE:
- case BIOC_SVREBUILD:
- case BIOC_SVDEGRADED: /* can go to the same state */
- break;
- default:
- goto die;
- }
- break;
- case BIOC_SVBUILDING:
- switch (new_state) {
- case BIOC_SVONLINE:
- case BIOC_SVOFFLINE:
- case BIOC_SVBUILDING: /* can go to the same state */
- break;
- default:
- goto die;
- }
- break;
- case BIOC_SVSCRUB:
- switch (new_state) {
- case BIOC_SVONLINE:
- case BIOC_SVOFFLINE:
- case BIOC_SVDEGRADED:
- case BIOC_SVSCRUB: /* can go to same state */
- break;
- default:
- goto die;
- }
- break;
- case BIOC_SVREBUILD:
- switch (new_state) {
- case BIOC_SVONLINE:
- case BIOC_SVOFFLINE:
- case BIOC_SVDEGRADED:
- case BIOC_SVREBUILD: /* can go to the same state */
- break;
- default:
- goto die;
- }
- break;
- default:
- die:
- panic("%s: %s: invalid volume state transition %d -> %d",
- DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
- old_state, new_state);
- /* NOTREACHED */
- }
- sd->sd_vol_status = new_state;
- }
- static inline int
- sr_raid5_chunk_online(struct sr_discipline *sd, int chunk)
- {
- switch (sd->sd_vol.sv_chunks[chunk]->src_meta.scm_status) {
- case BIOC_SDONLINE:
- case BIOC_SDSCRUB:
- return 1;
- default:
- return 0;
- }
- }
- static inline int
- sr_raid5_chunk_rebuild(struct sr_discipline *sd, int chunk)
- {
- switch (sd->sd_vol.sv_chunks[chunk]->src_meta.scm_status) {
- case BIOC_SDREBUILD:
- return 1;
- default:
- return 0;
- }
- }
- int
- sr_raid5_rw(struct sr_workunit *wu)
- {
- struct sr_workunit *wu_r = NULL;
- struct sr_discipline *sd = wu->swu_dis;
- struct scsi_xfer *xs = wu->swu_xs;
- struct sr_chunk *scp;
- daddr_t blkno, lba;
- int64_t chunk_offs, lbaoffs, offset, strip_offs;
- int64_t strip_bits, strip_no, strip_size;
- int64_t chunk, no_chunk;
- int64_t parity, row_size;
- long length, datalen;
- void *data;
- int s;
- /* blkno and scsi error will be handled by sr_validate_io */
- if (sr_validate_io(wu, &blkno, "sr_raid5_rw"))
- goto bad;
- DNPRINTF(SR_D_DIS, "%s: %s sr_raid5_rw %s: blkno %lld size %d\n",
- DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
- (xs->flags & SCSI_DATA_IN) ? "read" : "write",
- (long long)blkno, xs->datalen);
- strip_size = sd->sd_meta->ssdi.ssd_strip_size;
- strip_bits = sd->mds.mdd_raid5.sr5_strip_bits;
- no_chunk = sd->sd_meta->ssdi.ssd_chunk_no - 1;
- row_size = (no_chunk << strip_bits) >> DEV_BSHIFT;
- data = xs->data;
- datalen = xs->datalen;
- lbaoffs = blkno << DEV_BSHIFT;
- if (xs->flags & SCSI_DATA_OUT) {
- if ((wu_r = sr_scsi_wu_get(sd, SCSI_NOSLEEP)) == NULL){
- printf("%s: %s failed to get read work unit",
- DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);
- goto bad;
- }
- wu_r->swu_state = SR_WU_INPROGRESS;
- wu_r->swu_flags |= SR_WUF_DISCIPLINE;
- }
- wu->swu_blk_start = 0;
- while (datalen != 0) {
- strip_no = lbaoffs >> strip_bits;
- strip_offs = lbaoffs & (strip_size - 1);
- chunk_offs = (strip_no / no_chunk) << strip_bits;
- offset = chunk_offs + strip_offs;
- /* get size remaining in this stripe */
- length = MIN(strip_size - strip_offs, datalen);
- /*
- * Map disk offset to data and parity chunks, using a left
- * asymmetric algorithm for the parity assignment.
- */
- chunk = strip_no % no_chunk;
- parity = no_chunk - ((strip_no / no_chunk) % (no_chunk + 1));
- if (chunk >= parity)
- chunk++;
- lba = offset >> DEV_BSHIFT;
- /* XXX big hammer.. exclude I/O from entire stripe */
- if (wu->swu_blk_start == 0)
- wu->swu_blk_start = (strip_no / no_chunk) * row_size;
- wu->swu_blk_end = (strip_no / no_chunk) * row_size +
- (row_size - 1);
- scp = sd->sd_vol.sv_chunks[chunk];
- if (xs->flags & SCSI_DATA_IN) {
- switch (scp->src_meta.scm_status) {
- case BIOC_SDONLINE:
- case BIOC_SDSCRUB:
- /*
- * Chunk is online, issue a single read
- * request.
- */
- if (sr_raid5_addio(wu, chunk, lba, length,
- data, xs->flags, 0, NULL))
- goto bad;
- break;
- case BIOC_SDOFFLINE:
- case BIOC_SDREBUILD:
- case BIOC_SDHOTSPARE:
- if (sr_raid5_regenerate(wu, chunk, lba,
- length, data))
- goto bad;
- break;
- default:
- printf("%s: is offline, can't read\n",
- DEVNAME(sd->sd_sc));
- goto bad;
- }
- } else {
- if (sr_raid5_write(wu, wu_r, chunk, parity, lba,
- length, data, xs->flags, 0))
- goto bad;
- }
- /* advance to next block */
- lbaoffs += length;
- datalen -= length;
- data += length;
- }
- s = splbio();
- if (wu_r) {
- if (wu_r->swu_io_count > 0) {
- /* collide write request with reads */
- wu_r->swu_blk_start = wu->swu_blk_start;
- wu_r->swu_blk_end = wu->swu_blk_end;
- wu->swu_state = SR_WU_DEFERRED;
- wu_r->swu_collider = wu;
- TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu, swu_link);
- wu = wu_r;
- } else {
- sr_scsi_wu_put(sd, wu_r);
- }
- }
- splx(s);
- sr_schedule_wu(wu);
- return (0);
- bad:
- /* wu is unwound by sr_wu_put */
- if (wu_r)
- sr_scsi_wu_put(sd, wu_r);
- return (1);
- }
- int
- sr_raid5_regenerate(struct sr_workunit *wu, int chunk, daddr_t blkno,
- long len, void *data)
- {
- struct sr_discipline *sd = wu->swu_dis;
- int i;
- /*
- * Regenerate a block on a RAID 5 volume by xoring the data and parity
- * from all of the remaining online chunks. This requires the parity
- * to already be correct.
- */
- DNPRINTF(SR_D_DIS, "%s: %s sr_raid5_regenerate chunk %d offline, "
- "regenerating block %llu\n",
- DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, chunk, blkno);
- memset(data, 0, len);
- for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) {
- if (i == chunk)
- continue;
- if (!sr_raid5_chunk_online(sd, i))
- goto bad;
- if (sr_raid5_addio(wu, i, blkno, len, NULL, SCSI_DATA_IN,
- 0, data))
- goto bad;
- }
- return (0);
- bad:
- return (1);
- }
- int
- sr_raid5_write(struct sr_workunit *wu, struct sr_workunit *wu_r, int chunk,
- int parity, daddr_t blkno, long len, void *data, int xsflags,
- int ccbflags)
- {
- struct sr_discipline *sd = wu->swu_dis;
- struct scsi_xfer *xs = wu->swu_xs;
- void *xorbuf;
- int chunk_online, chunk_rebuild;
- int parity_online, parity_rebuild;
- int other_offline = 0, other_rebuild = 0;
- int i;
- /*
- * Perform a write to a RAID 5 volume. This write routine does not
- * require the parity to already be correct and will operate on a
- * uninitialised volume.
- *
- * There are four possible cases:
- *
- * 1) All data chunks and parity are online. In this case we read the
- * data from all data chunks, except the one we are writing to, in
- * order to calculate and write the new parity.
- *
- * 2) The parity chunk is offline. In this case we only need to write
- * to the data chunk. No parity calculation is required.
- *
- * 3) The data chunk is offline. In this case we read the data from all
- * online chunks in order to calculate and write the new parity.
- * This is the same as (1) except we do not write the data chunk.
- *
- * 4) A different data chunk is offline. The new parity is calculated
- * by taking the existing parity, xoring the original data and
- * xoring in the new data. This requires that the parity already be
- * correct, which it will be if any of the data chunks has
- * previously been written.
- *
- * There is an additional complication introduced by a chunk that is
- * being rebuilt. If this is the data or parity chunk, then we want
- * to write to it as per normal. If it is another data chunk then we
- * need to presume that it has not yet been regenerated and use the
- * same method as detailed in (4) above.
- */
- DNPRINTF(SR_D_DIS, "%s: %s sr_raid5_write chunk %i parity %i "
- "blkno %llu\n", DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
- chunk, parity, (unsigned long long)blkno);
- chunk_online = sr_raid5_chunk_online(sd, chunk);
- chunk_rebuild = sr_raid5_chunk_rebuild(sd, chunk);
- parity_online = sr_raid5_chunk_online(sd, parity);
- parity_rebuild = sr_raid5_chunk_rebuild(sd, parity);
- for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) {
- if (i == chunk || i == parity)
- continue;
- if (sr_raid5_chunk_rebuild(sd, i))
- other_rebuild = 1;
- else if (!sr_raid5_chunk_online(sd, i))
- other_offline = 1;
- }
- DNPRINTF(SR_D_DIS, "%s: %s chunk online %d, parity online %d, "
- "other offline %d\n", DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
- chunk_online, parity_online, other_offline);
- if (!parity_online && !parity_rebuild)
- goto data_write;
- xorbuf = sr_block_get(sd, len);
- if (xorbuf == NULL)
- goto bad;
- memcpy(xorbuf, data, len);
- if (other_offline || other_rebuild) {
- /*
- * XXX - If we can guarantee that this LBA has been scrubbed
- * then we can also take this faster path.
- */
- /* Read in existing data and existing parity. */
- if (sr_raid5_addio(wu_r, chunk, blkno, len, NULL,
- SCSI_DATA_IN, 0, xorbuf))
- goto bad;
- if (sr_raid5_addio(wu_r, parity, blkno, len, NULL,
- SCSI_DATA_IN, 0, xorbuf))
- goto bad;
- } else {
- /* Read in existing data from all other chunks. */
- for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) {
- if (i == chunk || i == parity)
- continue;
- if (sr_raid5_addio(wu_r, i, blkno, len, NULL,
- SCSI_DATA_IN, 0, xorbuf))
- goto bad;
- }
- }
- /* Write new parity. */
- if (sr_raid5_addio(wu, parity, blkno, len, xorbuf, xs->flags,
- SR_CCBF_FREEBUF, NULL))
- goto bad;
- data_write:
- /* Write new data. */
- if (chunk_online || chunk_rebuild)
- if (sr_raid5_addio(wu, chunk, blkno, len, data, xs->flags,
- 0, NULL))
- goto bad;
- return (0);
- bad:
- return (1);
- }
- void
- sr_raid5_intr(struct buf *bp)
- {
- struct sr_ccb *ccb = (struct sr_ccb *)bp;
- struct sr_workunit *wu = ccb->ccb_wu;
- struct sr_discipline *sd = wu->swu_dis;
- int s;
- DNPRINTF(SR_D_INTR, "%s: sr_raid5_intr bp %p xs %p\n",
- DEVNAME(sd->sd_sc), bp, wu->swu_xs);
- s = splbio();
- sr_ccb_done(ccb);
- /* XXX - Should this be done via the taskq? */
- /* XOR data to result. */
- if (ccb->ccb_state == SR_CCB_OK && ccb->ccb_opaque)
- sr_raid5_xor(ccb->ccb_opaque, ccb->ccb_buf.b_data,
- ccb->ccb_buf.b_bcount);
- /* Free allocated data buffer. */
- if (ccb->ccb_flags & SR_CCBF_FREEBUF) {
- sr_block_put(sd, ccb->ccb_buf.b_data, ccb->ccb_buf.b_bcount);
- ccb->ccb_buf.b_data = NULL;
- }
- sr_wu_done(wu);
- splx(s);
- }
- int
- sr_raid5_wu_done(struct sr_workunit *wu)
- {
- struct sr_discipline *sd = wu->swu_dis;
- struct scsi_xfer *xs = wu->swu_xs;
- /* XXX - we have no way of propagating errors... */
- if (wu->swu_flags & (SR_WUF_DISCIPLINE | SR_WUF_REBUILD))
- return SR_WU_OK;
- /* XXX - This is insufficient for RAID 5. */
- if (wu->swu_ios_succeeded > 0) {
- xs->error = XS_NOERROR;
- return SR_WU_OK;
- }
- if (xs->flags & SCSI_DATA_IN) {
- printf("%s: retrying read on block %lld\n",
- sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start);
- sr_wu_release_ccbs(wu);
- wu->swu_state = SR_WU_RESTART;
- if (sd->sd_scsi_rw(wu) == 0)
- return SR_WU_RESTART;
- } else {
- /* XXX - retry write if we just went from online to degraded. */
- printf("%s: permanently fail write on block %lld\n",
- sd->sd_meta->ssd_devname, (long long)wu->swu_blk_start);
- }
- wu->swu_state = SR_WU_FAILED;
- xs->error = XS_DRIVER_STUFFUP;
- return SR_WU_FAILED;
- }
- int
- sr_raid5_addio(struct sr_workunit *wu, int chunk, daddr_t blkno,
- long len, void *data, int xsflags, int ccbflags, void *xorbuf)
- {
- struct sr_discipline *sd = wu->swu_dis;
- struct sr_ccb *ccb;
- DNPRINTF(SR_D_DIS, "sr_raid5_addio: %s chunk %d block %lld "
- "length %ld %s\n", (xsflags & SCSI_DATA_IN) ? "read" : "write",
- chunk, (long long)blkno, len, xorbuf ? "X0R" : "-");
- /* Allocate temporary buffer. */
- if (data == NULL) {
- data = sr_block_get(sd, len);
- if (data == NULL)
- return (-1);
- ccbflags |= SR_CCBF_FREEBUF;
- }
- ccb = sr_ccb_rw(sd, chunk, blkno, len, data, xsflags, ccbflags);
- if (ccb == NULL) {
- if (ccbflags & SR_CCBF_FREEBUF)
- sr_block_put(sd, data, len);
- return (-1);
- }
- ccb->ccb_opaque = xorbuf;
- sr_wu_enqueue_ccb(wu, ccb);
- return (0);
- }
/*
 * XOR len bytes of b into a (a[i] ^= b[i]).
 *
 * The bulk of the buffer is processed 32 bits at a time; any remaining
 * one to three bytes are then processed individually so that a length
 * which is not a multiple of four is still fully covered.  A zero or
 * negative length is a no-op.
 *
 * NOTE(review): both buffers are accessed through uint32_t pointers and
 * are therefore assumed to be suitably aligned - confirm for callers.
 */
void
sr_raid5_xor(void *a, void *b, int len)
{
	uint32_t		*xa = a, *xb = b;
	uint8_t			*ca, *cb;
	int			 words, tail;

	if (len <= 0)
		return;

	words = len >> 2;
	tail = len & 3;

	while (words--)
		*xa++ ^= *xb++;

	/* Finish off the bytes that did not fill a whole word. */
	ca = (uint8_t *)xa;
	cb = (uint8_t *)xb;
	while (tail--)
		*ca++ ^= *cb++;
}
- void
- sr_raid5_rebuild(struct sr_discipline *sd)
- {
- int64_t strip_no, strip_size, strip_bits, i, psz, rb, restart;
- int64_t chunk_count, chunk_strips, chunk_lba, chunk_size, row_size;
- struct sr_workunit *wu_r, *wu_w;
- int s, slept, percent = 0, old_percent = -1;
- int rebuild_chunk = -1;
- void *xorbuf;
- /* Find the rebuild chunk. */
- for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) {
- if (sr_raid5_chunk_rebuild(sd, i)) {
- rebuild_chunk = i;
- break;
- }
- }
- if (rebuild_chunk == -1)
- goto bad;
- strip_size = sd->sd_meta->ssdi.ssd_strip_size;
- strip_bits = sd->mds.mdd_raid5.sr5_strip_bits;
- chunk_count = sd->sd_meta->ssdi.ssd_chunk_no - 1;
- chunk_size = sd->sd_meta->ssdi.ssd_size / chunk_count;
- chunk_strips = (chunk_size << DEV_BSHIFT) >> strip_bits;
- row_size = (chunk_count << strip_bits) >> DEV_BSHIFT;
- DNPRINTF(SR_D_REBUILD, "%s: %s sr_raid5_rebuild volume size = %lld, "
- "chunk count = %lld, chunk size = %lld, chunk strips = %lld, "
- "row size = %lld\n", DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname,
- sd->sd_meta->ssdi.ssd_size, chunk_count, chunk_size, chunk_strips,
- row_size);
- restart = sd->sd_meta->ssd_rebuild / row_size;
- if (restart > chunk_strips) {
- printf("%s: bogus rebuild restart offset, starting from 0\n",
- DEVNAME(sd->sd_sc));
- restart = 0;
- }
- if (restart != 0) {
- psz = sd->sd_meta->ssdi.ssd_size;
- rb = sd->sd_meta->ssd_rebuild;
- if (rb > 0)
- percent = 100 - ((psz * 100 - rb * 100) / psz) - 1;
- else
- percent = 0;
- printf("%s: resuming rebuild on %s at %d%%\n",
- DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname, percent);
- }
- for (strip_no = restart; strip_no < chunk_strips; strip_no++) {
- chunk_lba = (strip_size >> DEV_BSHIFT) * strip_no;
- DNPRINTF(SR_D_REBUILD, "%s: %s rebuild strip %lld, "
- "chunk lba = %lld\n", DEVNAME(sd->sd_sc),
- sd->sd_meta->ssd_devname, strip_no, chunk_lba);
- wu_w = sr_scsi_wu_get(sd, 0);
- wu_r = sr_scsi_wu_get(sd, 0);
- xorbuf = sr_block_get(sd, strip_size);
- if (sr_raid5_regenerate(wu_r, rebuild_chunk, chunk_lba,
- strip_size, xorbuf))
- goto bad;
- if (sr_raid5_addio(wu_w, rebuild_chunk, chunk_lba, strip_size,
- xorbuf, SCSI_DATA_OUT, SR_CCBF_FREEBUF, NULL))
- goto bad;
- /* Collide write work unit with read work unit. */
- wu_r->swu_state = SR_WU_INPROGRESS;
- wu_r->swu_flags |= SR_WUF_REBUILD;
- wu_w->swu_state = SR_WU_DEFERRED;
- wu_w->swu_flags |= SR_WUF_REBUILD | SR_WUF_WAKEUP;
- wu_r->swu_collider = wu_w;
- /* Block I/O to this strip while we rebuild it. */
- wu_r->swu_blk_start = (strip_no / chunk_count) * row_size;
- wu_r->swu_blk_end = wu_r->swu_blk_start + row_size - 1;
- wu_w->swu_blk_start = wu_r->swu_blk_start;
- wu_w->swu_blk_end = wu_r->swu_blk_end;
- DNPRINTF(SR_D_REBUILD, "%s: %s rebuild swu_blk_start = %lld, "
- "swu_blk_end = %lld\n", DEVNAME(sd->sd_sc),
- sd->sd_meta->ssd_devname,
- wu_r->swu_blk_start, wu_r->swu_blk_end);
- s = splbio();
- TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu_w, swu_link);
- splx(s);
- sr_schedule_wu(wu_r);
- slept = 0;
- while ((wu_w->swu_flags & SR_WUF_REBUILDIOCOMP) == 0) {
- tsleep(wu_w, PRIBIO, "sr_rebuild", 0);
- slept = 1;
- }
- if (!slept)
- tsleep(sd->sd_sc, PWAIT, "sr_yield", 1);
- sr_scsi_wu_put(sd, wu_r);
- sr_scsi_wu_put(sd, wu_w);
- sd->sd_meta->ssd_rebuild = chunk_lba * chunk_count;
- psz = sd->sd_meta->ssdi.ssd_size;
- rb = sd->sd_meta->ssd_rebuild;
- if (rb > 0)
- percent = 100 - ((psz * 100 - rb * 100) / psz) - 1;
- else
- percent = 0;
- if (percent != old_percent && strip_no != chunk_strips - 1) {
- if (sr_meta_save(sd, SR_META_DIRTY))
- printf("%s: could not save metadata to %s\n",
- DEVNAME(sd->sd_sc),
- sd->sd_meta->ssd_devname);
- old_percent = percent;
- }
- if (sd->sd_reb_abort)
- goto abort;
- }
- DNPRINTF(SR_D_REBUILD, "%s: %s rebuild complete\n", DEVNAME(sd->sd_sc),
- sd->sd_meta->ssd_devname);
- /* all done */
- sd->sd_meta->ssd_rebuild = 0;
- for (i = 0; i < sd->sd_meta->ssdi.ssd_chunk_no; i++) {
- if (sd->sd_vol.sv_chunks[i]->src_meta.scm_status ==
- BIOC_SDREBUILD) {
- sd->sd_set_chunk_state(sd, i, BIOC_SDONLINE);
- break;
- }
- }
- return;
- abort:
- if (sr_meta_save(sd, SR_META_DIRTY))
- printf("%s: could not save metadata to %s\n",
- DEVNAME(sd->sd_sc), sd->sd_meta->ssd_devname);
- bad:
- return;
- }
#if 0
/*
 * Parity scrub - compiled out.
 *
 * For every strip this queues reads of all non-parity chunks on a read
 * work unit, xoring them into a buffer, and a deferred write of that
 * buffer to the parity chunk.  The block number passed to
 * sr_raid5_addio() is the literal 0xBADCAFE, and the return values of
 * sr_raid5_addio() and sr_block_get() are not checked.
 */
void
sr_raid5_scrub(struct sr_discipline *sd)
{
	int64_t			strip_no, strip_size, no_chunk, parity;
	int64_t			max_strip, strip_bits;
	int64_t			c;
	struct sr_workunit	*wu_r, *wu_w;
	int			spl, slept;
	void			*xorbuf;

	wu_w = sr_scsi_wu_get(sd, 0);
	wu_r = sr_scsi_wu_get(sd, 0);

	no_chunk = sd->sd_meta->ssdi.ssd_chunk_no - 1;
	strip_size = sd->sd_meta->ssdi.ssd_strip_size;
	strip_bits = sd->mds.mdd_raid5.sr5_strip_bits;
	max_strip = sd->sd_meta->ssdi.ssd_size >> strip_bits;

	strip_no = 0;
	while (strip_no < max_strip) {
		parity = no_chunk - ((strip_no / no_chunk) % (no_chunk + 1));

		/* Read every data chunk, accumulating the xor. */
		xorbuf = sr_block_get(sd, strip_size);
		for (c = 0; c <= no_chunk; c++) {
			if (c == parity)
				continue;
			sr_raid5_addio(wu_r, c, 0xBADCAFE, strip_size,
			    NULL, SCSI_DATA_IN, 0, xorbuf);
		}

		/* Write the accumulated xor to the parity chunk. */
		sr_raid5_addio(wu_w, parity, 0xBADCAFE, strip_size, xorbuf,
		    SCSI_DATA_OUT, SR_CCBF_FREEBUF, NULL);

		wu_r->swu_flags |= SR_WUF_REBUILD;

		/* Collide wu_w with wu_r */
		wu_w->swu_state = SR_WU_DEFERRED;
		wu_w->swu_flags |= SR_WUF_REBUILD | SR_WUF_WAKEUP;
		wu_r->swu_collider = wu_w;

		spl = splbio();
		TAILQ_INSERT_TAIL(&sd->sd_wu_defq, wu_w, swu_link);
		splx(spl);

		wu_r->swu_state = SR_WU_INPROGRESS;
		sr_schedule_wu(wu_r);

		/* Wait for the write half to complete. */
		slept = 0;
		while ((wu_w->swu_flags & SR_WUF_REBUILDIOCOMP) == 0) {
			tsleep(wu_w, PRIBIO, "sr_scrub", 0);
			slept = 1;
		}
		if (!slept)
			tsleep(sd->sd_sc, PWAIT, "sr_yield", 1);

		strip_no++;
	}
done:
	return;
}
#endif
|