mpath.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611
  1. /* $OpenBSD: mpath.c,v 1.40 2015/06/07 19:13:27 krw Exp $ */
  2. /*
  3. * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
  4. *
  5. * Permission to use, copy, modify, and distribute this software for any
  6. * purpose with or without fee is hereby granted, provided that the above
  7. * copyright notice and this permission notice appear in all copies.
  8. *
  9. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  10. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  11. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  12. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  13. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  14. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  15. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  16. */
  17. #include <sys/param.h>
  18. #include <sys/systm.h>
  19. #include <sys/kernel.h>
  20. #include <sys/malloc.h>
  21. #include <sys/device.h>
  22. #include <sys/conf.h>
  23. #include <sys/queue.h>
  24. #include <sys/rwlock.h>
  25. #include <sys/ioctl.h>
  26. #include <sys/poll.h>
  27. #include <sys/selinfo.h>
  28. #include <scsi/scsi_all.h>
  29. #include <scsi/scsiconf.h>
  30. #include <scsi/mpathvar.h>
/* width of the virtual mpath scsi bus; one target slot per mpath_dev */
#define MPATH_BUSWIDTH 256

int		mpath_match(struct device *, void *, void *);
void		mpath_attach(struct device *, struct device *, void *);
void		mpath_shutdown(void *);

TAILQ_HEAD(mpath_paths, mpath_path);

/*
 * A group of paths to the same device, identified by the id the path
 * driver hands to mpath_path_attach().
 */
struct mpath_group {
	TAILQ_ENTRY(mpath_group) g_entry;	/* entry on mpath_dev d_groups */
	struct mpath_paths	 g_paths;	/* paths in this group */
	struct mpath_dev	*g_dev;		/* backpointer to the device */
	u_int			 g_id;		/* group id from the path driver */
};
TAILQ_HEAD(mpath_groups, mpath_group);

/*
 * One multipathed device: the set of path groups that lead to it, plus
 * the queue of xfers waiting for a path.
 */
struct mpath_dev {
	struct mutex		 d_mtx;		/* protects the lists/cursors below */
	struct scsi_xfer_list	 d_xfers;	/* xfers waiting to be started */
	struct mpath_path	*d_next_path;	/* round-robin path cursor */
	struct mpath_groups	 d_groups;	/* groups; head group is preferred */
	struct mpath_group	*d_failover_iter; /* group the failover scan is on */
	struct timeout		 d_failover_tmo; /* reschedules a failed scan */
	u_int			 d_failover;	/* pending-failover state for
						 * scsi_pending_start/finish */
	const struct mpath_ops	*d_ops;		/* path driver ops (checksense,
						 * status) */
	struct devid		*d_id;		/* device id shared by all paths */
};
/* softc of the single mpath pseudo-device */
struct mpath_softc {
	struct device		 sc_dev;
	struct scsi_link	 sc_link;	/* link onto the virtual bus */
	struct scsibus_softc	*sc_scsibus;	/* bus attached in mpath_attach */
	struct mpath_dev	*sc_devs[MPATH_BUSWIDTH]; /* indexed by target */
};
#define DEVNAME(_s) ((_s)->sc_dev.dv_xname)

/* the single mpath instance, set by mpath_attach() */
struct mpath_softc *mpath;
/* autoconf glue */
struct cfattach mpath_ca = {
	sizeof(struct mpath_softc),
	mpath_match,
	mpath_attach
};

struct cfdriver mpath_cd = {
	NULL,
	"mpath",
	DV_DULL
};
void		mpath_cmd(struct scsi_xfer *);
void		mpath_minphys(struct buf *, struct scsi_link *);
int		mpath_probe(struct scsi_link *);

struct mpath_path *mpath_next_path(struct mpath_dev *);
void		mpath_done(struct scsi_xfer *);

void		mpath_failover(struct mpath_dev *);
void		mpath_failover_start(void *);
void		mpath_failover_check(struct mpath_dev *);

/* adapter switch for the virtual mpath bus */
struct scsi_adapter mpath_switch = {
	mpath_cmd,
	scsi_minphys,	/* NOTE(review): mpath_minphys() is defined below but
			 * not registered here — confirm this is intentional */
	mpath_probe
};

void		mpath_xs_stuffup(struct scsi_xfer *);
  86. int
  87. mpath_match(struct device *parent, void *match, void *aux)
  88. {
  89. return (1);
  90. }
  91. void
  92. mpath_attach(struct device *parent, struct device *self, void *aux)
  93. {
  94. struct mpath_softc *sc = (struct mpath_softc *)self;
  95. struct scsibus_attach_args saa;
  96. mpath = sc;
  97. printf("\n");
  98. sc->sc_link.adapter = &mpath_switch;
  99. sc->sc_link.adapter_softc = sc;
  100. sc->sc_link.adapter_target = MPATH_BUSWIDTH;
  101. sc->sc_link.adapter_buswidth = MPATH_BUSWIDTH;
  102. sc->sc_link.luns = 1;
  103. sc->sc_link.openings = 1024; /* XXX magical */
  104. bzero(&saa, sizeof(saa));
  105. saa.saa_sc_link = &sc->sc_link;
  106. sc->sc_scsibus = (struct scsibus_softc *)config_found(&sc->sc_dev,
  107. &saa, scsiprint);
  108. }
/*
 * Fail an xfer back to its issuer with XS_DRIVER_STUFFUP.
 */
void
mpath_xs_stuffup(struct scsi_xfer *xs)
{
	xs->error = XS_DRIVER_STUFFUP;
	scsi_done(xs);
}
  115. int
  116. mpath_probe(struct scsi_link *link)
  117. {
  118. struct mpath_softc *sc = link->adapter_softc;
  119. struct mpath_dev *d = sc->sc_devs[link->target];
  120. if (link->lun != 0 || d == NULL)
  121. return (ENXIO);
  122. link->id = devid_copy(d->d_id);
  123. return (0);
  124. }
/*
 * Round-robin path selection. Returns the current path and advances
 * the cursor to the next path in the same group, wrapping back to the
 * first path of the head (preferred) group at the end of the list.
 * Must be called with d->d_mtx held (all callers in this file do).
 */
struct mpath_path *
mpath_next_path(struct mpath_dev *d)
{
	struct mpath_group *g;
	struct mpath_path *p;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("%s: d is NULL", __func__);
#endif

	p = d->d_next_path;
	if (p != NULL) {
		d->d_next_path = TAILQ_NEXT(p, p_entry);
		/* wrap to the first path of the first group */
		if (d->d_next_path == NULL &&
		    (g = TAILQ_FIRST(&d->d_groups)) != NULL)
			d->d_next_path = TAILQ_FIRST(&g->g_paths);
	}

	return (p);
}
/*
 * Adapter entry point: run an xfer issued on the virtual mpath bus on
 * one of the physical paths of the target device.
 */
void
mpath_cmd(struct scsi_xfer *xs)
{
	struct scsi_link *link = xs->sc_link;
	struct mpath_softc *sc = link->adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_path *p;
	struct scsi_xfer *mxs;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("mpath_cmd issued against nonexistant device");
#endif

	if (ISSET(xs->flags, SCSI_POLL)) {
		/*
		 * Polled commands bypass the xsh machinery: pick a path,
		 * clone the command onto it and run it synchronously.
		 */
		mtx_enter(&d->d_mtx);
		p = mpath_next_path(d);
		mtx_leave(&d->d_mtx);
		if (p == NULL) {
			mpath_xs_stuffup(xs);
			return;
		}

		mxs = scsi_xs_get(p->p_link, xs->flags);
		if (mxs == NULL) {
			mpath_xs_stuffup(xs);
			return;
		}

		/* copy the command onto the path's xfer */
		memcpy(mxs->cmd, xs->cmd, xs->cmdlen);
		mxs->cmdlen = xs->cmdlen;
		mxs->data = xs->data;
		mxs->datalen = xs->datalen;
		mxs->retries = xs->retries;
		mxs->timeout = xs->timeout;
		mxs->bp = xs->bp;

		scsi_xs_sync(mxs);

		/* propagate the result back to the original xfer */
		xs->error = mxs->error;
		xs->status = mxs->status;
		xs->resid = mxs->resid;
		memcpy(&xs->sense, &mxs->sense, sizeof(xs->sense));

		scsi_xs_put(mxs);
		scsi_done(xs);
		return;
	}

	/* queue the xfer; mpath_start() picks it up when the path runs */
	mtx_enter(&d->d_mtx);
	SIMPLEQ_INSERT_TAIL(&d->d_xfers, xs, xfer_list);
	p = mpath_next_path(d);
	mtx_leave(&d->d_mtx);

	if (p != NULL)
		scsi_xsh_add(&p->p_xsh);
}
/*
 * xsh handler for a path: dequeue the next xfer waiting on the device
 * and execute it on this path's link via the xfer mxs that the xsh
 * machinery handed us.
 */
void
mpath_start(struct mpath_path *p, struct scsi_xfer *mxs)
{
	struct mpath_dev *d = p->p_group->g_dev;
	struct scsi_xfer *xs;
	int addxsh = 0;

	/* dying path or detached group: give the xfer back unused */
	if (ISSET(p->p_link->state, SDEV_S_DYING) || d == NULL)
		goto fail;

	mtx_enter(&d->d_mtx);
	xs = SIMPLEQ_FIRST(&d->d_xfers);
	if (xs != NULL) {
		SIMPLEQ_REMOVE_HEAD(&d->d_xfers, xfer_list);

		/* more work queued: rearm the handler after this xfer */
		if (!SIMPLEQ_EMPTY(&d->d_xfers))
			addxsh = 1;
	}
	mtx_leave(&d->d_mtx);

	if (xs == NULL)
		goto fail;

	/* clone the command onto the path's xfer */
	memcpy(mxs->cmd, xs->cmd, xs->cmdlen);
	mxs->cmdlen = xs->cmdlen;
	mxs->data = xs->data;
	mxs->datalen = xs->datalen;
	mxs->retries = xs->retries;
	mxs->timeout = xs->timeout;
	mxs->bp = xs->bp;
	mxs->flags = xs->flags;
	mxs->cookie = xs;	/* mpath_done() needs the original xfer */
	mxs->done = mpath_done;

	scsi_xs_exec(mxs);

	if (addxsh)
		scsi_xsh_add(&p->p_xsh);

	return;
fail:
	scsi_xs_put(mxs);
}
  226. void
  227. mpath_done(struct scsi_xfer *mxs)
  228. {
  229. struct scsi_xfer *xs = mxs->cookie;
  230. struct scsi_link *link = xs->sc_link;
  231. struct mpath_softc *sc = link->adapter_softc;
  232. struct mpath_dev *d = sc->sc_devs[link->target];
  233. struct mpath_path *p;
  234. switch (mxs->error) {
  235. case XS_SELTIMEOUT: /* physical path is gone, try the next */
  236. case XS_RESET:
  237. mtx_enter(&d->d_mtx);
  238. SIMPLEQ_INSERT_HEAD(&d->d_xfers, xs, xfer_list);
  239. p = mpath_next_path(d);
  240. mtx_leave(&d->d_mtx);
  241. scsi_xs_put(mxs);
  242. if (p != NULL)
  243. scsi_xsh_add(&p->p_xsh);
  244. return;
  245. case XS_SENSE:
  246. switch (d->d_ops->op_checksense(mxs)) {
  247. case MPATH_SENSE_FAILOVER:
  248. mtx_enter(&d->d_mtx);
  249. SIMPLEQ_INSERT_HEAD(&d->d_xfers, xs, xfer_list);
  250. p = mpath_next_path(d);
  251. mtx_leave(&d->d_mtx);
  252. scsi_xs_put(mxs);
  253. mpath_failover(d);
  254. return;
  255. case MPATH_SENSE_DECLINED:
  256. break;
  257. #ifdef DIAGNOSTIC
  258. default:
  259. panic("unexpected return from checksense");
  260. #endif
  261. }
  262. break;
  263. }
  264. xs->error = mxs->error;
  265. xs->status = mxs->status;
  266. xs->resid = mxs->resid;
  267. memcpy(&xs->sense, &mxs->sense, sizeof(xs->sense));
  268. scsi_xs_put(mxs);
  269. scsi_done(xs);
  270. }
  271. void
  272. mpath_failover(struct mpath_dev *d)
  273. {
  274. if (!scsi_pending_start(&d->d_mtx, &d->d_failover))
  275. return;
  276. mpath_failover_start(d);
  277. }
  278. void
  279. mpath_failover_start(void *xd)
  280. {
  281. struct mpath_dev *d = xd;
  282. mtx_enter(&d->d_mtx);
  283. d->d_failover_iter = TAILQ_FIRST(&d->d_groups);
  284. mtx_leave(&d->d_mtx);
  285. mpath_failover_check(d);
  286. }
  287. void
  288. mpath_failover_check(struct mpath_dev *d)
  289. {
  290. struct mpath_group *g = d->d_failover_iter;
  291. struct mpath_path *p;
  292. if (g == NULL)
  293. timeout_add_sec(&d->d_failover_tmo, 1);
  294. else {
  295. p = TAILQ_FIRST(&g->g_paths);
  296. d->d_ops->op_status(p->p_link);
  297. }
  298. }
  299. void
  300. mpath_path_status(struct mpath_path *p, int status)
  301. {
  302. struct mpath_group *g = p->p_group;
  303. struct mpath_dev *d = g->g_dev;
  304. mtx_enter(&d->d_mtx);
  305. if (status == MPATH_S_ACTIVE) {
  306. TAILQ_REMOVE(&d->d_groups, g, g_entry);
  307. TAILQ_INSERT_HEAD(&d->d_groups, g, g_entry);
  308. d->d_next_path = p;
  309. } else
  310. d->d_failover_iter = TAILQ_NEXT(d->d_failover_iter, g_entry);
  311. mtx_leave(&d->d_mtx);
  312. if (status == MPATH_S_ACTIVE) {
  313. scsi_xsh_add(&p->p_xsh);
  314. if (!scsi_pending_finish(&d->d_mtx, &d->d_failover))
  315. mpath_failover_start(d);
  316. } else
  317. mpath_failover_check(d);
  318. }
/*
 * Apply the minphys routine of every underlying path to bp, so the
 * transfer is clamped to the most restrictive path.
 * NOTE(review): mpath_switch above registers scsi_minphys, not this
 * function, so it does not appear to be reached via the adapter switch
 * in this file — confirm whether it is called from elsewhere.
 */
void
mpath_minphys(struct buf *bp, struct scsi_link *link)
{
	struct mpath_softc *sc = link->adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_group *g;
	struct mpath_path *p;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("mpath_minphys against nonexistant device");
#endif

	mtx_enter(&d->d_mtx);
	TAILQ_FOREACH(g, &d->d_groups, g_entry) {
		TAILQ_FOREACH(p, &g->g_paths, p_entry) {
			/* XXX crossing layers with mutex held */
			p->p_link->adapter->scsi_minphys(bp, p->p_link);
		}
	}
	mtx_leave(&d->d_mtx);
}
  339. int
  340. mpath_path_probe(struct scsi_link *link)
  341. {
  342. if (mpath == NULL)
  343. return (ENXIO);
  344. if (link->id == NULL)
  345. return (EINVAL);
  346. if (ISSET(link->flags, SDEV_UMASS))
  347. return (EINVAL);
  348. if (mpath == link->adapter_softc)
  349. return (ENXIO);
  350. return (0);
  351. }
  352. int
  353. mpath_path_attach(struct mpath_path *p, u_int g_id, const struct mpath_ops *ops)
  354. {
  355. struct mpath_softc *sc = mpath;
  356. struct scsi_link *link = p->p_link;
  357. struct mpath_dev *d = NULL;
  358. struct mpath_group *g;
  359. int newdev = 0, addxsh = 0;
  360. int target;
  361. #ifdef DIAGNOSTIC
  362. if (p->p_link == NULL)
  363. panic("mpath_path_attach: NULL link");
  364. if (p->p_group != NULL)
  365. panic("mpath_path_attach: group is not NULL");
  366. #endif
  367. for (target = 0; target < MPATH_BUSWIDTH; target++) {
  368. if ((d = sc->sc_devs[target]) == NULL)
  369. continue;
  370. if (DEVID_CMP(d->d_id, link->id) && d->d_ops == ops)
  371. break;
  372. d = NULL;
  373. }
  374. if (d == NULL) {
  375. for (target = 0; target < MPATH_BUSWIDTH; target++) {
  376. if (sc->sc_devs[target] == NULL)
  377. break;
  378. }
  379. if (target >= MPATH_BUSWIDTH)
  380. return (ENXIO);
  381. d = malloc(sizeof(*d), M_DEVBUF, M_WAITOK | M_CANFAIL | M_ZERO);
  382. if (d == NULL)
  383. return (ENOMEM);
  384. mtx_init(&d->d_mtx, IPL_BIO);
  385. TAILQ_INIT(&d->d_groups);
  386. SIMPLEQ_INIT(&d->d_xfers);
  387. d->d_id = devid_copy(link->id);
  388. d->d_ops = ops;
  389. timeout_set(&d->d_failover_tmo, mpath_failover_start, d);
  390. sc->sc_devs[target] = d;
  391. newdev = 1;
  392. } else {
  393. /*
  394. * instead of carrying identical values in different devid
  395. * instances, delete the new one and reference the old one in
  396. * the new scsi_link.
  397. */
  398. devid_free(link->id);
  399. link->id = devid_copy(d->d_id);
  400. }
  401. TAILQ_FOREACH(g, &d->d_groups, g_entry) {
  402. if (g->g_id == g_id)
  403. break;
  404. }
  405. if (g == NULL) {
  406. g = malloc(sizeof(*g), M_DEVBUF,
  407. M_WAITOK | M_CANFAIL | M_ZERO);
  408. if (g == NULL) {
  409. if (newdev) {
  410. free(d, M_DEVBUF, 0);
  411. sc->sc_devs[target] = NULL;
  412. }
  413. return (ENOMEM);
  414. }
  415. TAILQ_INIT(&g->g_paths);
  416. g->g_dev = d;
  417. g->g_id = g_id;
  418. mtx_enter(&d->d_mtx);
  419. TAILQ_INSERT_TAIL(&d->d_groups, g, g_entry);
  420. mtx_leave(&d->d_mtx);
  421. }
  422. p->p_group = g;
  423. mtx_enter(&d->d_mtx);
  424. TAILQ_INSERT_TAIL(&g->g_paths, p, p_entry);
  425. if (!SIMPLEQ_EMPTY(&d->d_xfers))
  426. addxsh = 1;
  427. if (d->d_next_path == NULL)
  428. d->d_next_path = p;
  429. mtx_leave(&d->d_mtx);
  430. if (newdev)
  431. scsi_probe_target(mpath->sc_scsibus, target);
  432. else if (addxsh)
  433. scsi_xsh_add(&p->p_xsh);
  434. return (0);
  435. }
/*
 * Remove a path from its group. A group left empty is unlinked from
 * the device and freed. If xfers are still queued they are handed to
 * the next usable path, or a failover scan is kicked off when no such
 * path is known.
 */
int
mpath_path_detach(struct mpath_path *p)
{
	struct mpath_group *g = p->p_group;
	struct mpath_dev *d;
	struct mpath_path *np = NULL;

#ifdef DIAGNOSTIC
	if (g == NULL)
		panic("mpath: detaching a path from a nonexistant bus");
#endif
	d = g->g_dev;
	p->p_group = NULL;

	mtx_enter(&d->d_mtx);
	TAILQ_REMOVE(&g->g_paths, p, p_entry);

	/* don't leave the round-robin cursor on the departing path */
	if (d->d_next_path == p)
		d->d_next_path = TAILQ_FIRST(&g->g_paths);

	if (TAILQ_EMPTY(&g->g_paths))
		TAILQ_REMOVE(&d->d_groups, g, g_entry);
	else
		g = NULL;	/* group still has paths; don't free it */

	if (!SIMPLEQ_EMPTY(&d->d_xfers))
		np = d->d_next_path;
	mtx_leave(&d->d_mtx);

	if (g != NULL)
		free(g, M_DEVBUF, 0);

	scsi_xsh_del(&p->p_xsh);

	if (np == NULL)
		mpath_failover(d);
	else
		scsi_xsh_add(&np->p_xsh);

	return (0);
}
  468. struct device *
  469. mpath_bootdv(struct device *dev)
  470. {
  471. struct mpath_softc *sc = mpath;
  472. struct mpath_dev *d;
  473. struct mpath_group *g;
  474. struct mpath_path *p;
  475. int target;
  476. if (sc == NULL)
  477. return (dev);
  478. for (target = 0; target < MPATH_BUSWIDTH; target++) {
  479. if ((d = sc->sc_devs[target]) == NULL)
  480. continue;
  481. TAILQ_FOREACH(g, &d->d_groups, g_entry) {
  482. TAILQ_FOREACH(p, &g->g_paths, p_entry) {
  483. if (p->p_link->device_softc == dev) {
  484. return (scsi_get_link(mpath->sc_scsibus,
  485. target, 0)->device_softc);
  486. }
  487. }
  488. }
  489. }
  490. return (dev);
  491. }