  1. /*-
  2. * SPDX-License-Identifier: BSD-3-Clause
  3. *
  4. * Copyright (c) 2002 Poul-Henning Kamp
  5. * Copyright (c) 2002 Networks Associates Technology, Inc.
  6. * All rights reserved.
  7. *
  8. * This software was developed for the FreeBSD Project by Poul-Henning Kamp
  9. * and NAI Labs, the Security Research Division of Network Associates, Inc.
  10. * under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
  11. * DARPA CHATS research program.
  12. *
  13. * Redistribution and use in source and binary forms, with or without
  14. * modification, are permitted provided that the following conditions
  15. * are met:
  16. * 1. Redistributions of source code must retain the above copyright
  17. * notice, this list of conditions and the following disclaimer.
  18. * 2. Redistributions in binary form must reproduce the above copyright
  19. * notice, this list of conditions and the following disclaimer in the
  20. * documentation and/or other materials provided with the distribution.
  21. * 3. The names of the authors may not be used to endorse or promote
  22. * products derived from this software without specific prior written
  23. * permission.
  24. *
  25. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  26. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  27. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  28. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  29. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  30. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  31. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  32. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  33. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  34. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  35. * SUCH DAMAGE.
  36. */
  37. #include <sys/cdefs.h>
  38. #include "opt_geom.h"
  39. #include <sys/param.h>
  40. #include <sys/systm.h>
  41. #include <sys/kernel.h>
  42. #include <sys/sysctl.h>
  43. #include <sys/bio.h>
  44. #include <sys/ctype.h>
  45. #include <sys/devctl.h>
  46. #include <sys/fcntl.h>
  47. #include <sys/malloc.h>
  48. #include <sys/msan.h>
  49. #include <sys/sbuf.h>
  50. #include <sys/devicestat.h>
  51. #include <sys/lock.h>
  52. #include <sys/mutex.h>
  53. #include <geom/geom.h>
  54. #include <geom/geom_disk.h>
  55. #include <geom/geom_int.h>
  56. #include <dev/led/led.h>
  57. #include <machine/bus.h>
/*
 * Per-disk softc; hung off both the geom's softc and the provider's
 * private pointer (see g_disk_create()).
 */
struct g_disk_softc {
	struct disk		*dp;		/* Backing disk(9); NULL after g_disk_destroy(). */
	struct devstat		*d_devstat;	/* Devstat entry shared with the disk. */
	struct sysctl_ctx_list	sysctl_ctx;	/* Context owning the per-disk sysctl tree. */
	struct sysctl_oid	*sysctl_tree;	/* Root of kern.geom.disk.<name>. */
	char			led[64];	/* Name passed to led_set(); empty = no LED. */
	uint32_t		state;		/* G_STATE_* value set via GEOM::setstate. */
	struct mtx		done_mtx;	/* Serializes completions in g_disk_done(). */
	bool			flush_notsup_succeed; /* BIO_FLUSH returns 0 instead of
						EOPNOTSUPP when the disk can't flush. */
};
/* GEOM method implementations for the DISK class. */
static g_access_t g_disk_access;
static g_start_t g_disk_start;
static g_ioctl_t g_disk_ioctl;
static g_dumpconf_t g_disk_dumpconf;
static g_provgone_t g_disk_providergone;

/* Sysctl handler reporting a disk's d_flags as a string. */
static int g_disk_sysctl_flags(SYSCTL_HANDLER_ARGS);

static struct g_class g_disk_class = {
	.name = G_DISK_CLASS_NAME,
	.version = G_VERSION,
	.start = g_disk_start,
	.access = g_disk_access,
	.ioctl = g_disk_ioctl,
	.providergone = g_disk_providergone,
	.dumpconf = g_disk_dumpconf,
};

SYSCTL_DECL(_kern_geom);
/* kern.geom.disk: parent node for the per-disk sysctl trees. */
static SYSCTL_NODE(_kern_geom, OID_AUTO, disk, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "GEOM_DISK stuff");

DECLARE_GEOM_CLASS(g_disk_class, g_disk);
/*
 * Access method: r/w/e are deltas to the provider's access counts.
 * Calls the driver's d_open() on the 0 -> n transition and d_close()
 * on the n -> 0 transition.  Called with the topology lock held.
 */
static int
g_disk_access(struct g_provider *pp, int r, int w, int e)
{
	struct disk *dp;
	struct g_disk_softc *sc;
	int error;

	g_trace(G_T_ACCESS, "g_disk_access(%s, %d, %d, %d)",
	    pp->name, r, w, e);
	g_topology_assert();
	sc = pp->private;
	if ((dp = sc->dp) == NULL || dp->d_destroyed) {
		/*
		 * Allow decreasing access count even if disk is not
		 * available anymore.
		 */
		if (r <= 0 && w <= 0 && e <= 0)
			return (0);
		return (ENXIO);
	}
	/* Convert deltas into the resulting absolute counts. */
	r += pp->acr;
	w += pp->acw;
	e += pp->ace;
	error = 0;
	if ((pp->acr + pp->acw + pp->ace) == 0 && (r + w + e) > 0) {
		/* First open. */
		/*
		 * It would be better to defer this decision to d_open if
		 * it was able to take flags.
		 */
		if (w > 0 && (dp->d_flags & DISKFLAG_WRITE_PROTECT) != 0)
			error = EROFS;
		if (error == 0 && dp->d_open != NULL)
			error = dp->d_open(dp);
		if (bootverbose && error != 0)
			printf("Opened disk %s -> %d\n", pp->name, error);
		if (error != 0)
			return (error);
		/* Refresh provider geometry; d_open may have updated it. */
		pp->sectorsize = dp->d_sectorsize;
		if (dp->d_maxsize == 0) {
			printf("WARNING: Disk drive %s%d has no d_maxsize\n",
			    dp->d_name, dp->d_unit);
			dp->d_maxsize = DFLTPHYS;
		}
		if (dp->d_delmaxsize == 0) {
			if (bootverbose && dp->d_flags & DISKFLAG_CANDELETE) {
				printf("WARNING: Disk drive %s%d has no "
				    "d_delmaxsize\n", dp->d_name, dp->d_unit);
			}
			dp->d_delmaxsize = dp->d_maxsize;
		}
		pp->stripeoffset = dp->d_stripeoffset;
		pp->stripesize = dp->d_stripesize;
		dp->d_flags |= DISKFLAG_OPEN;
		/*
		 * Do not invoke resize event when initial size was zero.
		 * Some disks report its size only after first opening.
		 */
		if (pp->mediasize == 0)
			pp->mediasize = dp->d_mediasize;
		else
			g_resize_provider(pp, dp->d_mediasize);
	} else if ((pp->acr + pp->acw + pp->ace) > 0 && (r + w + e) == 0) {
		/* Last close. */
		if (dp->d_close != NULL) {
			error = dp->d_close(dp);
			if (error != 0)
				printf("Closed disk %s -> %d\n",
				    pp->name, error);
		}
		/* Reset any state/LED indication set via GEOM::setstate. */
		sc->state = G_STATE_ACTIVE;
		if (sc->led[0] != 0)
			led_set(sc->led, "0");
		dp->d_flags &= ~DISKFLAG_OPEN;
	}
	return (error);
}
  161. static void
  162. g_disk_kerneldump(struct bio *bp, struct disk *dp)
  163. {
  164. struct g_kerneldump *gkd;
  165. struct g_geom *gp;
  166. gkd = (struct g_kerneldump*)bp->bio_data;
  167. gp = bp->bio_to->geom;
  168. g_trace(G_T_TOPOLOGY, "g_disk_kerneldump(%s, %jd, %jd)",
  169. gp->name, (intmax_t)gkd->offset, (intmax_t)gkd->length);
  170. if (dp->d_dump == NULL) {
  171. g_io_deliver(bp, ENODEV);
  172. return;
  173. }
  174. gkd->di.dumper = dp->d_dump;
  175. gkd->di.priv = dp;
  176. gkd->di.blocksize = dp->d_sectorsize;
  177. gkd->di.maxiosize = dp->d_maxsize;
  178. gkd->di.mediaoffset = gkd->offset;
  179. if ((gkd->offset + gkd->length) > dp->d_mediasize)
  180. gkd->length = dp->d_mediasize - gkd->offset;
  181. gkd->di.mediasize = gkd->length;
  182. g_io_deliver(bp, 0);
  183. }
  184. static void
  185. g_disk_setstate(struct bio *bp, struct g_disk_softc *sc)
  186. {
  187. const char *cmd;
  188. memcpy(&sc->state, bp->bio_data, sizeof(sc->state));
  189. if (sc->led[0] != 0) {
  190. switch (sc->state) {
  191. case G_STATE_FAILED:
  192. cmd = "1";
  193. break;
  194. case G_STATE_REBUILD:
  195. cmd = "f5";
  196. break;
  197. case G_STATE_RESYNC:
  198. cmd = "f1";
  199. break;
  200. default:
  201. cmd = "0";
  202. break;
  203. }
  204. led_set(sc->led, cmd);
  205. }
  206. g_io_deliver(bp, 0);
  207. }
/*
 * Completion handler for the bios g_disk_start() clones down to the
 * driver.  Accumulates child results into the parent bio and delivers
 * the parent once the last child has come back.
 */
static void
g_disk_done(struct bio *bp)
{
	struct bintime now;
	struct bio *bp2;
	struct g_disk_softc *sc;

	/* See "notes" for why we need a mutex here */
	sc = bp->bio_caller1;
	bp2 = bp->bio_parent;
	binuptime(&now);
	mtx_lock(&sc->done_mtx);
	/* Keep the first error reported by any child. */
	if (bp2->bio_error == 0)
		bp2->bio_error = bp->bio_error;
	bp2->bio_completed += bp->bio_length - bp->bio_resid;

	if (bp->bio_cmd == BIO_READ)
		kmsan_check(bp2->bio_data, bp2->bio_completed, "g_disk_done");
	switch (bp->bio_cmd) {
	case BIO_ZONE:
		/* Propagate the zone reply to the parent too. */
		bcopy(&bp->bio_zone, &bp2->bio_zone, sizeof(bp->bio_zone));
		/*FALLTHROUGH*/
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
	case BIO_FLUSH:
		devstat_end_transaction_bio_bt(sc->d_devstat, bp, &now);
		break;
	default:
		break;
	}
	bp2->bio_inbed++;
	/* Deliver the parent only when all clones have completed. */
	if (bp2->bio_children == bp2->bio_inbed) {
		mtx_unlock(&sc->done_mtx);
		bp2->bio_resid = bp2->bio_bcount - bp2->bio_completed;
		g_io_deliver(bp2, bp2->bio_error);
	} else
		mtx_unlock(&sc->done_mtx);
	g_destroy_bio(bp);
}
  246. static int
  247. g_disk_ioctl(struct g_provider *pp, u_long cmd, void * data, int fflag, struct thread *td)
  248. {
  249. struct disk *dp;
  250. struct g_disk_softc *sc;
  251. sc = pp->private;
  252. dp = sc->dp;
  253. KASSERT(dp != NULL && !dp->d_destroyed,
  254. ("g_disk_ioctl(%lx) on destroyed disk %s", cmd, pp->name));
  255. if (dp->d_ioctl == NULL)
  256. return (ENOIOCTL);
  257. return (dp->d_ioctl(dp, cmd, data, fflag, td));
  258. }
  259. static off_t
  260. g_disk_maxsize(struct disk *dp, struct bio *bp)
  261. {
  262. if (bp->bio_cmd == BIO_DELETE)
  263. return (dp->d_delmaxsize);
  264. return (dp->d_maxsize);
  265. }
  266. static int
  267. g_disk_maxsegs(struct disk *dp, struct bio *bp)
  268. {
  269. return ((g_disk_maxsize(dp, bp) / PAGE_SIZE) + 1);
  270. }
/*
 * Advance a bio's cursor by 'off' bytes, adjusting the data pointers
 * according to the bio's addressing mode (vlist, unmapped pages, or
 * plain mapped KVA).  Used when a request is carved into pieces.
 */
static void
g_disk_advance(struct disk *dp, struct bio *bp, off_t off)
{

	bp->bio_offset += off;
	bp->bio_length -= off;

	if ((bp->bio_flags & BIO_VLIST) != 0) {
		bus_dma_segment_t *seg, *end;

		/* Walk the segment list until 'off' bytes are consumed. */
		seg = (bus_dma_segment_t *)bp->bio_data;
		end = (bus_dma_segment_t *)bp->bio_data + bp->bio_ma_n;
		off += bp->bio_ma_offset;
		while (off >= seg->ds_len) {
			KASSERT((seg != end),
			    ("vlist request runs off the end"));
			off -= seg->ds_len;
			seg++;
		}
		bp->bio_ma_offset = off;
		bp->bio_ma_n = end - seg;
		bp->bio_data = (void *)seg;
	} else if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
		/* Step over whole pages; keep the in-page offset normalized. */
		bp->bio_ma += off / PAGE_SIZE;
		bp->bio_ma_offset += off;
		bp->bio_ma_offset %= PAGE_SIZE;
		bp->bio_ma_n -= off / PAGE_SIZE;
	} else {
		/* Mapped buffer: plain pointer arithmetic. */
		bp->bio_data += off;
	}
}
/*
 * Account one vlist segment against a page budget.  On entry *poffset
 * is the offset into this segment, *plength the bytes still wanted and
 * *ppages the pages still allowed; all three are updated for what this
 * segment can carry.  Length is clipped when the page budget runs out.
 */
static void
g_disk_seg_limit(bus_dma_segment_t *seg, off_t *poffset,
    off_t *plength, int *ppages)
{
	uintptr_t seg_page_base;
	uintptr_t seg_page_end;
	off_t offset;
	off_t length;
	int seg_pages;

	offset = *poffset;
	length = *plength;

	/* Clip to the bytes actually available in this segment. */
	if (length > seg->ds_len - offset)
		length = seg->ds_len - offset;

	/* Pages touched by this segment's [offset, offset + length). */
	seg_page_base = trunc_page(seg->ds_addr + offset);
	seg_page_end = round_page(seg->ds_addr + offset + length);
	seg_pages = (seg_page_end - seg_page_base) >> PAGE_SHIFT;

	if (seg_pages > *ppages) {
		/* Budget exceeded: truncate length to what fits. */
		seg_pages = *ppages;
		length = (seg_page_base + (seg_pages << PAGE_SHIFT)) -
		    (seg->ds_addr + offset);
	}

	/* Subsequent segments start at their beginning. */
	*poffset = 0;
	*plength -= length;
	*ppages -= seg_pages;
}
/*
 * Determine how much of a vlist bio exceeds the disk's per-request
 * page budget.  Returns the residual byte count that does NOT fit
 * (0 if everything fits) and, via *pendseg, the first segment past
 * the portion that fits.
 */
static off_t
g_disk_vlist_limit(struct disk *dp, struct bio *bp, bus_dma_segment_t **pendseg)
{
	bus_dma_segment_t *seg, *end __diagused;
	off_t residual;
	off_t offset;
	int pages;

	seg = (bus_dma_segment_t *)bp->bio_data;
	end = (bus_dma_segment_t *)bp->bio_data + bp->bio_ma_n;
	residual = bp->bio_length;
	offset = bp->bio_ma_offset;
	pages = g_disk_maxsegs(dp, bp);
	/* Consume segments until we run out of bytes or pages. */
	while (residual != 0 && pages != 0) {
		KASSERT((seg != end),
		    ("vlist limit runs off the end"));
		g_disk_seg_limit(seg, &offset, &residual, &pages);
		seg++;
	}
	if (pendseg != NULL)
		*pendseg = seg;
	return (residual);
}
/*
 * Clip a cloned bio to the disk's per-request limits (byte count and,
 * for vlist bios, the page budget).  Returns true if the bio was
 * shortened, i.e. additional clones are needed to finish the request.
 */
static bool
g_disk_limit(struct disk *dp, struct bio *bp)
{
	bool limited = false;
	off_t maxsz;

	maxsz = g_disk_maxsize(dp, bp);

	/*
	 * XXX: If we have a stripesize we should really use it here.
	 *      Care should be taken in the delete case if this is done
	 *      as deletes can be very sensitive to size given how they
	 *      are processed.
	 */
	if (bp->bio_length > maxsz) {
		bp->bio_length = maxsz;
		limited = true;
	}

	if ((bp->bio_flags & BIO_VLIST) != 0) {
		bus_dma_segment_t *firstseg, *endseg;
		off_t residual;

		firstseg = (bus_dma_segment_t*)bp->bio_data;
		residual = g_disk_vlist_limit(dp, bp, &endseg);
		if (residual != 0) {
			/* Drop the bytes/segments past the page budget. */
			bp->bio_ma_n = endseg - firstseg;
			bp->bio_length -= residual;
			limited = true;
		}
	} else if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
		/* Recompute the page count after any length clipping. */
		bp->bio_ma_n =
		    howmany(bp->bio_ma_offset + bp->bio_length, PAGE_SIZE);
	}

	return (limited);
}
/*
 * Start method: dispatch a bio arriving on the provider to the disk
 * driver, cloning and splitting it as needed to honor the driver's
 * per-request limits, and answering GETATTR queries inline.
 */
static void
g_disk_start(struct bio *bp)
{
	struct bio *bp2, *bp3;
	struct disk *dp;
	struct g_disk_softc *sc;
	int error;
	off_t off;

	biotrack(bp, __func__);

	sc = bp->bio_to->private;
	dp = sc->dp;
	KASSERT(dp != NULL && !dp->d_destroyed,
	    ("g_disk_start(%p) on destroyed disk %s", bp, bp->bio_to->name));
	/* EJUSTRETURN: completion is asynchronous or already delivered. */
	error = EJUSTRETURN;
	switch(bp->bio_cmd) {
	case BIO_DELETE:
		if (!(dp->d_flags & DISKFLAG_CANDELETE)) {
			error = EOPNOTSUPP;
			break;
		}
		/* fall-through */
	case BIO_READ:
	case BIO_WRITE:
		KASSERT((dp->d_flags & DISKFLAG_UNMAPPED_BIO) != 0 ||
		    (bp->bio_flags & BIO_UNMAPPED) == 0,
		    ("unmapped bio not supported by disk %s", dp->d_name));

		if (bp->bio_cmd == BIO_WRITE)
			kmsan_check_bio(bp, "g_disk_start");

		off = 0;
		bp3 = NULL;
		bp2 = g_clone_bio(bp);
		if (bp2 == NULL) {
			error = ENOMEM;
			break;
		}
		/* Issue clones until the whole request is covered. */
		for (;;) {
			if (g_disk_limit(dp, bp2)) {
				off += bp2->bio_length;

				/*
				 * To avoid a race, we need to grab the next bio
				 * before we schedule this one.  See "notes".
				 */
				bp3 = g_clone_bio(bp);
				if (bp3 == NULL)
					bp->bio_error = ENOMEM;
			}
			bp2->bio_done = g_disk_done;
			bp2->bio_caller1 = sc;
			bp2->bio_pblkno = bp2->bio_offset / dp->d_sectorsize;
			bp2->bio_bcount = bp2->bio_length;
			bp2->bio_disk = dp;
			devstat_start_transaction_bio(dp->d_devstat, bp2);
			dp->d_strategy(bp2);

			if (bp3 == NULL)
				break;

			bp2 = bp3;
			bp3 = NULL;
			/* Move the new clone's window past what was issued. */
			g_disk_advance(dp, bp2, off);
		}
		break;
	case BIO_GETATTR:
		/* Give the driver a chance to override */
		if (dp->d_getattr != NULL) {
			if (bp->bio_disk == NULL)
				bp->bio_disk = dp;
			error = dp->d_getattr(bp);
			/* -1 means "not handled, fall through below". */
			if (error != -1)
				break;
			error = EJUSTRETURN;
		}
		if (g_handleattr_int(bp, "GEOM::candelete",
		    (dp->d_flags & DISKFLAG_CANDELETE) != 0))
			break;
		else if (g_handleattr_int(bp, "GEOM::fwsectors",
		    dp->d_fwsectors))
			break;
		else if (g_handleattr_int(bp, "GEOM::fwheads", dp->d_fwheads))
			break;
		else if (g_handleattr_str(bp, "GEOM::ident", dp->d_ident))
			break;
		else if (g_handleattr_str(bp, "GEOM::descr", dp->d_descr))
			break;
		else if (g_handleattr_uint16_t(bp, "GEOM::hba_vendor",
		    dp->d_hba_vendor))
			break;
		else if (g_handleattr_uint16_t(bp, "GEOM::hba_device",
		    dp->d_hba_device))
			break;
		else if (g_handleattr_uint16_t(bp, "GEOM::hba_subvendor",
		    dp->d_hba_subvendor))
			break;
		else if (g_handleattr_uint16_t(bp, "GEOM::hba_subdevice",
		    dp->d_hba_subdevice))
			break;
		else if (!strcmp(bp->bio_attribute, "GEOM::kerneldump"))
			/* Delivers bp itself; error stays EJUSTRETURN. */
			g_disk_kerneldump(bp, dp);
		else if (!strcmp(bp->bio_attribute, "GEOM::setstate"))
			/* Delivers bp itself; error stays EJUSTRETURN. */
			g_disk_setstate(bp, sc);
		else if (g_handleattr_uint16_t(bp, "GEOM::rotation_rate",
		    dp->d_rotation_rate))
			break;
		else if (g_handleattr_str(bp, "GEOM::attachment",
		    dp->d_attachment))
			break;
		else
			error = ENOIOCTL;
		break;
	case BIO_FLUSH:
		g_trace(G_T_BIO, "g_disk_flushcache(%s)",
		    bp->bio_to->name);
		if (!(dp->d_flags & DISKFLAG_CANFLUSHCACHE)) {
			/* Optionally pretend success when there is no cache. */
			error = (sc->flush_notsup_succeed) ? 0 : EOPNOTSUPP;
			break;
		}
		/*FALLTHROUGH*/
	case BIO_ZONE:
		if (bp->bio_cmd == BIO_ZONE) {
			if (!(dp->d_flags & DISKFLAG_CANZONE)) {
				error = EOPNOTSUPP;
				break;
			}
			g_trace(G_T_BIO, "g_disk_zone(%s)",
			    bp->bio_to->name);
		}
		bp2 = g_clone_bio(bp);
		if (bp2 == NULL) {
			g_io_deliver(bp, ENOMEM);
			return;
		}
		bp2->bio_done = g_disk_done;
		bp2->bio_caller1 = sc;
		bp2->bio_disk = dp;
		devstat_start_transaction_bio(dp->d_devstat, bp2);
		dp->d_strategy(bp2);
		break;
	case BIO_SPEEDUP:
		bp2 = g_clone_bio(bp);
		if (bp2 == NULL) {
			g_io_deliver(bp, ENOMEM);
			return;
		}
		bp2->bio_done = g_disk_done;
		bp2->bio_caller1 = sc;
		bp2->bio_disk = dp;
		dp->d_strategy(bp2);
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}
	if (error != EJUSTRETURN)
		g_io_deliver(bp, error);
	return;
}
/*
 * Dumpconf method: emit the disk's attributes into the XML config
 * tree, or the abbreviated one-line form when indent == NULL.
 */
static void
g_disk_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
    struct g_consumer *cp, struct g_provider *pp)
{
	struct bio *bp;
	struct disk *dp;
	struct g_disk_softc *sc;
	char *buf;
	int res = 0;

	sc = gp->softc;
	if (sc == NULL || (dp = sc->dp) == NULL)
		return;
	if (indent == NULL) {
		/* Abbreviated one-line form. */
		sbuf_printf(sb, " hd %u", dp->d_fwheads);
		sbuf_printf(sb, " sc %u", dp->d_fwsectors);
		return;
	}
	if (pp != NULL) {
		sbuf_printf(sb, "%s<fwheads>%u</fwheads>\n",
		    indent, dp->d_fwheads);
		sbuf_printf(sb, "%s<fwsectors>%u</fwsectors>\n",
		    indent, dp->d_fwsectors);

		/*
		 * "rotationrate" is a little complicated, because the value
		 * returned by the drive might not be the RPM; 0 and 1 are
		 * special cases, and there's also a valid range.
		 */
		sbuf_printf(sb, "%s<rotationrate>", indent);
		if (dp->d_rotation_rate == DISK_RR_UNKNOWN) /* Old drives */
			sbuf_cat(sb, "unknown");	/* don't report RPM. */
		else if (dp->d_rotation_rate == DISK_RR_NON_ROTATING)
			sbuf_cat(sb, "0");
		else if ((dp->d_rotation_rate >= DISK_RR_MIN) &&
		    (dp->d_rotation_rate <= DISK_RR_MAX))
			sbuf_printf(sb, "%u", dp->d_rotation_rate);
		else
			sbuf_cat(sb, "invalid");
		sbuf_cat(sb, "</rotationrate>\n");
		if (dp->d_getattr != NULL) {
			/*
			 * Query ident/lunid/lunname from the driver via a
			 * scratch bio; fall back to d_ident on failure.
			 */
			buf = g_malloc(DISK_IDENT_SIZE, M_WAITOK);
			bp = g_alloc_bio();
			bp->bio_disk = dp;
			bp->bio_attribute = "GEOM::ident";
			bp->bio_length = DISK_IDENT_SIZE;
			bp->bio_data = buf;
			res = dp->d_getattr(bp);
			sbuf_printf(sb, "%s<ident>", indent);
			g_conf_cat_escaped(sb, res == 0 ? buf : dp->d_ident);
			sbuf_cat(sb, "</ident>\n");
			bp->bio_attribute = "GEOM::lunid";
			bp->bio_length = DISK_IDENT_SIZE;
			bp->bio_data = buf;
			if (dp->d_getattr(bp) == 0) {
				sbuf_printf(sb, "%s<lunid>", indent);
				g_conf_cat_escaped(sb, buf);
				sbuf_cat(sb, "</lunid>\n");
			}
			bp->bio_attribute = "GEOM::lunname";
			bp->bio_length = DISK_IDENT_SIZE;
			bp->bio_data = buf;
			if (dp->d_getattr(bp) == 0) {
				sbuf_printf(sb, "%s<lunname>", indent);
				g_conf_cat_escaped(sb, buf);
				sbuf_cat(sb, "</lunname>\n");
			}
			g_destroy_bio(bp);
			g_free(buf);
		} else {
			sbuf_printf(sb, "%s<ident>", indent);
			g_conf_cat_escaped(sb, dp->d_ident);
			sbuf_cat(sb, "</ident>\n");
		}
		sbuf_printf(sb, "%s<descr>", indent);
		g_conf_cat_escaped(sb, dp->d_descr);
		sbuf_cat(sb, "</descr>\n");
	}
}
  608. static void
  609. g_disk_resize(void *ptr, int flag)
  610. {
  611. struct disk *dp;
  612. struct g_geom *gp;
  613. struct g_provider *pp;
  614. if (flag == EV_CANCEL)
  615. return;
  616. g_topology_assert();
  617. dp = ptr;
  618. gp = dp->d_geom;
  619. if (dp->d_destroyed || gp == NULL)
  620. return;
  621. LIST_FOREACH(pp, &gp->provider, provider) {
  622. if (pp->sectorsize != 0 &&
  623. pp->sectorsize != dp->d_sectorsize)
  624. g_wither_provider(pp, ENXIO);
  625. else
  626. g_resize_provider(pp, dp->d_mediasize);
  627. }
  628. }
/*
 * Event handler: instantiate the GEOM plumbing (geom, provider, sysctl
 * tree) for a freshly created disk.  Runs with the topology lock held.
 * Checks d_goneflag before and after setup so a racing disk_gone()
 * is always honored.
 */
static void
g_disk_create(void *arg, int flag)
{
	struct g_geom *gp;
	struct g_provider *pp;
	struct disk *dp;
	struct g_disk_softc *sc;
	struct disk_alias *dap;
	char tmpstr[80];

	if (flag == EV_CANCEL)
		return;
	g_topology_assert();
	dp = arg;

	mtx_pool_lock(mtxpool_sleep, dp);
	dp->d_init_level = DISK_INIT_START;

	/*
	 * If the disk has already gone away, we can just stop here and
	 * call the user's callback to tell him we've cleaned things up.
	 */
	if (dp->d_goneflag != 0) {
		mtx_pool_unlock(mtxpool_sleep, dp);
		if (dp->d_gone != NULL)
			dp->d_gone(dp);
		return;
	}
	mtx_pool_unlock(mtxpool_sleep, dp);

	sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
	mtx_init(&sc->done_mtx, "g_disk_done", NULL, MTX_DEF);
	sc->dp = dp;
	/* Reuse a driver-supplied devstat entry if there is one. */
	if (dp->d_devstat == NULL) {
		dp->d_devstat = devstat_new_entry(dp->d_name, dp->d_unit,
		    dp->d_sectorsize, DEVSTAT_ALL_SUPPORTED,
		    DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX);
	}
	sc->d_devstat = dp->d_devstat;
	gp = g_new_geomf(&g_disk_class, "%s%d", dp->d_name, dp->d_unit);
	gp->softc = sc;
	pp = g_new_providerf(gp, "%s", gp->name);
	LIST_FOREACH(dap, &dp->d_aliases, da_next)
		g_provider_add_alias(pp, "%s%d", dap->da_alias, dp->d_unit);
	/* The disk's own devstat entry is used; drop the provider's. */
	devstat_remove_entry(pp->stat);
	pp->stat = NULL;
	dp->d_devstat->id = pp;
	pp->mediasize = dp->d_mediasize;
	pp->sectorsize = dp->d_sectorsize;
	pp->stripeoffset = dp->d_stripeoffset;
	pp->stripesize = dp->d_stripesize;
	if ((dp->d_flags & DISKFLAG_UNMAPPED_BIO) != 0)
		pp->flags |= G_PF_ACCEPT_UNMAPPED;
	if ((dp->d_flags & DISKFLAG_DIRECT_COMPLETION) != 0)
		pp->flags |= G_PF_DIRECT_SEND;
	pp->flags |= G_PF_DIRECT_RECEIVE;
	if (bootverbose)
		printf("GEOM: new disk %s\n", gp->name);
	/* Per-disk sysctl tree: kern.geom.disk.<name>. */
	sysctl_ctx_init(&sc->sysctl_ctx);
	snprintf(tmpstr, sizeof(tmpstr), "GEOM disk %s", gp->name);
	sc->sysctl_tree = SYSCTL_ADD_NODE(&sc->sysctl_ctx,
	    SYSCTL_STATIC_CHILDREN(_kern_geom_disk), OID_AUTO, gp->name,
	    CTLFLAG_RD | CTLFLAG_MPSAFE, 0, tmpstr);
	if (sc->sysctl_tree != NULL) {
		SYSCTL_ADD_STRING(&sc->sysctl_ctx,
		    SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, "led",
		    CTLFLAG_RWTUN, sc->led, sizeof(sc->led),
		    "LED name");
		SYSCTL_ADD_PROC(&sc->sysctl_ctx,
		    SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, "flags",
		    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, dp, 0,
		    g_disk_sysctl_flags, "A", "Report disk flags");
		SYSCTL_ADD_BOOL(&sc->sysctl_ctx,
		    SYSCTL_CHILDREN(sc->sysctl_tree), OID_AUTO, "flush_notsup_succeed",
		    CTLFLAG_RWTUN, &sc->flush_notsup_succeed, sizeof(sc->flush_notsup_succeed),
		    "Do not return EOPNOTSUPP if there is no cache to flush");
	}
	pp->private = sc;
	dp->d_geom = gp;
	g_error_provider(pp, 0);

	mtx_pool_lock(mtxpool_sleep, dp);
	dp->d_init_level = DISK_INIT_DONE;

	/*
	 * If the disk has gone away at this stage, start the withering
	 * process for it.
	 */
	if (dp->d_goneflag != 0) {
		mtx_pool_unlock(mtxpool_sleep, dp);
		g_wither_provider(pp, ENXIO);
		return;
	}
	mtx_pool_unlock(mtxpool_sleep, dp);
}
/*
 * We get this callback after all of the consumers have gone away, and just
 * before the provider is freed.  If the disk driver provided a d_gone
 * callback, let them know that it is okay to free resources -- they won't
 * be getting any more accesses from GEOM.
 */
static void
g_disk_providergone(struct g_provider *pp)
{
	struct disk *dp;
	struct g_disk_softc *sc;

	sc = (struct g_disk_softc *)pp->private;
	dp = sc->dp;
	if (dp != NULL && dp->d_gone != NULL)
		dp->d_gone(dp);
	/* Tear down the per-disk sysctl tree. */
	if (sc->sysctl_tree != NULL) {
		sysctl_ctx_free(&sc->sysctl_ctx);
		sc->sysctl_tree = NULL;
	}
	/* Turn the LED off and forget its name. */
	if (sc->led[0] != 0) {
		led_set(sc->led, "0");
		sc->led[0] = 0;
	}
	/* Unhook the softc from both sides and free it. */
	pp->private = NULL;
	pp->geom->softc = NULL;
	mtx_destroy(&sc->done_mtx);
	g_free(sc);
}
/*
 * Event handler: final teardown queued by disk_destroy().  Withers the
 * geom and frees the disk structure and its aliases.  Runs with the
 * topology lock held.
 */
static void
g_disk_destroy(void *ptr, int flag)
{
	struct disk *dp;
	struct g_geom *gp;
	struct g_disk_softc *sc;
	struct disk_alias *dap, *daptmp;

	g_topology_assert();
	dp = ptr;
	gp = dp->d_geom;
	if (gp != NULL) {
		sc = gp->softc;
		/* Detach the softc from the disk before withering. */
		if (sc != NULL)
			sc->dp = NULL;
		dp->d_geom = NULL;
		g_wither_geom(gp, ENXIO);
	}
	LIST_FOREACH_SAFE(dap, &dp->d_aliases, da_next, daptmp)
		g_free(dap);

	g_free(dp);
}
  767. /*
  768. * We only allow printable characters in disk ident,
  769. * the rest is converted to 'x<HH>'.
  770. */
  771. static void
  772. g_disk_ident_adjust(char *ident, size_t size)
  773. {
  774. char *p, tmp[4], newid[DISK_IDENT_SIZE];
  775. newid[0] = '\0';
  776. for (p = ident; *p != '\0'; p++) {
  777. if (isprint(*p)) {
  778. tmp[0] = *p;
  779. tmp[1] = '\0';
  780. } else {
  781. snprintf(tmp, sizeof(tmp), "x%02hhx",
  782. *(unsigned char *)p);
  783. }
  784. if (strlcat(newid, tmp, sizeof(newid)) >= sizeof(newid))
  785. break;
  786. }
  787. bzero(ident, size);
  788. strlcpy(ident, newid, size);
  789. }
  790. struct disk *
  791. disk_alloc(void)
  792. {
  793. struct disk *dp;
  794. dp = g_malloc(sizeof(struct disk), M_WAITOK | M_ZERO);
  795. LIST_INIT(&dp->d_aliases);
  796. dp->d_init_level = DISK_INIT_NONE;
  797. dp->d_cevent = g_alloc_event(M_WAITOK);
  798. dp->d_devent = g_alloc_event(M_WAITOK);
  799. return (dp);
  800. }
/*
 * Make a disk visible to GEOM.  Validates the disk(9) ABI version and
 * required fields, sanitizes the ident, then queues g_disk_create()
 * on the GEOM event queue.
 */
void
disk_create(struct disk *dp, int version)
{

	if (version != DISK_VERSION) {
		printf("WARNING: Attempt to add disk %s%d %s",
		    dp->d_name, dp->d_unit,
		    " using incompatible ABI version of disk(9)\n");
		printf("WARNING: Ignoring disk %s%d\n",
		    dp->d_name, dp->d_unit);
		return;
	}
	if (dp->d_flags & DISKFLAG_RESERVED) {
		printf("WARNING: Attempt to add non-MPSAFE disk %s%d\n",
		    dp->d_name, dp->d_unit);
		printf("WARNING: Ignoring disk %s%d\n",
		    dp->d_name, dp->d_unit);
		return;
	}
	KASSERT(dp->d_strategy != NULL, ("disk_create need d_strategy"));
	KASSERT(dp->d_name != NULL, ("disk_create need d_name"));
	KASSERT(*dp->d_name != 0, ("disk_create need d_name"));
	KASSERT(strlen(dp->d_name) < SPECNAMELEN - 4, ("disk name too long"));

	/* Sanitize the ident before anything can read it. */
	g_disk_ident_adjust(dp->d_ident, sizeof(dp->d_ident));

	dp->d_init_level = DISK_INIT_CREATE;

	KASSERT(dp->d_cevent != NULL,
	    ("Disk create for %p with event NULL", dp));
	g_post_event_ep(g_disk_create, dp, dp->d_cevent, dp, NULL);
}
/*
 * Destroy a disk.  If disk_create() was never reached, free everything
 * inline; otherwise mark the disk destroyed and queue g_disk_destroy()
 * for the final teardown.
 */
void
disk_destroy(struct disk *dp)
{
	struct disk_alias *dap, *daptmp;

	/* If disk_create() was never called, just free the resources. */
	if (dp->d_init_level < DISK_INIT_CREATE) {
		if (dp->d_devstat != NULL)
			devstat_remove_entry(dp->d_devstat);
		LIST_FOREACH_SAFE(dap, &dp->d_aliases, da_next, daptmp)
			g_free(dap);
		g_free(dp->d_cevent);
		g_free(dp->d_devent);
		g_free(dp);
		return;
	}

	KASSERT(dp->d_devent != NULL,
	    ("Disk destroy for %p with event NULL", dp));
	/* Wither the provider first so no new I/O arrives. */
	disk_gone(dp);
	dp->d_destroyed = 1;
	g_cancel_event(dp);
	if (dp->d_devstat != NULL)
		devstat_remove_entry(dp->d_devstat);
	g_post_event_ep(g_disk_destroy, dp, dp->d_devent, NULL);
}
  853. void
  854. disk_add_alias(struct disk *dp, const char *name)
  855. {
  856. struct disk_alias *dap;
  857. dap = (struct disk_alias *)g_malloc(
  858. sizeof(struct disk_alias) + strlen(name) + 1, M_WAITOK);
  859. strcpy((char *)(dap + 1), name);
  860. dap->da_alias = (const char *)(dap + 1);
  861. LIST_INSERT_HEAD(&dp->d_aliases, dap, da_next);
  862. }
/*
 * Announce that the disk is gone and wither its provider.  Safe to be
 * called multiple times and safe to race with a still-in-flight
 * g_disk_create(); the pool mutex serializes against both.
 */
void
disk_gone(struct disk *dp)
{
	struct g_geom *gp;
	struct g_provider *pp;

	mtx_pool_lock(mtxpool_sleep, dp);

	/*
	 * Second wither call makes no sense, plus we can not access the list
	 * of providers without topology lock after calling wither once.
	 */
	if (dp->d_goneflag != 0) {
		mtx_pool_unlock(mtxpool_sleep, dp);
		return;
	}

	dp->d_goneflag = 1;

	/*
	 * If we're still in the process of creating this disk (the
	 * g_disk_create() function is still queued, or is in
	 * progress), the init level will not yet be DISK_INIT_DONE.
	 *
	 * If that is the case, g_disk_create() will see d_goneflag
	 * and take care of cleaning things up.
	 *
	 * If the disk has already been created, we default to
	 * withering the provider as usual below.
	 *
	 * If the caller has not set a d_gone() callback, he will
	 * not be any worse off by returning here, because the geom
	 * has not been fully setup in any case.
	 */
	if (dp->d_init_level < DISK_INIT_DONE) {
		mtx_pool_unlock(mtxpool_sleep, dp);
		return;
	}
	mtx_pool_unlock(mtxpool_sleep, dp);

	/* Fully created: wither the (single) provider of the disk geom. */
	gp = dp->d_geom;
	pp = LIST_FIRST(&gp->provider);
	if (pp != NULL) {
		KASSERT(LIST_NEXT(pp, provider) == NULL,
		    ("geom %p has more than one provider", gp));
		g_wither_provider(pp, ENXIO);
	}
}
  906. void
  907. disk_attr_changed(struct disk *dp, const char *attr, int flag)
  908. {
  909. struct g_geom *gp = dp->d_geom;
  910. struct g_provider *pp;
  911. char devnamebuf[128];
  912. if (gp == NULL)
  913. return;
  914. LIST_FOREACH(pp, &gp->provider, provider)
  915. (void)g_attr_changed(pp, attr, flag);
  916. snprintf(devnamebuf, sizeof(devnamebuf), "devname=%s%d", dp->d_name,
  917. dp->d_unit);
  918. devctl_notify("GEOM", "disk", attr, devnamebuf);
  919. }
  920. void
  921. disk_media_changed(struct disk *dp, int flag)
  922. {
  923. struct g_geom *gp = dp->d_geom;
  924. struct g_provider *pp;
  925. if (gp == NULL)
  926. return;
  927. pp = LIST_FIRST(&gp->provider);
  928. if (pp != NULL) {
  929. KASSERT(LIST_NEXT(pp, provider) == NULL,
  930. ("geom %p has more than one provider", gp));
  931. g_media_changed(pp, flag);
  932. }
  933. }
  934. void
  935. disk_media_gone(struct disk *dp, int flag)
  936. {
  937. struct g_geom *gp = dp->d_geom;
  938. struct g_provider *pp;
  939. if (gp == NULL)
  940. return;
  941. pp = LIST_FIRST(&gp->provider);
  942. if (pp != NULL) {
  943. KASSERT(LIST_NEXT(pp, provider) == NULL,
  944. ("geom %p has more than one provider", gp));
  945. g_media_gone(pp, flag);
  946. }
  947. }
  948. int
  949. disk_resize(struct disk *dp, int flag)
  950. {
  951. if (dp->d_destroyed || dp->d_geom == NULL)
  952. return (0);
  953. return (g_post_event(g_disk_resize, dp, flag, NULL));
  954. }
  955. static void
  956. g_kern_disks(void *p, int flag __unused)
  957. {
  958. struct sbuf *sb;
  959. struct g_geom *gp;
  960. char *sp;
  961. sb = p;
  962. sp = "";
  963. g_topology_assert();
  964. LIST_FOREACH(gp, &g_disk_class.geom, geom) {
  965. sbuf_printf(sb, "%s%s", sp, gp->name);
  966. sp = " ";
  967. }
  968. sbuf_finish(sb);
  969. }
  970. static int
  971. g_disk_sysctl_flags(SYSCTL_HANDLER_ARGS)
  972. {
  973. struct disk *dp;
  974. struct sbuf *sb;
  975. int error;
  976. sb = sbuf_new_auto();
  977. dp = (struct disk *)arg1;
  978. sbuf_printf(sb, "%b", dp->d_flags,
  979. "\20"
  980. "\2OPEN"
  981. "\3CANDELETE"
  982. "\4CANFLUSHCACHE"
  983. "\5UNMAPPEDBIO"
  984. "\6DIRECTCOMPLETION"
  985. "\10CANZONE"
  986. "\11WRITEPROTECT");
  987. sbuf_finish(sb);
  988. error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
  989. sbuf_delete(sb);
  990. return (error);
  991. }
  992. static int
  993. sysctl_disks(SYSCTL_HANDLER_ARGS)
  994. {
  995. int error;
  996. struct sbuf *sb;
  997. sb = sbuf_new_auto();
  998. g_waitfor_event(g_kern_disks, sb, M_WAITOK, NULL);
  999. error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
  1000. sbuf_delete(sb);
  1001. return error;
  1002. }
/* kern.disks: read-only string listing the names of available disks. */
SYSCTL_PROC(_kern, OID_AUTO, disks,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    sysctl_disks, "A", "names of available disks");