xenbus.c 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128
  1. /* Xenbus code for blkif backend
  2. Copyright (C) 2005 Rusty Russell <rusty@rustcorp.com.au>
  3. Copyright (C) 2005 XenSource Ltd
  4. This program is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2 of the License, or
  7. (at your option) any later version.
  8. This program is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. */
  13. #define pr_fmt(fmt) "xen-blkback: " fmt
  14. #include <stdarg.h>
  15. #include <linux/module.h>
  16. #include <linux/kthread.h>
  17. #include <xen/events.h>
  18. #include <xen/grant_table.h>
  19. #include "common.h"
  20. /* On the XenBus the max length of 'ring-ref%u'. */
  21. #define RINGREF_NAME_LEN (20)
  22. struct backend_info {
  23. struct xenbus_device *dev;
  24. struct xen_blkif *blkif;
  25. struct xenbus_watch backend_watch;
  26. unsigned major;
  27. unsigned minor;
  28. char *mode;
  29. };
  30. static struct kmem_cache *xen_blkif_cachep;
  31. static void connect(struct backend_info *);
  32. static int connect_ring(struct backend_info *);
  33. static void backend_changed(struct xenbus_watch *, const char *,
  34. const char *);
  35. static void xen_blkif_free(struct xen_blkif *blkif);
  36. static void xen_vbd_free(struct xen_vbd *vbd);
  37. struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be)
  38. {
  39. return be->dev;
  40. }
  41. /*
  42. * The last request could free the device from softirq context and
  43. * xen_blkif_free() can sleep.
  44. */
  45. static void xen_blkif_deferred_free(struct work_struct *work)
  46. {
  47. struct xen_blkif *blkif;
  48. blkif = container_of(work, struct xen_blkif, free_work);
  49. xen_blkif_free(blkif);
  50. }
  51. static int blkback_name(struct xen_blkif *blkif, char *buf)
  52. {
  53. char *devpath, *devname;
  54. struct xenbus_device *dev = blkif->be->dev;
  55. devpath = xenbus_read(XBT_NIL, dev->nodename, "dev", NULL);
  56. if (IS_ERR(devpath))
  57. return PTR_ERR(devpath);
  58. devname = strstr(devpath, "/dev/");
  59. if (devname != NULL)
  60. devname += strlen("/dev/");
  61. else
  62. devname = devpath;
  63. snprintf(buf, TASK_COMM_LEN, "%d.%s", blkif->domid, devname);
  64. kfree(devpath);
  65. return 0;
  66. }
  67. static void xen_update_blkif_status(struct xen_blkif *blkif)
  68. {
  69. int err;
  70. char name[TASK_COMM_LEN];
  71. struct xen_blkif_ring *ring;
  72. int i;
  73. /* Not ready to connect? */
  74. if (!blkif->rings || !blkif->rings[0].irq || !blkif->vbd.bdev)
  75. return;
  76. /* Already connected? */
  77. if (blkif->be->dev->state == XenbusStateConnected)
  78. return;
  79. /* Attempt to connect: exit if we fail to. */
  80. connect(blkif->be);
  81. if (blkif->be->dev->state != XenbusStateConnected)
  82. return;
  83. err = blkback_name(blkif, name);
  84. if (err) {
  85. xenbus_dev_error(blkif->be->dev, err, "get blkback dev name");
  86. return;
  87. }
  88. err = filemap_write_and_wait(blkif->vbd.bdev->bd_inode->i_mapping);
  89. if (err) {
  90. xenbus_dev_error(blkif->be->dev, err, "block flush");
  91. return;
  92. }
  93. invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);
  94. for (i = 0; i < blkif->nr_rings; i++) {
  95. ring = &blkif->rings[i];
  96. ring->xenblkd = kthread_run(xen_blkif_schedule, ring, "%s-%d", name, i);
  97. if (IS_ERR(ring->xenblkd)) {
  98. err = PTR_ERR(ring->xenblkd);
  99. ring->xenblkd = NULL;
  100. xenbus_dev_fatal(blkif->be->dev, err,
  101. "start %s-%d xenblkd", name, i);
  102. goto out;
  103. }
  104. }
  105. return;
  106. out:
  107. while (--i >= 0) {
  108. ring = &blkif->rings[i];
  109. kthread_stop(ring->xenblkd);
  110. }
  111. return;
  112. }
  113. static int xen_blkif_alloc_rings(struct xen_blkif *blkif)
  114. {
  115. unsigned int r;
  116. blkif->rings = kcalloc(blkif->nr_rings, sizeof(struct xen_blkif_ring),
  117. GFP_KERNEL);
  118. if (!blkif->rings)
  119. return -ENOMEM;
  120. for (r = 0; r < blkif->nr_rings; r++) {
  121. struct xen_blkif_ring *ring = &blkif->rings[r];
  122. spin_lock_init(&ring->blk_ring_lock);
  123. init_waitqueue_head(&ring->wq);
  124. INIT_LIST_HEAD(&ring->pending_free);
  125. INIT_LIST_HEAD(&ring->persistent_purge_list);
  126. INIT_WORK(&ring->persistent_purge_work, xen_blkbk_unmap_purged_grants);
  127. spin_lock_init(&ring->free_pages_lock);
  128. INIT_LIST_HEAD(&ring->free_pages);
  129. spin_lock_init(&ring->pending_free_lock);
  130. init_waitqueue_head(&ring->pending_free_wq);
  131. init_waitqueue_head(&ring->shutdown_wq);
  132. ring->blkif = blkif;
  133. ring->st_print = jiffies;
  134. ring->active = true;
  135. }
  136. return 0;
  137. }
  138. static struct xen_blkif *xen_blkif_alloc(domid_t domid)
  139. {
  140. struct xen_blkif *blkif;
  141. BUILD_BUG_ON(MAX_INDIRECT_PAGES > BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);
  142. blkif = kmem_cache_zalloc(xen_blkif_cachep, GFP_KERNEL);
  143. if (!blkif)
  144. return ERR_PTR(-ENOMEM);
  145. blkif->domid = domid;
  146. atomic_set(&blkif->refcnt, 1);
  147. init_completion(&blkif->drain_complete);
  148. /*
  149. * Because freeing back to the cache may be deferred, it is not
  150. * safe to unload the module (and hence destroy the cache) until
  151. * this has completed. To prevent premature unloading, take an
  152. * extra module reference here and release only when the object
  153. * has been freed back to the cache.
  154. */
  155. __module_get(THIS_MODULE);
  156. INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
  157. return blkif;
  158. }
  159. static int xen_blkif_map(struct xen_blkif_ring *ring, grant_ref_t *gref,
  160. unsigned int nr_grefs, unsigned int evtchn)
  161. {
  162. int err;
  163. struct xen_blkif *blkif = ring->blkif;
  164. /* Already connected through? */
  165. if (ring->irq)
  166. return 0;
  167. err = xenbus_map_ring_valloc(blkif->be->dev, gref, nr_grefs,
  168. &ring->blk_ring);
  169. if (err < 0)
  170. return err;
  171. switch (blkif->blk_protocol) {
  172. case BLKIF_PROTOCOL_NATIVE:
  173. {
  174. struct blkif_sring *sring;
  175. sring = (struct blkif_sring *)ring->blk_ring;
  176. BACK_RING_INIT(&ring->blk_rings.native, sring,
  177. XEN_PAGE_SIZE * nr_grefs);
  178. break;
  179. }
  180. case BLKIF_PROTOCOL_X86_32:
  181. {
  182. struct blkif_x86_32_sring *sring_x86_32;
  183. sring_x86_32 = (struct blkif_x86_32_sring *)ring->blk_ring;
  184. BACK_RING_INIT(&ring->blk_rings.x86_32, sring_x86_32,
  185. XEN_PAGE_SIZE * nr_grefs);
  186. break;
  187. }
  188. case BLKIF_PROTOCOL_X86_64:
  189. {
  190. struct blkif_x86_64_sring *sring_x86_64;
  191. sring_x86_64 = (struct blkif_x86_64_sring *)ring->blk_ring;
  192. BACK_RING_INIT(&ring->blk_rings.x86_64, sring_x86_64,
  193. XEN_PAGE_SIZE * nr_grefs);
  194. break;
  195. }
  196. default:
  197. BUG();
  198. }
  199. err = bind_interdomain_evtchn_to_irqhandler(blkif->domid, evtchn,
  200. xen_blkif_be_int, 0,
  201. "blkif-backend", ring);
  202. if (err < 0) {
  203. xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
  204. ring->blk_rings.common.sring = NULL;
  205. return err;
  206. }
  207. ring->irq = err;
  208. return 0;
  209. }
  210. static int xen_blkif_disconnect(struct xen_blkif *blkif)
  211. {
  212. struct pending_req *req, *n;
  213. unsigned int j, r;
  214. bool busy = false;
  215. for (r = 0; r < blkif->nr_rings; r++) {
  216. struct xen_blkif_ring *ring = &blkif->rings[r];
  217. unsigned int i = 0;
  218. if (!ring->active)
  219. continue;
  220. if (ring->xenblkd) {
  221. kthread_stop(ring->xenblkd);
  222. wake_up(&ring->shutdown_wq);
  223. }
  224. /* The above kthread_stop() guarantees that at this point we
  225. * don't have any discard_io or other_io requests. So, checking
  226. * for inflight IO is enough.
  227. */
  228. if (atomic_read(&ring->inflight) > 0) {
  229. busy = true;
  230. continue;
  231. }
  232. if (ring->irq) {
  233. unbind_from_irqhandler(ring->irq, ring);
  234. ring->irq = 0;
  235. }
  236. if (ring->blk_rings.common.sring) {
  237. xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
  238. ring->blk_rings.common.sring = NULL;
  239. }
  240. /* Remove all persistent grants and the cache of ballooned pages. */
  241. xen_blkbk_free_caches(ring);
  242. /* Check that there is no request in use */
  243. list_for_each_entry_safe(req, n, &ring->pending_free, free_list) {
  244. list_del(&req->free_list);
  245. for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
  246. kfree(req->segments[j]);
  247. for (j = 0; j < MAX_INDIRECT_PAGES; j++)
  248. kfree(req->indirect_pages[j]);
  249. kfree(req);
  250. i++;
  251. }
  252. BUG_ON(atomic_read(&ring->persistent_gnt_in_use) != 0);
  253. BUG_ON(!list_empty(&ring->persistent_purge_list));
  254. BUG_ON(!RB_EMPTY_ROOT(&ring->persistent_gnts));
  255. BUG_ON(!list_empty(&ring->free_pages));
  256. BUG_ON(ring->free_pages_num != 0);
  257. BUG_ON(ring->persistent_gnt_c != 0);
  258. WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
  259. ring->active = false;
  260. }
  261. if (busy)
  262. return -EBUSY;
  263. blkif->nr_ring_pages = 0;
  264. /*
  265. * blkif->rings was allocated in connect_ring, so we should free it in
  266. * here.
  267. */
  268. kfree(blkif->rings);
  269. blkif->rings = NULL;
  270. blkif->nr_rings = 0;
  271. return 0;
  272. }
  273. static void xen_blkif_free(struct xen_blkif *blkif)
  274. {
  275. WARN_ON(xen_blkif_disconnect(blkif));
  276. xen_vbd_free(&blkif->vbd);
  277. kfree(blkif->be->mode);
  278. kfree(blkif->be);
  279. /* Make sure everything is drained before shutting down */
  280. kmem_cache_free(xen_blkif_cachep, blkif);
  281. module_put(THIS_MODULE);
  282. }
  283. int __init xen_blkif_interface_init(void)
  284. {
  285. xen_blkif_cachep = kmem_cache_create("blkif_cache",
  286. sizeof(struct xen_blkif),
  287. 0, 0, NULL);
  288. if (!xen_blkif_cachep)
  289. return -ENOMEM;
  290. return 0;
  291. }
  292. /*
  293. * sysfs interface for VBD I/O requests
  294. */
  295. #define VBD_SHOW_ALLRING(name, format) \
  296. static ssize_t show_##name(struct device *_dev, \
  297. struct device_attribute *attr, \
  298. char *buf) \
  299. { \
  300. struct xenbus_device *dev = to_xenbus_device(_dev); \
  301. struct backend_info *be = dev_get_drvdata(&dev->dev); \
  302. struct xen_blkif *blkif = be->blkif; \
  303. unsigned int i; \
  304. unsigned long long result = 0; \
  305. \
  306. if (!blkif->rings) \
  307. goto out; \
  308. \
  309. for (i = 0; i < blkif->nr_rings; i++) { \
  310. struct xen_blkif_ring *ring = &blkif->rings[i]; \
  311. \
  312. result += ring->st_##name; \
  313. } \
  314. \
  315. out: \
  316. return sprintf(buf, format, result); \
  317. } \
  318. static DEVICE_ATTR(name, 0444, show_##name, NULL)
  319. VBD_SHOW_ALLRING(oo_req, "%llu\n");
  320. VBD_SHOW_ALLRING(rd_req, "%llu\n");
  321. VBD_SHOW_ALLRING(wr_req, "%llu\n");
  322. VBD_SHOW_ALLRING(f_req, "%llu\n");
  323. VBD_SHOW_ALLRING(ds_req, "%llu\n");
  324. VBD_SHOW_ALLRING(rd_sect, "%llu\n");
  325. VBD_SHOW_ALLRING(wr_sect, "%llu\n");
  326. static struct attribute *xen_vbdstat_attrs[] = {
  327. &dev_attr_oo_req.attr,
  328. &dev_attr_rd_req.attr,
  329. &dev_attr_wr_req.attr,
  330. &dev_attr_f_req.attr,
  331. &dev_attr_ds_req.attr,
  332. &dev_attr_rd_sect.attr,
  333. &dev_attr_wr_sect.attr,
  334. NULL
  335. };
  336. static const struct attribute_group xen_vbdstat_group = {
  337. .name = "statistics",
  338. .attrs = xen_vbdstat_attrs,
  339. };
  340. #define VBD_SHOW(name, format, args...) \
  341. static ssize_t show_##name(struct device *_dev, \
  342. struct device_attribute *attr, \
  343. char *buf) \
  344. { \
  345. struct xenbus_device *dev = to_xenbus_device(_dev); \
  346. struct backend_info *be = dev_get_drvdata(&dev->dev); \
  347. \
  348. return sprintf(buf, format, ##args); \
  349. } \
  350. static DEVICE_ATTR(name, 0444, show_##name, NULL)
  351. VBD_SHOW(physical_device, "%x:%x\n", be->major, be->minor);
  352. VBD_SHOW(mode, "%s\n", be->mode);
  353. static int xenvbd_sysfs_addif(struct xenbus_device *dev)
  354. {
  355. int error;
  356. error = device_create_file(&dev->dev, &dev_attr_physical_device);
  357. if (error)
  358. goto fail1;
  359. error = device_create_file(&dev->dev, &dev_attr_mode);
  360. if (error)
  361. goto fail2;
  362. error = sysfs_create_group(&dev->dev.kobj, &xen_vbdstat_group);
  363. if (error)
  364. goto fail3;
  365. return 0;
  366. fail3: sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group);
  367. fail2: device_remove_file(&dev->dev, &dev_attr_mode);
  368. fail1: device_remove_file(&dev->dev, &dev_attr_physical_device);
  369. return error;
  370. }
  371. static void xenvbd_sysfs_delif(struct xenbus_device *dev)
  372. {
  373. sysfs_remove_group(&dev->dev.kobj, &xen_vbdstat_group);
  374. device_remove_file(&dev->dev, &dev_attr_mode);
  375. device_remove_file(&dev->dev, &dev_attr_physical_device);
  376. }
  377. static void xen_vbd_free(struct xen_vbd *vbd)
  378. {
  379. if (vbd->bdev)
  380. blkdev_put(vbd->bdev, vbd->readonly ? FMODE_READ : FMODE_WRITE);
  381. vbd->bdev = NULL;
  382. }
  383. static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
  384. unsigned major, unsigned minor, int readonly,
  385. int cdrom)
  386. {
  387. struct xen_vbd *vbd;
  388. struct block_device *bdev;
  389. struct request_queue *q;
  390. vbd = &blkif->vbd;
  391. vbd->handle = handle;
  392. vbd->readonly = readonly;
  393. vbd->type = 0;
  394. vbd->pdevice = MKDEV(major, minor);
  395. bdev = blkdev_get_by_dev(vbd->pdevice, vbd->readonly ?
  396. FMODE_READ : FMODE_WRITE, NULL);
  397. if (IS_ERR(bdev)) {
  398. pr_warn("xen_vbd_create: device %08x could not be opened\n",
  399. vbd->pdevice);
  400. return -ENOENT;
  401. }
  402. vbd->bdev = bdev;
  403. if (vbd->bdev->bd_disk == NULL) {
  404. pr_warn("xen_vbd_create: device %08x doesn't exist\n",
  405. vbd->pdevice);
  406. xen_vbd_free(vbd);
  407. return -ENOENT;
  408. }
  409. vbd->size = vbd_sz(vbd);
  410. if (vbd->bdev->bd_disk->flags & GENHD_FL_CD || cdrom)
  411. vbd->type |= VDISK_CDROM;
  412. if (vbd->bdev->bd_disk->flags & GENHD_FL_REMOVABLE)
  413. vbd->type |= VDISK_REMOVABLE;
  414. q = bdev_get_queue(bdev);
  415. if (q && test_bit(QUEUE_FLAG_WC, &q->queue_flags))
  416. vbd->flush_support = true;
  417. if (q && blk_queue_secure_erase(q))
  418. vbd->discard_secure = true;
  419. pr_debug("Successful creation of handle=%04x (dom=%u)\n",
  420. handle, blkif->domid);
  421. return 0;
  422. }
  423. static int xen_blkbk_remove(struct xenbus_device *dev)
  424. {
  425. struct backend_info *be = dev_get_drvdata(&dev->dev);
  426. pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);
  427. if (be->major || be->minor)
  428. xenvbd_sysfs_delif(dev);
  429. if (be->backend_watch.node) {
  430. unregister_xenbus_watch(&be->backend_watch);
  431. kfree(be->backend_watch.node);
  432. be->backend_watch.node = NULL;
  433. }
  434. dev_set_drvdata(&dev->dev, NULL);
  435. if (be->blkif) {
  436. xen_blkif_disconnect(be->blkif);
  437. /* Put the reference we set in xen_blkif_alloc(). */
  438. xen_blkif_put(be->blkif);
  439. }
  440. return 0;
  441. }
  442. int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
  443. struct backend_info *be, int state)
  444. {
  445. struct xenbus_device *dev = be->dev;
  446. int err;
  447. err = xenbus_printf(xbt, dev->nodename, "feature-flush-cache",
  448. "%d", state);
  449. if (err)
  450. dev_warn(&dev->dev, "writing feature-flush-cache (%d)", err);
  451. return err;
  452. }
  453. static void xen_blkbk_discard(struct xenbus_transaction xbt, struct backend_info *be)
  454. {
  455. struct xenbus_device *dev = be->dev;
  456. struct xen_blkif *blkif = be->blkif;
  457. int err;
  458. int state = 0;
  459. struct block_device *bdev = be->blkif->vbd.bdev;
  460. struct request_queue *q = bdev_get_queue(bdev);
  461. if (!xenbus_read_unsigned(dev->nodename, "discard-enable", 1))
  462. return;
  463. if (blk_queue_discard(q)) {
  464. err = xenbus_printf(xbt, dev->nodename,
  465. "discard-granularity", "%u",
  466. q->limits.discard_granularity);
  467. if (err) {
  468. dev_warn(&dev->dev, "writing discard-granularity (%d)", err);
  469. return;
  470. }
  471. err = xenbus_printf(xbt, dev->nodename,
  472. "discard-alignment", "%u",
  473. q->limits.discard_alignment);
  474. if (err) {
  475. dev_warn(&dev->dev, "writing discard-alignment (%d)", err);
  476. return;
  477. }
  478. state = 1;
  479. /* Optional. */
  480. err = xenbus_printf(xbt, dev->nodename,
  481. "discard-secure", "%d",
  482. blkif->vbd.discard_secure);
  483. if (err) {
  484. dev_warn(&dev->dev, "writing discard-secure (%d)", err);
  485. return;
  486. }
  487. }
  488. err = xenbus_printf(xbt, dev->nodename, "feature-discard",
  489. "%d", state);
  490. if (err)
  491. dev_warn(&dev->dev, "writing feature-discard (%d)", err);
  492. }
  493. int xen_blkbk_barrier(struct xenbus_transaction xbt,
  494. struct backend_info *be, int state)
  495. {
  496. struct xenbus_device *dev = be->dev;
  497. int err;
  498. err = xenbus_printf(xbt, dev->nodename, "feature-barrier",
  499. "%d", state);
  500. if (err)
  501. dev_warn(&dev->dev, "writing feature-barrier (%d)", err);
  502. return err;
  503. }
  504. /*
  505. * Entry point to this code when a new device is created. Allocate the basic
  506. * structures, and watch the store waiting for the hotplug scripts to tell us
  507. * the device's physical major and minor numbers. Switch to InitWait.
  508. */
  509. static int xen_blkbk_probe(struct xenbus_device *dev,
  510. const struct xenbus_device_id *id)
  511. {
  512. int err;
  513. struct backend_info *be = kzalloc(sizeof(struct backend_info),
  514. GFP_KERNEL);
  515. /* match the pr_debug in xen_blkbk_remove */
  516. pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);
  517. if (!be) {
  518. xenbus_dev_fatal(dev, -ENOMEM,
  519. "allocating backend structure");
  520. return -ENOMEM;
  521. }
  522. be->dev = dev;
  523. dev_set_drvdata(&dev->dev, be);
  524. be->blkif = xen_blkif_alloc(dev->otherend_id);
  525. if (IS_ERR(be->blkif)) {
  526. err = PTR_ERR(be->blkif);
  527. be->blkif = NULL;
  528. xenbus_dev_fatal(dev, err, "creating block interface");
  529. goto fail;
  530. }
  531. err = xenbus_printf(XBT_NIL, dev->nodename,
  532. "feature-max-indirect-segments", "%u",
  533. MAX_INDIRECT_SEGMENTS);
  534. if (err)
  535. dev_warn(&dev->dev,
  536. "writing %s/feature-max-indirect-segments (%d)",
  537. dev->nodename, err);
  538. /* Multi-queue: advertise how many queues are supported by us.*/
  539. err = xenbus_printf(XBT_NIL, dev->nodename,
  540. "multi-queue-max-queues", "%u", xenblk_max_queues);
  541. if (err)
  542. pr_warn("Error writing multi-queue-max-queues\n");
  543. /* setup back pointer */
  544. be->blkif->be = be;
  545. err = xenbus_watch_pathfmt(dev, &be->backend_watch, backend_changed,
  546. "%s/%s", dev->nodename, "physical-device");
  547. if (err)
  548. goto fail;
  549. err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order", "%u",
  550. xen_blkif_max_ring_order);
  551. if (err)
  552. pr_warn("%s write out 'max-ring-page-order' failed\n", __func__);
  553. err = xenbus_switch_state(dev, XenbusStateInitWait);
  554. if (err)
  555. goto fail;
  556. return 0;
  557. fail:
  558. pr_warn("%s failed\n", __func__);
  559. xen_blkbk_remove(dev);
  560. return err;
  561. }
  562. /*
  563. * Callback received when the hotplug scripts have placed the physical-device
  564. * node. Read it and the mode node, and create a vbd. If the frontend is
  565. * ready, connect.
  566. */
  567. static void backend_changed(struct xenbus_watch *watch,
  568. const char *path, const char *token)
  569. {
  570. int err;
  571. unsigned major;
  572. unsigned minor;
  573. struct backend_info *be
  574. = container_of(watch, struct backend_info, backend_watch);
  575. struct xenbus_device *dev = be->dev;
  576. int cdrom = 0;
  577. unsigned long handle;
  578. char *device_type;
  579. pr_debug("%s %p %d\n", __func__, dev, dev->otherend_id);
  580. err = xenbus_scanf(XBT_NIL, dev->nodename, "physical-device", "%x:%x",
  581. &major, &minor);
  582. if (XENBUS_EXIST_ERR(err)) {
  583. /*
  584. * Since this watch will fire once immediately after it is
  585. * registered, we expect this. Ignore it, and wait for the
  586. * hotplug scripts.
  587. */
  588. return;
  589. }
  590. if (err != 2) {
  591. xenbus_dev_fatal(dev, err, "reading physical-device");
  592. return;
  593. }
  594. if (be->major | be->minor) {
  595. if (be->major != major || be->minor != minor)
  596. pr_warn("changing physical device (from %x:%x to %x:%x) not supported.\n",
  597. be->major, be->minor, major, minor);
  598. return;
  599. }
  600. be->mode = xenbus_read(XBT_NIL, dev->nodename, "mode", NULL);
  601. if (IS_ERR(be->mode)) {
  602. err = PTR_ERR(be->mode);
  603. be->mode = NULL;
  604. xenbus_dev_fatal(dev, err, "reading mode");
  605. return;
  606. }
  607. device_type = xenbus_read(XBT_NIL, dev->otherend, "device-type", NULL);
  608. if (!IS_ERR(device_type)) {
  609. cdrom = strcmp(device_type, "cdrom") == 0;
  610. kfree(device_type);
  611. }
  612. /* Front end dir is a number, which is used as the handle. */
  613. err = kstrtoul(strrchr(dev->otherend, '/') + 1, 0, &handle);
  614. if (err) {
  615. kfree(be->mode);
  616. be->mode = NULL;
  617. return;
  618. }
  619. be->major = major;
  620. be->minor = minor;
  621. err = xen_vbd_create(be->blkif, handle, major, minor,
  622. !strchr(be->mode, 'w'), cdrom);
  623. if (err)
  624. xenbus_dev_fatal(dev, err, "creating vbd structure");
  625. else {
  626. err = xenvbd_sysfs_addif(dev);
  627. if (err) {
  628. xen_vbd_free(&be->blkif->vbd);
  629. xenbus_dev_fatal(dev, err, "creating sysfs entries");
  630. }
  631. }
  632. if (err) {
  633. kfree(be->mode);
  634. be->mode = NULL;
  635. be->major = 0;
  636. be->minor = 0;
  637. } else {
  638. /* We're potentially connected now */
  639. xen_update_blkif_status(be->blkif);
  640. }
  641. }
  642. /*
  643. * Callback received when the frontend's state changes.
  644. */
  645. static void frontend_changed(struct xenbus_device *dev,
  646. enum xenbus_state frontend_state)
  647. {
  648. struct backend_info *be = dev_get_drvdata(&dev->dev);
  649. int err;
  650. pr_debug("%s %p %s\n", __func__, dev, xenbus_strstate(frontend_state));
  651. switch (frontend_state) {
  652. case XenbusStateInitialising:
  653. if (dev->state == XenbusStateClosed) {
  654. pr_info("%s: prepare for reconnect\n", dev->nodename);
  655. xenbus_switch_state(dev, XenbusStateInitWait);
  656. }
  657. break;
  658. case XenbusStateInitialised:
  659. case XenbusStateConnected:
  660. /*
  661. * Ensure we connect even when two watches fire in
  662. * close succession and we miss the intermediate value
  663. * of frontend_state.
  664. */
  665. if (dev->state == XenbusStateConnected)
  666. break;
  667. /*
  668. * Enforce precondition before potential leak point.
  669. * xen_blkif_disconnect() is idempotent.
  670. */
  671. err = xen_blkif_disconnect(be->blkif);
  672. if (err) {
  673. xenbus_dev_fatal(dev, err, "pending I/O");
  674. break;
  675. }
  676. err = connect_ring(be);
  677. if (err) {
  678. /*
  679. * Clean up so that memory resources can be used by
  680. * other devices. connect_ring reported already error.
  681. */
  682. xen_blkif_disconnect(be->blkif);
  683. break;
  684. }
  685. xen_update_blkif_status(be->blkif);
  686. break;
  687. case XenbusStateClosing:
  688. xenbus_switch_state(dev, XenbusStateClosing);
  689. break;
  690. case XenbusStateClosed:
  691. xen_blkif_disconnect(be->blkif);
  692. xenbus_switch_state(dev, XenbusStateClosed);
  693. if (xenbus_dev_is_online(dev))
  694. break;
  695. /* fall through */
  696. /* if not online */
  697. case XenbusStateUnknown:
  698. /* implies xen_blkif_disconnect() via xen_blkbk_remove() */
  699. device_unregister(&dev->dev);
  700. break;
  701. default:
  702. xenbus_dev_fatal(dev, -EINVAL, "saw state %d at frontend",
  703. frontend_state);
  704. break;
  705. }
  706. }
  707. /* ** Connection ** */
  708. /*
  709. * Write the physical details regarding the block device to the store, and
  710. * switch to Connected state.
  711. */
  712. static void connect(struct backend_info *be)
  713. {
  714. struct xenbus_transaction xbt;
  715. int err;
  716. struct xenbus_device *dev = be->dev;
  717. pr_debug("%s %s\n", __func__, dev->otherend);
  718. /* Supply the information about the device the frontend needs */
  719. again:
  720. err = xenbus_transaction_start(&xbt);
  721. if (err) {
  722. xenbus_dev_fatal(dev, err, "starting transaction");
  723. return;
  724. }
  725. /* If we can't advertise it is OK. */
  726. xen_blkbk_flush_diskcache(xbt, be, be->blkif->vbd.flush_support);
  727. xen_blkbk_discard(xbt, be);
  728. xen_blkbk_barrier(xbt, be, be->blkif->vbd.flush_support);
  729. err = xenbus_printf(xbt, dev->nodename, "feature-persistent", "%u", 1);
  730. if (err) {
  731. xenbus_dev_fatal(dev, err, "writing %s/feature-persistent",
  732. dev->nodename);
  733. goto abort;
  734. }
  735. err = xenbus_printf(xbt, dev->nodename, "sectors", "%llu",
  736. (unsigned long long)vbd_sz(&be->blkif->vbd));
  737. if (err) {
  738. xenbus_dev_fatal(dev, err, "writing %s/sectors",
  739. dev->nodename);
  740. goto abort;
  741. }
  742. /* FIXME: use a typename instead */
  743. err = xenbus_printf(xbt, dev->nodename, "info", "%u",
  744. be->blkif->vbd.type |
  745. (be->blkif->vbd.readonly ? VDISK_READONLY : 0));
  746. if (err) {
  747. xenbus_dev_fatal(dev, err, "writing %s/info",
  748. dev->nodename);
  749. goto abort;
  750. }
  751. err = xenbus_printf(xbt, dev->nodename, "sector-size", "%lu",
  752. (unsigned long)
  753. bdev_logical_block_size(be->blkif->vbd.bdev));
  754. if (err) {
  755. xenbus_dev_fatal(dev, err, "writing %s/sector-size",
  756. dev->nodename);
  757. goto abort;
  758. }
  759. err = xenbus_printf(xbt, dev->nodename, "physical-sector-size", "%u",
  760. bdev_physical_block_size(be->blkif->vbd.bdev));
  761. if (err)
  762. xenbus_dev_error(dev, err, "writing %s/physical-sector-size",
  763. dev->nodename);
  764. err = xenbus_transaction_end(xbt, 0);
  765. if (err == -EAGAIN)
  766. goto again;
  767. if (err)
  768. xenbus_dev_fatal(dev, err, "ending transaction");
  769. err = xenbus_switch_state(dev, XenbusStateConnected);
  770. if (err)
  771. xenbus_dev_fatal(dev, err, "%s: switching to Connected state",
  772. dev->nodename);
  773. return;
  774. abort:
  775. xenbus_transaction_end(xbt, 1);
  776. }
  777. /*
  778. * Each ring may have multi pages, depends on "ring-page-order".
  779. */
  780. static int read_per_ring_refs(struct xen_blkif_ring *ring, const char *dir)
  781. {
  782. unsigned int ring_ref[XENBUS_MAX_RING_GRANTS];
  783. struct pending_req *req, *n;
  784. int err, i, j;
  785. struct xen_blkif *blkif = ring->blkif;
  786. struct xenbus_device *dev = blkif->be->dev;
  787. unsigned int ring_page_order, nr_grefs, evtchn;
  788. err = xenbus_scanf(XBT_NIL, dir, "event-channel", "%u",
  789. &evtchn);
  790. if (err != 1) {
  791. err = -EINVAL;
  792. xenbus_dev_fatal(dev, err, "reading %s/event-channel", dir);
  793. return err;
  794. }
  795. err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u",
  796. &ring_page_order);
  797. if (err != 1) {
  798. err = xenbus_scanf(XBT_NIL, dir, "ring-ref", "%u", &ring_ref[0]);
  799. if (err != 1) {
  800. err = -EINVAL;
  801. xenbus_dev_fatal(dev, err, "reading %s/ring-ref", dir);
  802. return err;
  803. }
  804. nr_grefs = 1;
  805. } else {
  806. unsigned int i;
  807. if (ring_page_order > xen_blkif_max_ring_order) {
  808. err = -EINVAL;
  809. xenbus_dev_fatal(dev, err, "%s/request %d ring page order exceed max:%d",
  810. dir, ring_page_order,
  811. xen_blkif_max_ring_order);
  812. return err;
  813. }
  814. nr_grefs = 1 << ring_page_order;
  815. for (i = 0; i < nr_grefs; i++) {
  816. char ring_ref_name[RINGREF_NAME_LEN];
  817. snprintf(ring_ref_name, RINGREF_NAME_LEN, "ring-ref%u", i);
  818. err = xenbus_scanf(XBT_NIL, dir, ring_ref_name,
  819. "%u", &ring_ref[i]);
  820. if (err != 1) {
  821. err = -EINVAL;
  822. xenbus_dev_fatal(dev, err, "reading %s/%s",
  823. dir, ring_ref_name);
  824. return err;
  825. }
  826. }
  827. }
  828. blkif->nr_ring_pages = nr_grefs;
  829. err = -ENOMEM;
  830. for (i = 0; i < nr_grefs * XEN_BLKIF_REQS_PER_PAGE; i++) {
  831. req = kzalloc(sizeof(*req), GFP_KERNEL);
  832. if (!req)
  833. goto fail;
  834. list_add_tail(&req->free_list, &ring->pending_free);
  835. for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
  836. req->segments[j] = kzalloc(sizeof(*req->segments[0]), GFP_KERNEL);
  837. if (!req->segments[j])
  838. goto fail;
  839. }
  840. for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
  841. req->indirect_pages[j] = kzalloc(sizeof(*req->indirect_pages[0]),
  842. GFP_KERNEL);
  843. if (!req->indirect_pages[j])
  844. goto fail;
  845. }
  846. }
  847. /* Map the shared frame, irq etc. */
  848. err = xen_blkif_map(ring, ring_ref, nr_grefs, evtchn);
  849. if (err) {
  850. xenbus_dev_fatal(dev, err, "mapping ring-ref port %u", evtchn);
  851. goto fail;
  852. }
  853. return 0;
  854. fail:
  855. list_for_each_entry_safe(req, n, &ring->pending_free, free_list) {
  856. list_del(&req->free_list);
  857. for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++) {
  858. if (!req->segments[j])
  859. break;
  860. kfree(req->segments[j]);
  861. }
  862. for (j = 0; j < MAX_INDIRECT_PAGES; j++) {
  863. if (!req->indirect_pages[j])
  864. break;
  865. kfree(req->indirect_pages[j]);
  866. }
  867. kfree(req);
  868. }
  869. return err;
  870. }
  871. static int connect_ring(struct backend_info *be)
  872. {
  873. struct xenbus_device *dev = be->dev;
  874. unsigned int pers_grants;
  875. char protocol[64] = "";
  876. int err, i;
  877. char *xspath;
  878. size_t xspathsize;
  879. const size_t xenstore_path_ext_size = 11; /* sufficient for "/queue-NNN" */
  880. unsigned int requested_num_queues = 0;
  881. pr_debug("%s %s\n", __func__, dev->otherend);
  882. be->blkif->blk_protocol = BLKIF_PROTOCOL_DEFAULT;
  883. err = xenbus_scanf(XBT_NIL, dev->otherend, "protocol",
  884. "%63s", protocol);
  885. if (err <= 0)
  886. strcpy(protocol, "unspecified, assuming default");
  887. else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_NATIVE))
  888. be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE;
  889. else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_32))
  890. be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_32;
  891. else if (0 == strcmp(protocol, XEN_IO_PROTO_ABI_X86_64))
  892. be->blkif->blk_protocol = BLKIF_PROTOCOL_X86_64;
  893. else {
  894. xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol);
  895. return -ENOSYS;
  896. }
  897. pers_grants = xenbus_read_unsigned(dev->otherend, "feature-persistent",
  898. 0);
  899. be->blkif->vbd.feature_gnt_persistent = pers_grants;
  900. be->blkif->vbd.overflow_max_grants = 0;
  901. /*
  902. * Read the number of hardware queues from frontend.
  903. */
  904. requested_num_queues = xenbus_read_unsigned(dev->otherend,
  905. "multi-queue-num-queues",
  906. 1);
  907. if (requested_num_queues > xenblk_max_queues
  908. || requested_num_queues == 0) {
  909. /* Buggy or malicious guest. */
  910. xenbus_dev_fatal(dev, err,
  911. "guest requested %u queues, exceeding the maximum of %u.",
  912. requested_num_queues, xenblk_max_queues);
  913. return -ENOSYS;
  914. }
  915. be->blkif->nr_rings = requested_num_queues;
  916. if (xen_blkif_alloc_rings(be->blkif))
  917. return -ENOMEM;
  918. pr_info("%s: using %d queues, protocol %d (%s) %s\n", dev->nodename,
  919. be->blkif->nr_rings, be->blkif->blk_protocol, protocol,
  920. pers_grants ? "persistent grants" : "");
  921. if (be->blkif->nr_rings == 1)
  922. return read_per_ring_refs(&be->blkif->rings[0], dev->otherend);
  923. else {
  924. xspathsize = strlen(dev->otherend) + xenstore_path_ext_size;
  925. xspath = kmalloc(xspathsize, GFP_KERNEL);
  926. if (!xspath) {
  927. xenbus_dev_fatal(dev, -ENOMEM, "reading ring references");
  928. return -ENOMEM;
  929. }
  930. for (i = 0; i < be->blkif->nr_rings; i++) {
  931. memset(xspath, 0, xspathsize);
  932. snprintf(xspath, xspathsize, "%s/queue-%u", dev->otherend, i);
  933. err = read_per_ring_refs(&be->blkif->rings[i], xspath);
  934. if (err) {
  935. kfree(xspath);
  936. return err;
  937. }
  938. }
  939. kfree(xspath);
  940. }
  941. return 0;
  942. }
  943. static const struct xenbus_device_id xen_blkbk_ids[] = {
  944. { "vbd" },
  945. { "" }
  946. };
  947. static struct xenbus_driver xen_blkbk_driver = {
  948. .ids = xen_blkbk_ids,
  949. .probe = xen_blkbk_probe,
  950. .remove = xen_blkbk_remove,
  951. .otherend_changed = frontend_changed
  952. };
  953. int xen_blkif_xenbus_init(void)
  954. {
  955. return xenbus_register_backend(&xen_blkbk_driver);
  956. }