/*
 * Zoned block device handling
 *
 * Copyright (c) 2015, Hannes Reinecke
 * Copyright (c) 2015, SUSE Linux GmbH
 *
 * Copyright (c) 2016, Damien Le Moal
 * Copyright (c) 2016, Western Digital
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/rbtree.h>
#include <linux/blkdev.h>

static inline sector_t blk_zone_start(struct request_queue *q,
				      sector_t sector)
{
	sector_t zone_mask = blk_queue_zone_sectors(q) - 1;

	return sector & ~zone_mask;
}

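/*
 * Worked example (illustrative, not from the original source): with
 * 512 B sectors and 256 MiB zones, blk_queue_zone_sectors(q) is 524288
 * (a power of two), so zone_mask is 0x7ffff and
 * blk_zone_start(q, 1000000) returns 1000000 & ~0x7ffff = 524288,
 * i.e. the start of the zone containing sector 1000000.
 */
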
/*
 * Check that a zone report belongs to the partition.
 * If yes, fix its start sector and write pointer, copy it into the
 * zone information array and return true. Return false otherwise.
 */
static bool blkdev_report_zone(struct block_device *bdev,
			       struct blk_zone *rep,
			       struct blk_zone *zone)
{
	sector_t offset = get_start_sect(bdev);

	if (rep->start < offset)
		return false;

	rep->start -= offset;
	if (rep->start + rep->len > bdev->bd_part->nr_sects)
		return false;

	if (rep->type == BLK_ZONE_TYPE_CONVENTIONAL)
		rep->wp = rep->start + rep->len;
	else
		rep->wp -= offset;
	memcpy(zone, rep, sizeof(struct blk_zone));

	return true;
}

/**
 * blkdev_report_zones - Get zones information
 * @bdev:	Target block device
 * @sector:	Sector from which to report zones
 * @zones:	Array of zone structures in which to return the zone information
 * @nr_zones:	Number of zone structures in the zone array
 * @gfp_mask:	Memory allocation flags (for bio_alloc)
 *
 * Description:
 *    Get zone information starting from the zone containing @sector.
 *    The number of zones reported may be less than the number requested
 *    by @nr_zones. The number of zones actually reported is returned
 *    in @nr_zones.
 */
int blkdev_report_zones(struct block_device *bdev,
			sector_t sector,
			struct blk_zone *zones,
			unsigned int *nr_zones,
			gfp_t gfp_mask)
{
	struct request_queue *q = bdev_get_queue(bdev);
	struct blk_zone_report_hdr *hdr;
	unsigned int nrz = *nr_zones;
	struct page *page;
	unsigned int nr_rep;
	size_t rep_bytes;
	unsigned int nr_pages;
	struct bio *bio;
	struct bio_vec *bv;
	unsigned int i, n, nz;
	unsigned int ofst;
	void *addr;
	int ret;

	if (!q)
		return -ENXIO;

	if (!blk_queue_is_zoned(q))
		return -EOPNOTSUPP;

	if (!nrz)
		return 0;

	if (sector > bdev->bd_part->nr_sects) {
		*nr_zones = 0;
		return 0;
	}

	/*
	 * The zone report has a header. So make room for it in the
	 * payload. Also make sure that the report fits in a single BIO
	 * that will not be split down the stack.
	 */
	rep_bytes = sizeof(struct blk_zone_report_hdr) +
		sizeof(struct blk_zone) * nrz;
	rep_bytes = (rep_bytes + PAGE_SIZE - 1) & PAGE_MASK;
	if (rep_bytes > (queue_max_sectors(q) << 9))
		rep_bytes = queue_max_sectors(q) << 9;

	nr_pages = min_t(unsigned int, BIO_MAX_PAGES,
			 rep_bytes >> PAGE_SHIFT);
	nr_pages = min_t(unsigned int, nr_pages,
			 queue_max_segments(q));

	bio = bio_alloc(gfp_mask, nr_pages);
	if (!bio)
		return -ENOMEM;

	bio_set_dev(bio, bdev);
	bio->bi_iter.bi_sector = blk_zone_start(q, sector);
	bio_set_op_attrs(bio, REQ_OP_ZONE_REPORT, 0);

	for (i = 0; i < nr_pages; i++) {
		page = alloc_page(gfp_mask);
		if (!page) {
			ret = -ENOMEM;
			goto out;
		}
		if (!bio_add_page(bio, page, PAGE_SIZE, 0)) {
			__free_page(page);
			break;
		}
	}

	if (i == 0)
		ret = -ENOMEM;
	else
		ret = submit_bio_wait(bio);
	if (ret)
		goto out;

	/*
	 * Process the report result: skip the header and go through the
	 * reported zones to fix up the zone information for partitions,
	 * copying each fixed-up zone into the @zones array as we go.
	 */
	n = 0;
	nz = 0;
	nr_rep = 0;
	bio_for_each_segment_all(bv, bio, i) {

		if (!bv->bv_page)
			break;

		addr = kmap_atomic(bv->bv_page);

		/* Get header in the first page */
		ofst = 0;
		if (!nr_rep) {
			hdr = (struct blk_zone_report_hdr *) addr;
			nr_rep = hdr->nr_zones;
			ofst = sizeof(struct blk_zone_report_hdr);
		}

		/* Fixup and report zones */
		while (ofst < bv->bv_len &&
		       n < nr_rep && nz < nrz) {
			if (blkdev_report_zone(bdev, addr + ofst, &zones[nz]))
				nz++;
			ofst += sizeof(struct blk_zone);
			n++;
		}

		kunmap_atomic(addr);

		if (n >= nr_rep || nz >= nrz)
			break;

	}

	*nr_zones = nz;
out:
	bio_for_each_segment_all(bv, bio, i)
		__free_page(bv->bv_page);
	bio_put(bio);

	return ret;
}
EXPORT_SYMBOL_GPL(blkdev_report_zones);
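
/*
 * Illustrative sketch of an in-kernel caller (hypothetical helper, not
 * part of this file): use blkdev_report_zones() to look up the write
 * pointer of the zone containing @sector.
 */
static int __maybe_unused example_zone_wp(struct block_device *bdev,
					  sector_t sector, sector_t *wp)
{
	struct blk_zone zone;
	unsigned int nr_zones = 1;
	int ret;

	/* Report a single zone, starting from the zone containing @sector */
	ret = blkdev_report_zones(bdev, sector, &zone, &nr_zones, GFP_KERNEL);
	if (ret)
		return ret;
	if (!nr_zones)
		return -ENXIO;

	*wp = zone.wp;
	return 0;
}
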
/**
 * blkdev_reset_zones - Reset zones write pointer
 * @bdev:	Target block device
 * @sector:	Start sector of the first zone to reset
 * @nr_sectors:	Number of sectors, at least the length of one zone
 * @gfp_mask:	Memory allocation flags (for bio_alloc)
 *
 * Description:
 *    Reset the write pointer of the zones contained in the range
 *    @sector..@sector+@nr_sectors. Specifying the entire disk sector range
 *    is valid, but the specified range should not contain conventional zones.
 */
int blkdev_reset_zones(struct block_device *bdev,
		       sector_t sector, sector_t nr_sectors,
		       gfp_t gfp_mask)
{
	struct request_queue *q = bdev_get_queue(bdev);
	sector_t zone_sectors;
	sector_t end_sector = sector + nr_sectors;
	struct bio *bio;
	int ret;

	if (!q)
		return -ENXIO;

	if (!blk_queue_is_zoned(q))
		return -EOPNOTSUPP;

	if (end_sector > bdev->bd_part->nr_sects)
		/* Out of range */
		return -EINVAL;

	/* Check alignment (handle eventual smaller last zone) */
	zone_sectors = blk_queue_zone_sectors(q);
	if (sector & (zone_sectors - 1))
		return -EINVAL;

	if ((nr_sectors & (zone_sectors - 1)) &&
	    end_sector != bdev->bd_part->nr_sects)
		return -EINVAL;

	while (sector < end_sector) {

		bio = bio_alloc(gfp_mask, 0);
		bio->bi_iter.bi_sector = sector;
		bio_set_dev(bio, bdev);
		bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0);

		ret = submit_bio_wait(bio);
		bio_put(bio);

		if (ret)
			return ret;

		sector += zone_sectors;

		/* This may take a while, so be nice to others */
		cond_resched();

	}

	return 0;
}
EXPORT_SYMBOL_GPL(blkdev_reset_zones);
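
/*
 * Illustrative sketch (hypothetical helper, not part of this file):
 * reset the single zone starting at @sector. @sector must be zone
 * aligned, as enforced by blkdev_reset_zones() above.
 */
static int __maybe_unused example_reset_one_zone(struct block_device *bdev,
						 sector_t sector)
{
	struct request_queue *q = bdev_get_queue(bdev);

	if (!q)
		return -ENXIO;

	/* Reset exactly one zone worth of sectors */
	return blkdev_reset_zones(bdev, sector, blk_queue_zone_sectors(q),
				  GFP_KERNEL);
}
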
/*
 * BLKREPORTZONE ioctl processing.
 * Called from blkdev_ioctl.
 */
int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
			      unsigned int cmd, unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	struct request_queue *q;
	struct blk_zone_report rep;
	struct blk_zone *zones;
	int ret;

	if (!argp)
		return -EINVAL;

	q = bdev_get_queue(bdev);
	if (!q)
		return -ENXIO;

	if (!blk_queue_is_zoned(q))
		return -ENOTTY;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report)))
		return -EFAULT;

	if (!rep.nr_zones)
		return -EINVAL;

	if (rep.nr_zones > INT_MAX / sizeof(struct blk_zone))
		return -ERANGE;

	zones = kvmalloc(rep.nr_zones * sizeof(struct blk_zone),
			 GFP_KERNEL | __GFP_ZERO);
	if (!zones)
		return -ENOMEM;

	ret = blkdev_report_zones(bdev, rep.sector,
				  zones, &rep.nr_zones,
				  GFP_KERNEL);
	if (ret)
		goto out;

	if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report))) {
		ret = -EFAULT;
		goto out;
	}

	if (rep.nr_zones) {
		if (copy_to_user(argp + sizeof(struct blk_zone_report), zones,
				 sizeof(struct blk_zone) * rep.nr_zones))
			ret = -EFAULT;
	}

out:
	kvfree(zones);
	return ret;
}
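
/*
 * Illustrative user-space sketch (not part of this file): the
 * BLKREPORTZONE argument is a struct blk_zone_report header immediately
 * followed by an array of nr_zones struct blk_zone entries, both defined
 * in <linux/blkzoned.h>:
 *
 *	struct blk_zone_report *rep;
 *	size_t bufsz = sizeof(*rep) + 16 * sizeof(struct blk_zone);
 *
 *	rep = calloc(1, bufsz);
 *	rep->sector = 0;
 *	rep->nr_zones = 16;
 *	if (ioctl(fd, BLKREPORTZONE, rep) < 0)
 *		err(1, "BLKREPORTZONE");
 *
 * On return, rep->nr_zones holds the number of zones actually reported
 * and the zone descriptors follow the header in the same buffer.
 */
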
/*
 * BLKRESETZONE ioctl processing.
 * Called from blkdev_ioctl.
 */
int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
			     unsigned int cmd, unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	struct request_queue *q;
	struct blk_zone_range zrange;

	if (!argp)
		return -EINVAL;

	q = bdev_get_queue(bdev);
	if (!q)
		return -ENXIO;

	if (!blk_queue_is_zoned(q))
		return -ENOTTY;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	if (!(mode & FMODE_WRITE))
		return -EBADF;

	if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
		return -EFAULT;

	return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors,
				  GFP_KERNEL);
}
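
/*
 * Illustrative user-space sketch (not part of this file): BLKRESETZONE
 * takes a struct blk_zone_range describing the first sector and number
 * of sectors of the zones to reset:
 *
 *	struct blk_zone_range zr = {
 *		.sector		= zone_start,
 *		.nr_sectors	= zone_len,
 *	};
 *
 *	if (ioctl(fd, BLKRESETZONE, &zr) < 0)
 *		err(1, "BLKRESETZONE");
 */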