blocklayout.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412
  1. /*
  2. * Copyright (c) 2014-2016 Christoph Hellwig.
  3. */
  4. #include <linux/exportfs.h>
  5. #include <linux/iomap.h>
  6. #include <linux/genhd.h>
  7. #include <linux/slab.h>
  8. #include <linux/pr.h>
  9. #include <linux/nfsd/debug.h>
  10. #include <scsi/scsi_proto.h>
  11. #include <scsi/scsi_common.h>
  12. #include "blocklayoutxdr.h"
  13. #include "pnfs.h"
  14. #define NFSDDBG_FACILITY NFSDDBG_PNFS
  15. static __be32
  16. nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
  17. struct nfsd4_layoutget *args)
  18. {
  19. struct nfsd4_layout_seg *seg = &args->lg_seg;
  20. struct super_block *sb = inode->i_sb;
  21. u32 block_size = i_blocksize(inode);
  22. struct pnfs_block_extent *bex;
  23. struct iomap iomap;
  24. u32 device_generation = 0;
  25. int error;
  26. if (seg->offset & (block_size - 1)) {
  27. dprintk("pnfsd: I/O misaligned\n");
  28. goto out_layoutunavailable;
  29. }
  30. /*
  31. * Some clients barf on non-zero block numbers for NONE or INVALID
  32. * layouts, so make sure to zero the whole structure.
  33. */
  34. error = -ENOMEM;
  35. bex = kzalloc(sizeof(*bex), GFP_KERNEL);
  36. if (!bex)
  37. goto out_error;
  38. args->lg_content = bex;
  39. error = sb->s_export_op->map_blocks(inode, seg->offset, seg->length,
  40. &iomap, seg->iomode != IOMODE_READ,
  41. &device_generation);
  42. if (error) {
  43. if (error == -ENXIO)
  44. goto out_layoutunavailable;
  45. goto out_error;
  46. }
  47. if (iomap.length < args->lg_minlength) {
  48. dprintk("pnfsd: extent smaller than minlength\n");
  49. goto out_layoutunavailable;
  50. }
  51. switch (iomap.type) {
  52. case IOMAP_MAPPED:
  53. if (seg->iomode == IOMODE_READ)
  54. bex->es = PNFS_BLOCK_READ_DATA;
  55. else
  56. bex->es = PNFS_BLOCK_READWRITE_DATA;
  57. bex->soff = (iomap.blkno << 9);
  58. break;
  59. case IOMAP_UNWRITTEN:
  60. if (seg->iomode & IOMODE_RW) {
  61. /*
  62. * Crack monkey special case from section 2.3.1.
  63. */
  64. if (args->lg_minlength == 0) {
  65. dprintk("pnfsd: no soup for you!\n");
  66. goto out_layoutunavailable;
  67. }
  68. bex->es = PNFS_BLOCK_INVALID_DATA;
  69. bex->soff = (iomap.blkno << 9);
  70. break;
  71. }
  72. /*FALLTHRU*/
  73. case IOMAP_HOLE:
  74. if (seg->iomode == IOMODE_READ) {
  75. bex->es = PNFS_BLOCK_NONE_DATA;
  76. break;
  77. }
  78. /*FALLTHRU*/
  79. case IOMAP_DELALLOC:
  80. default:
  81. WARN(1, "pnfsd: filesystem returned %d extent\n", iomap.type);
  82. goto out_layoutunavailable;
  83. }
  84. error = nfsd4_set_deviceid(&bex->vol_id, fhp, device_generation);
  85. if (error)
  86. goto out_error;
  87. bex->foff = iomap.offset;
  88. bex->len = iomap.length;
  89. seg->offset = iomap.offset;
  90. seg->length = iomap.length;
  91. dprintk("GET: 0x%llx:0x%llx %d\n", bex->foff, bex->len, bex->es);
  92. return 0;
  93. out_error:
  94. seg->length = 0;
  95. return nfserrno(error);
  96. out_layoutunavailable:
  97. seg->length = 0;
  98. return nfserr_layoutunavailable;
  99. }
  100. static __be32
  101. nfsd4_block_commit_blocks(struct inode *inode, struct nfsd4_layoutcommit *lcp,
  102. struct iomap *iomaps, int nr_iomaps)
  103. {
  104. loff_t new_size = lcp->lc_last_wr + 1;
  105. struct iattr iattr = { .ia_valid = 0 };
  106. int error;
  107. if (lcp->lc_mtime.tv_nsec == UTIME_NOW ||
  108. timespec_compare(&lcp->lc_mtime, &inode->i_mtime) < 0)
  109. lcp->lc_mtime = current_time(inode);
  110. iattr.ia_valid |= ATTR_ATIME | ATTR_CTIME | ATTR_MTIME;
  111. iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = lcp->lc_mtime;
  112. if (new_size > i_size_read(inode)) {
  113. iattr.ia_valid |= ATTR_SIZE;
  114. iattr.ia_size = new_size;
  115. }
  116. error = inode->i_sb->s_export_op->commit_blocks(inode, iomaps,
  117. nr_iomaps, &iattr);
  118. kfree(iomaps);
  119. return nfserrno(error);
  120. }
  121. #ifdef CONFIG_NFSD_BLOCKLAYOUT
  122. static int
  123. nfsd4_block_get_device_info_simple(struct super_block *sb,
  124. struct nfsd4_getdeviceinfo *gdp)
  125. {
  126. struct pnfs_block_deviceaddr *dev;
  127. struct pnfs_block_volume *b;
  128. dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) +
  129. sizeof(struct pnfs_block_volume), GFP_KERNEL);
  130. if (!dev)
  131. return -ENOMEM;
  132. gdp->gd_device = dev;
  133. dev->nr_volumes = 1;
  134. b = &dev->volumes[0];
  135. b->type = PNFS_BLOCK_VOLUME_SIMPLE;
  136. b->simple.sig_len = PNFS_BLOCK_UUID_LEN;
  137. return sb->s_export_op->get_uuid(sb, b->simple.sig, &b->simple.sig_len,
  138. &b->simple.offset);
  139. }
  140. static __be32
  141. nfsd4_block_proc_getdeviceinfo(struct super_block *sb,
  142. struct svc_rqst *rqstp,
  143. struct nfs4_client *clp,
  144. struct nfsd4_getdeviceinfo *gdp)
  145. {
  146. if (sb->s_bdev != sb->s_bdev->bd_contains)
  147. return nfserr_inval;
  148. return nfserrno(nfsd4_block_get_device_info_simple(sb, gdp));
  149. }
  150. static __be32
  151. nfsd4_block_proc_layoutcommit(struct inode *inode,
  152. struct nfsd4_layoutcommit *lcp)
  153. {
  154. struct iomap *iomaps;
  155. int nr_iomaps;
  156. nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout,
  157. lcp->lc_up_len, &iomaps, i_blocksize(inode));
  158. if (nr_iomaps < 0)
  159. return nfserrno(nr_iomaps);
  160. return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps);
  161. }
  162. const struct nfsd4_layout_ops bl_layout_ops = {
  163. /*
  164. * Pretend that we send notification to the client. This is a blatant
  165. * lie to force recent Linux clients to cache our device IDs.
  166. * We rarely ever change the device ID, so the harm of leaking deviceids
  167. * for a while isn't too bad. Unfortunately RFC5661 is a complete mess
  168. * in this regard, but I filed errata 4119 for this a while ago, and
  169. * hopefully the Linux client will eventually start caching deviceids
  170. * without this again.
  171. */
  172. .notify_types =
  173. NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE,
  174. .proc_getdeviceinfo = nfsd4_block_proc_getdeviceinfo,
  175. .encode_getdeviceinfo = nfsd4_block_encode_getdeviceinfo,
  176. .proc_layoutget = nfsd4_block_proc_layoutget,
  177. .encode_layoutget = nfsd4_block_encode_layoutget,
  178. .proc_layoutcommit = nfsd4_block_proc_layoutcommit,
  179. };
  180. #endif /* CONFIG_NFSD_BLOCKLAYOUT */
  181. #ifdef CONFIG_NFSD_SCSILAYOUT
  182. static int nfsd4_scsi_identify_device(struct block_device *bdev,
  183. struct pnfs_block_volume *b)
  184. {
  185. struct request_queue *q = bdev->bd_disk->queue;
  186. struct request *rq;
  187. size_t bufflen = 252, len, id_len;
  188. u8 *buf, *d, type, assoc;
  189. int error;
  190. buf = kzalloc(bufflen, GFP_KERNEL);
  191. if (!buf)
  192. return -ENOMEM;
  193. rq = blk_get_request(q, READ, GFP_KERNEL);
  194. if (IS_ERR(rq)) {
  195. error = -ENOMEM;
  196. goto out_free_buf;
  197. }
  198. blk_rq_set_block_pc(rq);
  199. error = blk_rq_map_kern(q, rq, buf, bufflen, GFP_KERNEL);
  200. if (error)
  201. goto out_put_request;
  202. rq->cmd[0] = INQUIRY;
  203. rq->cmd[1] = 1;
  204. rq->cmd[2] = 0x83;
  205. rq->cmd[3] = bufflen >> 8;
  206. rq->cmd[4] = bufflen & 0xff;
  207. rq->cmd_len = COMMAND_SIZE(INQUIRY);
  208. error = blk_execute_rq(rq->q, NULL, rq, 1);
  209. if (error) {
  210. pr_err("pNFS: INQUIRY 0x83 failed with: %x\n",
  211. rq->errors);
  212. goto out_put_request;
  213. }
  214. len = (buf[2] << 8) + buf[3] + 4;
  215. if (len > bufflen) {
  216. pr_err("pNFS: INQUIRY 0x83 response invalid (len = %zd)\n",
  217. len);
  218. goto out_put_request;
  219. }
  220. d = buf + 4;
  221. for (d = buf + 4; d < buf + len; d += id_len + 4) {
  222. id_len = d[3];
  223. type = d[1] & 0xf;
  224. assoc = (d[1] >> 4) & 0x3;
  225. /*
  226. * We only care about a EUI-64 and NAA designator types
  227. * with LU association.
  228. */
  229. if (assoc != 0x00)
  230. continue;
  231. if (type != 0x02 && type != 0x03)
  232. continue;
  233. if (id_len != 8 && id_len != 12 && id_len != 16)
  234. continue;
  235. b->scsi.code_set = PS_CODE_SET_BINARY;
  236. b->scsi.designator_type = type == 0x02 ?
  237. PS_DESIGNATOR_EUI64 : PS_DESIGNATOR_NAA;
  238. b->scsi.designator_len = id_len;
  239. memcpy(b->scsi.designator, d + 4, id_len);
  240. /*
  241. * If we found a 8 or 12 byte descriptor continue on to
  242. * see if a 16 byte one is available. If we find a
  243. * 16 byte descriptor we're done.
  244. */
  245. if (id_len == 16)
  246. break;
  247. }
  248. out_put_request:
  249. blk_put_request(rq);
  250. out_free_buf:
  251. kfree(buf);
  252. return error;
  253. }
  254. #define NFSD_MDS_PR_KEY 0x0100000000000000ULL
  255. /*
  256. * We use the client ID as a unique key for the reservations.
  257. * This allows us to easily fence a client when recalls fail.
  258. */
  259. static u64 nfsd4_scsi_pr_key(struct nfs4_client *clp)
  260. {
  261. return ((u64)clp->cl_clientid.cl_boot << 32) | clp->cl_clientid.cl_id;
  262. }
  263. static int
  264. nfsd4_block_get_device_info_scsi(struct super_block *sb,
  265. struct nfs4_client *clp,
  266. struct nfsd4_getdeviceinfo *gdp)
  267. {
  268. struct pnfs_block_deviceaddr *dev;
  269. struct pnfs_block_volume *b;
  270. const struct pr_ops *ops;
  271. int error;
  272. dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) +
  273. sizeof(struct pnfs_block_volume), GFP_KERNEL);
  274. if (!dev)
  275. return -ENOMEM;
  276. gdp->gd_device = dev;
  277. dev->nr_volumes = 1;
  278. b = &dev->volumes[0];
  279. b->type = PNFS_BLOCK_VOLUME_SCSI;
  280. b->scsi.pr_key = nfsd4_scsi_pr_key(clp);
  281. error = nfsd4_scsi_identify_device(sb->s_bdev, b);
  282. if (error)
  283. return error;
  284. ops = sb->s_bdev->bd_disk->fops->pr_ops;
  285. if (!ops) {
  286. pr_err("pNFS: device %s does not support PRs.\n",
  287. sb->s_id);
  288. return -EINVAL;
  289. }
  290. error = ops->pr_register(sb->s_bdev, 0, NFSD_MDS_PR_KEY, true);
  291. if (error) {
  292. pr_err("pNFS: failed to register key for device %s.\n",
  293. sb->s_id);
  294. return -EINVAL;
  295. }
  296. error = ops->pr_reserve(sb->s_bdev, NFSD_MDS_PR_KEY,
  297. PR_EXCLUSIVE_ACCESS_REG_ONLY, 0);
  298. if (error) {
  299. pr_err("pNFS: failed to reserve device %s.\n",
  300. sb->s_id);
  301. return -EINVAL;
  302. }
  303. return 0;
  304. }
  305. static __be32
  306. nfsd4_scsi_proc_getdeviceinfo(struct super_block *sb,
  307. struct svc_rqst *rqstp,
  308. struct nfs4_client *clp,
  309. struct nfsd4_getdeviceinfo *gdp)
  310. {
  311. if (sb->s_bdev != sb->s_bdev->bd_contains)
  312. return nfserr_inval;
  313. return nfserrno(nfsd4_block_get_device_info_scsi(sb, clp, gdp));
  314. }
  315. static __be32
  316. nfsd4_scsi_proc_layoutcommit(struct inode *inode,
  317. struct nfsd4_layoutcommit *lcp)
  318. {
  319. struct iomap *iomaps;
  320. int nr_iomaps;
  321. nr_iomaps = nfsd4_scsi_decode_layoutupdate(lcp->lc_up_layout,
  322. lcp->lc_up_len, &iomaps, i_blocksize(inode));
  323. if (nr_iomaps < 0)
  324. return nfserrno(nr_iomaps);
  325. return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps);
  326. }
  327. static void
  328. nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls)
  329. {
  330. struct nfs4_client *clp = ls->ls_stid.sc_client;
  331. struct block_device *bdev = ls->ls_file->f_path.mnt->mnt_sb->s_bdev;
  332. bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY,
  333. nfsd4_scsi_pr_key(clp), 0, true);
  334. }
  335. const struct nfsd4_layout_ops scsi_layout_ops = {
  336. /*
  337. * Pretend that we send notification to the client. This is a blatant
  338. * lie to force recent Linux clients to cache our device IDs.
  339. * We rarely ever change the device ID, so the harm of leaking deviceids
  340. * for a while isn't too bad. Unfortunately RFC5661 is a complete mess
  341. * in this regard, but I filed errata 4119 for this a while ago, and
  342. * hopefully the Linux client will eventually start caching deviceids
  343. * without this again.
  344. */
  345. .notify_types =
  346. NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE,
  347. .proc_getdeviceinfo = nfsd4_scsi_proc_getdeviceinfo,
  348. .encode_getdeviceinfo = nfsd4_block_encode_getdeviceinfo,
  349. .proc_layoutget = nfsd4_block_proc_layoutget,
  350. .encode_layoutget = nfsd4_block_encode_layoutget,
  351. .proc_layoutcommit = nfsd4_scsi_proc_layoutcommit,
  352. .fence_client = nfsd4_scsi_fence_client,
  353. };
  354. #endif /* CONFIG_NFSD_SCSILAYOUT */