fmr_ops.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2015, 2017 Oracle. All rights reserved.
 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
 */

/* Lightweight memory registration using Fast Memory Regions (FMR).
 * Referred to sometimes as MTHCAFMR mode.
 *
 * FMR uses synchronous memory registration and deregistration.
 * FMR registration is known to be fast, but FMR deregistration
 * can take tens of usecs to complete.
 */

/* Normal operation
 *
 * A Memory Region is prepared for RDMA READ or WRITE using the
 * ib_map_phys_fmr verb (fmr_op_map). When the RDMA operation is
 * finished, the Memory Region is unmapped using the ib_unmap_fmr
 * verb (fmr_op_unmap).
 */

#include <linux/sunrpc/svc_rdma.h>

#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY	RPCDBG_TRANS
#endif

/* Maximum scatter/gather per FMR */
#define RPCRDMA_MAX_FMR_SGES	(64)

/* Access mode of externally registered pages */
enum {
	RPCRDMA_FMR_ACCESS_FLAGS	= IB_ACCESS_REMOTE_WRITE |
					  IB_ACCESS_REMOTE_READ,
};

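/* fmr_is_supported() - check whether the RDMA provider implements the
 * alloc_fmr verb. If it does not, FMR mode cannot be used with this
 * device and a different memory registration mode must be chosen.
 */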
bool
fmr_is_supported(struct rpcrdma_ia *ia)
{
	if (!ia->ri_device->alloc_fmr) {
		pr_info("rpcrdma: 'fmr' mode is not supported by device %s\n",
			ia->ri_device->name);
		return false;
	}
	return true;
}

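/* fmr_op_init_mr() - allocate the per-MR resources used by FMR mode:
 * the array of DMA addresses handed to ib_map_phys_fmr(), a
 * scatterlist of up to RPCRDMA_MAX_FMR_SGES entries, and the FMR
 * itself. Each FMR permits a single mapping before it must be
 * unmapped again (max_maps = 1).
 */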
static int
fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
{
	static struct ib_fmr_attr fmr_attr = {
		.max_pages	= RPCRDMA_MAX_FMR_SGES,
		.max_maps	= 1,
		.page_shift	= PAGE_SHIFT
	};

	mr->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES,
				       sizeof(u64), GFP_KERNEL);
	if (!mr->fmr.fm_physaddrs)
		goto out_free;

	mr->mr_sg = kcalloc(RPCRDMA_MAX_FMR_SGES,
			    sizeof(*mr->mr_sg), GFP_KERNEL);
	if (!mr->mr_sg)
		goto out_free;

	sg_init_table(mr->mr_sg, RPCRDMA_MAX_FMR_SGES);

	mr->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS,
				     &fmr_attr);
	if (IS_ERR(mr->fmr.fm_mr))
		goto out_fmr_err;

	INIT_LIST_HEAD(&mr->mr_list);
	return 0;

out_fmr_err:
	dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__,
		PTR_ERR(mr->fmr.fm_mr));

out_free:
	kfree(mr->mr_sg);
	kfree(mr->fmr.fm_physaddrs);
	return -ENOMEM;
}

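/* __fmr_unmap() - unmap a single FMR. ib_unmap_fmr() takes a list of
 * FMRs, so the FMR is placed on a temporary on-stack list for the
 * duration of the call.
 */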
static int
__fmr_unmap(struct rpcrdma_mr *mr)
{
	LIST_HEAD(l);
	int rc;

	list_add(&mr->fmr.fm_mr->list, &l);
	rc = ib_unmap_fmr(&l);
	list_del(&mr->fmr.fm_mr->list);
	return rc;
}

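/* fmr_op_release_mr() - free all resources associated with one MR:
 * the DMA address array, the scatterlist, the FMR itself, and
 * finally the rpcrdma_mr structure.
 */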
static void
fmr_op_release_mr(struct rpcrdma_mr *mr)
{
	LIST_HEAD(unmap_list);
	int rc;

	kfree(mr->fmr.fm_physaddrs);
	kfree(mr->mr_sg);

	/* In case this one was left mapped, try to unmap it
	 * to prevent dealloc_fmr from failing with EBUSY
	 */
	rc = __fmr_unmap(mr);
	if (rc)
		pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n",
		       mr, rc);

	rc = ib_dealloc_fmr(mr->fmr.fm_mr);
	if (rc)
		pr_err("rpcrdma: final ib_dealloc_fmr for %p returned %i\n",
		       mr, rc);

	kfree(mr);
}

/* Reset of a single FMR.
 */
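/* The FMR must be invalidated (unmapped) before its pages are DMA
 * unmapped and the MR is returned to the free list; until then the
 * device may still access the memory. If ib_unmap_fmr() fails, the
 * MR cannot safely be reused, so it is unhooked from the buffer's
 * all-MRs list and released (counted as an orphaned MR).
 */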
static void
fmr_op_recover_mr(struct rpcrdma_mr *mr)
{
	struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
	int rc;

	/* ORDER: invalidate first */
	rc = __fmr_unmap(mr);
	if (rc)
		goto out_release;

	/* ORDER: then DMA unmap */
	rpcrdma_mr_unmap_and_put(mr);

	r_xprt->rx_stats.mrs_recovered++;
	return;

out_release:
	pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mr);
	r_xprt->rx_stats.mrs_orphaned++;

	trace_xprtrdma_dma_unmap(mr);
	ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
			mr->mr_sg, mr->mr_nents, mr->mr_dir);

	spin_lock(&r_xprt->rx_buf.rb_mrlock);
	list_del(&mr->mr_all);
	spin_unlock(&r_xprt->rx_buf.rb_mrlock);

	fmr_op_release_mr(mr);
}

/* On success, sets:
 *	ep->rep_attr.cap.max_send_wr
 *	ep->rep_attr.cap.max_recv_wr
 *	cdata->max_requests
 *	ia->ri_max_segs
 */
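/* Each RPC credit consumes one Send and one Receive WR. A few extra
 * WRs are reserved for backchannel operations (RPCRDMA_BACKWARD_WRS)
 * plus one more for draining the queue, so max_requests is clamped
 * to what the device's max_qp_wr can carry.
 */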
static int
fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
	    struct rpcrdma_create_data_internal *cdata)
{
	int max_qp_wr;

	max_qp_wr = ia->ri_device->attrs.max_qp_wr;
	max_qp_wr -= RPCRDMA_BACKWARD_WRS;
	max_qp_wr -= 1;
	if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE)
		return -ENOMEM;
	if (cdata->max_requests > max_qp_wr)
		cdata->max_requests = max_qp_wr;
	ep->rep_attr.cap.max_send_wr = cdata->max_requests;
	ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
	ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */
	ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
	ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
	ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */

	ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
				RPCRDMA_MAX_FMR_SGES);
	return 0;
}

/* FMR mode conveys up to 64 pages of payload per chunk segment.
 */
static size_t
fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
{
	return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
		     RPCRDMA_MAX_HDR_SEGS * RPCRDMA_MAX_FMR_SGES);
}

/* Use the ib_map_phys_fmr() verb to register a memory region
 * for remote access via RDMA READ or RDMA WRITE.
 */
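/* FMR maps whole pages. The first segment's offset into its page is
 * therefore folded back out of the mapping and carried in
 * mr->mr_offset, and the gather loop stops early when a segment
 * boundary is not page aligned (a "hole"), since the resulting
 * mapping would not be contiguous.
 */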
static struct rpcrdma_mr_seg *
fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
	   int nsegs, bool writing, struct rpcrdma_mr **out)
{
	struct rpcrdma_mr_seg *seg1 = seg;
	int len, pageoff, i, rc;
	struct rpcrdma_mr *mr;
	u64 *dma_pages;

	mr = rpcrdma_mr_get(r_xprt);
	if (!mr)
		return ERR_PTR(-EAGAIN);

	pageoff = offset_in_page(seg1->mr_offset);
	seg1->mr_offset -= pageoff;	/* start of page */
	seg1->mr_len += pageoff;
	len = -pageoff;
	if (nsegs > RPCRDMA_MAX_FMR_SGES)
		nsegs = RPCRDMA_MAX_FMR_SGES;
	for (i = 0; i < nsegs;) {
		if (seg->mr_page)
			sg_set_page(&mr->mr_sg[i],
				    seg->mr_page,
				    seg->mr_len,
				    offset_in_page(seg->mr_offset));
		else
			sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
				   seg->mr_len);
		len += seg->mr_len;
		++seg;
		++i;
		/* Check for holes */
		if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
		    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
			break;
	}
	mr->mr_dir = rpcrdma_data_dir(writing);

	mr->mr_nents = ib_dma_map_sg(r_xprt->rx_ia.ri_device,
				     mr->mr_sg, i, mr->mr_dir);
	if (!mr->mr_nents)
		goto out_dmamap_err;
	trace_xprtrdma_dma_map(mr);

	for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++)
		dma_pages[i] = sg_dma_address(&mr->mr_sg[i]);
	rc = ib_map_phys_fmr(mr->fmr.fm_mr, dma_pages, mr->mr_nents,
			     dma_pages[0]);
	if (rc)
		goto out_maperr;

	mr->mr_handle = mr->fmr.fm_mr->rkey;
	mr->mr_length = len;
	mr->mr_offset = dma_pages[0] + pageoff;

	*out = mr;
	return seg;

out_dmamap_err:
	pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
	       mr->mr_sg, i);
	rpcrdma_mr_put(mr);
	return ERR_PTR(-EIO);

out_maperr:
	pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
	       len, (unsigned long long)dma_pages[0],
	       pageoff, mr->mr_nents, rc);
	rpcrdma_mr_unmap_and_put(mr);
	return ERR_PTR(-EIO);
}

/* Post Send WR containing the RPC Call message.
 */
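/* FMR registration is performed synchronously in fmr_op_map(), so
 * there are no registration WRs to chain ahead of the Send WR; the
 * Send WR built by the generic send path is posted as-is.
 */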
static int
fmr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
{
	return ib_post_send(ia->ri_id->qp, &req->rl_sendctx->sc_wr, NULL);
}

/* Invalidate all memory regions that were registered for "req".
 *
 * Sleeps until it is safe for the host CPU to access the
 * previously mapped memory regions.
 *
 * Caller ensures that @mrs is not empty before the call. This
 * function empties the list.
 */
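/* Invalidation must complete before the MRs are DMA unmapped and
 * returned to the free list, since the device can touch the pages
 * until the FMRs are unmapped. If the batched ib_unmap_fmr() call
 * fails, every MR on the list is put through the full reset path
 * instead.
 */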
static void
fmr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
{
	struct rpcrdma_mr *mr;
	LIST_HEAD(unmap_list);
	int rc;

	/* ORDER: Invalidate all of the req's MRs first
	 *
	 * ib_unmap_fmr() is slow, so use a single call instead
	 * of one call per mapped FMR.
	 */
	list_for_each_entry(mr, mrs, mr_list) {
		dprintk("RPC: %s: unmapping fmr %p\n",
			__func__, &mr->fmr);
		trace_xprtrdma_localinv(mr);
		list_add_tail(&mr->fmr.fm_mr->list, &unmap_list);
	}
	r_xprt->rx_stats.local_inv_needed++;
	rc = ib_unmap_fmr(&unmap_list);
	if (rc)
		goto out_reset;

	/* ORDER: Now DMA unmap all of the req's MRs, and return
	 * them to the free MW list.
	 */
	while (!list_empty(mrs)) {
		mr = rpcrdma_mr_pop(mrs);
		list_del(&mr->fmr.fm_mr->list);
		rpcrdma_mr_unmap_and_put(mr);
	}

	return;

out_reset:
	pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc);

	while (!list_empty(mrs)) {
		mr = rpcrdma_mr_pop(mrs);
		list_del(&mr->fmr.fm_mr->list);
		fmr_op_recover_mr(mr);
	}
}

const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
	.ro_map				= fmr_op_map,
	.ro_send			= fmr_op_send,
	.ro_unmap_sync			= fmr_op_unmap_sync,
	.ro_recover_mr			= fmr_op_recover_mr,
	.ro_open			= fmr_op_open,
	.ro_maxpages			= fmr_op_maxpages,
	.ro_init_mr			= fmr_op_init_mr,
	.ro_release_mr			= fmr_op_release_mr,
	.ro_displayname			= "fmr",
	.ro_send_w_inv_ok		= 0,
};
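
/* This ops vector is selected at transport set-up time when
 * fmr_is_supported() reports that the device provides the FMR verbs;
 * the generic xprtrdma code then invokes these methods through the
 * ops pointer rather than calling the fmr_op_* functions directly.
 * Remote Invalidation is never advertised in this mode
 * (ro_send_w_inv_ok is zero).
 */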