fmr_ops.c

/*
 * Copyright (c) 2015 Oracle. All rights reserved.
 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
 */

/* Lightweight memory registration using Fast Memory Regions (FMR).
 * Referred to sometimes as MTHCAFMR mode.
 *
 * FMR uses synchronous memory registration and deregistration.
 * FMR registration is known to be fast, but FMR deregistration
 * can take tens of usecs to complete.
 */

/* Normal operation
 *
 * A Memory Region is prepared for RDMA READ or WRITE using the
 * ib_map_phys_fmr verb (fmr_op_map). When the RDMA operation is
 * finished, the Memory Region is unmapped using the ib_unmap_fmr
 * verb (fmr_op_unmap).
 */
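
/* Illustrative sketch only (not part of the transport code path): the bare
 * verb sequence that fmr_op_map and fmr_op_unmap wrap, assuming "fmr" came
 * from ib_alloc_fmr(), "physaddrs" holds page-aligned DMA addresses, and
 * "npages" and "unmap_list" are hypothetical local names:
 *
 *      rc = ib_map_phys_fmr(fmr, physaddrs, npages, physaddrs[0]);
 *      ... peer performs RDMA READ/WRITE using fmr->rkey ...
 *      list_add(&fmr->list, &unmap_list);
 *      rc = ib_unmap_fmr(&unmap_list);
 */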

/* Transport recovery
 *
 * After a transport reconnect, fmr_op_map re-uses the MR already
 * allocated for the RPC, but generates a fresh rkey then maps the
 * MR again. This process is synchronous.
 */
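
/* The retransmit case is handled in fmr_op_map(): if the segment still
 * carries an rl_mw from a previous attempt, the MR is first invalidated
 * via __fmr_unmap() and then mapped again with ib_map_phys_fmr().
 */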

#include "xprt_rdma.h"

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY        RPCDBG_TRANS
#endif

/* Maximum scatter/gather per FMR */
#define RPCRDMA_MAX_FMR_SGES    (64)
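
/* FMR needs no transport- or connection-specific setup, so this op
 * has nothing to do.
 */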
static int
fmr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
            struct rpcrdma_create_data_internal *cdata)
{
        return 0;
}

/* FMR mode conveys up to 64 pages of payload per chunk segment.
 */
static size_t
fmr_op_maxpages(struct rpcrdma_xprt *r_xprt)
{
        return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
                     rpcrdma_max_segments(r_xprt) * RPCRDMA_MAX_FMR_SGES);
}
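
/* Pre-allocate a pool of FMRs at transport setup time. Each RPC slot
 * gets enough MWs to cover a maximally sized request plus head and
 * tail chunks; each FMR can map up to RPCRDMA_MAX_FMR_SGES pages.
 */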
static int
fmr_op_init(struct rpcrdma_xprt *r_xprt)
{
        struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
        int mr_access_flags = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ;
        struct ib_fmr_attr fmr_attr = {
                .max_pages      = RPCRDMA_MAX_FMR_SGES,
                .max_maps       = 1,
                .page_shift     = PAGE_SHIFT
        };
        struct ib_pd *pd = r_xprt->rx_ia.ri_pd;
        struct rpcrdma_mw *r;
        int i, rc;

        spin_lock_init(&buf->rb_mwlock);
        INIT_LIST_HEAD(&buf->rb_mws);
        INIT_LIST_HEAD(&buf->rb_all);

        i = max_t(int, RPCRDMA_MAX_DATA_SEGS / RPCRDMA_MAX_FMR_SGES, 1);
        i += 2;                         /* head + tail */
        i *= buf->rb_max_requests;      /* one set for each RPC slot */
        dprintk("RPC: %s: initializing %d FMRs\n", __func__, i);

        rc = -ENOMEM;
        while (i--) {
                r = kzalloc(sizeof(*r), GFP_KERNEL);
                if (!r)
                        goto out;

                r->r.fmr.physaddrs = kmalloc(RPCRDMA_MAX_FMR_SGES *
                                             sizeof(u64), GFP_KERNEL);
                if (!r->r.fmr.physaddrs)
                        goto out_free;

                r->r.fmr.fmr = ib_alloc_fmr(pd, mr_access_flags, &fmr_attr);
                if (IS_ERR(r->r.fmr.fmr))
                        goto out_fmr_err;

                list_add(&r->mw_list, &buf->rb_mws);
                list_add(&r->mw_all, &buf->rb_all);
        }
        return 0;

out_fmr_err:
        rc = PTR_ERR(r->r.fmr.fmr);
        dprintk("RPC: %s: ib_alloc_fmr status %i\n", __func__, rc);
        kfree(r->r.fmr.physaddrs);
out_free:
        kfree(r);
out:
        return rc;
}
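
/* Invalidate an FMR's current mapping (retiring its rkey) by passing it
 * to ib_unmap_fmr() on a single-entry list.
 */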
static int
__fmr_unmap(struct rpcrdma_mw *r)
{
        LIST_HEAD(l);

        list_add(&r->r.fmr.fmr->list, &l);
        return ib_unmap_fmr(&l);
}

/* Use the ib_map_phys_fmr() verb to register a memory region
 * for remote access via RDMA READ or RDMA WRITE.
 */
static int
fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
           int nsegs, bool writing)
{
        struct rpcrdma_ia *ia = &r_xprt->rx_ia;
        struct ib_device *device = ia->ri_device;
        enum dma_data_direction direction = rpcrdma_data_dir(writing);
        struct rpcrdma_mr_seg *seg1 = seg;
        int len, pageoff, i, rc;
        struct rpcrdma_mw *mw;

        mw = seg1->rl_mw;
        seg1->rl_mw = NULL;
        if (!mw) {
                mw = rpcrdma_get_mw(r_xprt);
                if (!mw)
                        return -ENOMEM;
        } else {
                /* this is a retransmit; generate a fresh rkey */
                rc = __fmr_unmap(mw);
                if (rc)
                        return rc;
        }

        pageoff = offset_in_page(seg1->mr_offset);
        seg1->mr_offset -= pageoff;     /* start of page */
        seg1->mr_len += pageoff;
        len = -pageoff;
        if (nsegs > RPCRDMA_MAX_FMR_SGES)
                nsegs = RPCRDMA_MAX_FMR_SGES;
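
        /* DMA-map each segment and gather its page DMA address. Stop
         * early if a segment does not start or end on a page boundary,
         * since an FMR describes a single virtually contiguous range.
         */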
        for (i = 0; i < nsegs;) {
                rpcrdma_map_one(device, seg, direction);
                mw->r.fmr.physaddrs[i] = seg->mr_dma;
                len += seg->mr_len;
                ++seg;
                ++i;
                /* Check for holes */
                if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
                    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
                        break;
        }

        rc = ib_map_phys_fmr(mw->r.fmr.fmr, mw->r.fmr.physaddrs,
                             i, seg1->mr_dma);
        if (rc)
                goto out_maperr;

        seg1->rl_mw = mw;
        seg1->mr_rkey = mw->r.fmr.fmr->rkey;
        seg1->mr_base = seg1->mr_dma + pageoff;
        seg1->mr_nsegs = i;
        seg1->mr_len = len;
        return i;

out_maperr:
        dprintk("RPC: %s: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n",
                __func__, len, (unsigned long long)seg1->mr_dma,
                pageoff, i, rc);
        while (i--)
                rpcrdma_unmap_one(device, --seg);
        return rc;
}

/* Use the ib_unmap_fmr() verb to prevent further remote
 * access via RDMA READ or RDMA WRITE.
 */
static int
fmr_op_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg)
{
        struct rpcrdma_ia *ia = &r_xprt->rx_ia;
        struct rpcrdma_mr_seg *seg1 = seg;
        struct rpcrdma_mw *mw = seg1->rl_mw;
        int rc, nsegs = seg->mr_nsegs;

        dprintk("RPC: %s: FMR %p\n", __func__, mw);

        seg1->rl_mw = NULL;
        while (seg1->mr_nsegs--)
                rpcrdma_unmap_one(ia->ri_device, seg++);
        rc = __fmr_unmap(mw);
        if (rc)
                goto out_err;
        rpcrdma_put_mw(r_xprt, mw);
        return nsegs;

out_err:
        /* The FMR is abandoned, but remains in rb_all. fmr_op_destroy
         * will attempt to release it when the transport is destroyed.
         */
        dprintk("RPC: %s: ib_unmap_fmr status %i\n", __func__, rc);
        return nsegs;
}
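
/* Release every FMR on the rb_all list when the transport is torn down,
 * including any that were abandoned by fmr_op_unmap().
 */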
static void
fmr_op_destroy(struct rpcrdma_buffer *buf)
{
        struct rpcrdma_mw *r;
        int rc;

        while (!list_empty(&buf->rb_all)) {
                r = list_entry(buf->rb_all.next, struct rpcrdma_mw, mw_all);
                list_del(&r->mw_all);
                kfree(r->r.fmr.physaddrs);

                rc = ib_dealloc_fmr(r->r.fmr.fmr);
                if (rc)
                        dprintk("RPC: %s: ib_dealloc_fmr failed %i\n",
                                __func__, rc);

                kfree(r);
        }
}

const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = {
        .ro_map         = fmr_op_map,
        .ro_unmap       = fmr_op_unmap,
        .ro_open        = fmr_op_open,
        .ro_maxpages    = fmr_op_maxpages,
        .ro_init        = fmr_op_init,
        .ro_destroy     = fmr_op_destroy,
        .ro_displayname = "fmr",
};
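
/* Note: the transport is expected to select this ops vector when it is
 * configured for the MTHCAFMR registration mode mentioned in the header
 * comment; the exact selection point lives outside this file.
 */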