frwr_ops.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright (c) 2015, 2017 Oracle. All rights reserved.
  4. * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
  5. */
  6. /* Lightweight memory registration using Fast Registration Work
  7. * Requests (FRWR).
  8. *
  9. * FRWR features ordered asynchronous registration and deregistration
  10. * of arbitrarily sized memory regions. This is the fastest and safest
  11. * but most complex memory registration mode.
  12. */
  13. /* Normal operation
  14. *
  15. * A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG
  16. * Work Request (frwr_op_map). When the RDMA operation is finished, this
  17. * Memory Region is invalidated using a LOCAL_INV Work Request
  18. * (frwr_op_unmap_sync).
  19. *
  20. * Typically these Work Requests are not signaled, and neither are RDMA
  21. * SEND Work Requests (with the exception of signaling occasionally to
  22. * prevent provider work queue overflows). This greatly reduces HCA
  23. * interrupt workload.
  24. *
  25. * As an optimization, frwr_op_unmap_sync marks MRs INVALID before the
  26. * LOCAL_INV WR is posted. If posting succeeds, the MR is placed on
  27. * rb_mrs immediately so that no work (like managing a linked list
  28. * under a spinlock) is needed in the completion upcall.
  29. *
  30. * But this means that frwr_op_map() can occasionally encounter an MR
  31. * that is INVALID but the LOCAL_INV WR has not completed. Work Queue
  32. * ordering prevents a subsequent FAST_REG WR from executing against
  33. * that MR while it is still being invalidated.
  34. */
  35. /* Transport recovery
  36. *
  37. * ->op_map and the transport connect worker cannot run at the same
  38. * time, but ->op_unmap can fire while the transport connect worker
  39. * is running. Thus MR recovery is handled in ->op_map, to guarantee
  40. * that recovered MRs are owned by a sending RPC, and not one where
  41. * ->op_unmap could fire at the same time transport reconnect is
  42. * being done.
  43. *
  44. * When the underlying transport disconnects, MRs are left in one of
  45. * four states:
  46. *
  47. * INVALID: The MR was not in use before the QP entered ERROR state.
  48. *
  49. * VALID: The MR was registered before the QP entered ERROR state.
  50. *
  51. * FLUSHED_FR: The MR was being registered when the QP entered ERROR
  52. * state, and the pending WR was flushed.
  53. *
  54. * FLUSHED_LI: The MR was being invalidated when the QP entered ERROR
  55. * state, and the pending WR was flushed.
  56. *
  57. * When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered
  58. * with ib_dereg_mr and then are re-initialized. Because MR recovery
  59. * allocates fresh resources, it is deferred to a workqueue, and the
  60. * recovered MRs are placed back on the rb_mrs list when recovery is
  61. * complete. frwr_op_map allocates another MR for the current RPC while
  62. * the broken MR is reset.
  63. *
  64. * To ensure that frwr_op_map doesn't encounter an MR that is marked
  65. * INVALID but that is about to be flushed due to a previous transport
  66. * disconnect, the transport connect worker attempts to drain all
  67. * pending send queue WRs before the transport is reconnected.
  68. */
  69. #include <linux/sunrpc/rpc_rdma.h>
  70. #include <linux/sunrpc/svc_rdma.h>
  71. #include "xprt_rdma.h"
  72. #include <trace/events/rpcrdma.h>
  73. #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
  74. # define RPCDBG_FACILITY RPCDBG_TRANS
  75. #endif
  76. bool
  77. frwr_is_supported(struct rpcrdma_ia *ia)
  78. {
  79. struct ib_device_attr *attrs = &ia->ri_device->attrs;
  80. if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
  81. goto out_not_supported;
  82. if (attrs->max_fast_reg_page_list_len == 0)
  83. goto out_not_supported;
  84. return true;
  85. out_not_supported:
  86. pr_info("rpcrdma: 'frwr' mode is not supported by device %s\n",
  87. ia->ri_device->name);
  88. return false;
  89. }
  90. static int
  91. frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
  92. {
  93. unsigned int depth = ia->ri_max_frwr_depth;
  94. struct rpcrdma_frwr *frwr = &mr->frwr;
  95. int rc;
  96. frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
  97. if (IS_ERR(frwr->fr_mr))
  98. goto out_mr_err;
  99. mr->mr_sg = kcalloc(depth, sizeof(*mr->mr_sg), GFP_KERNEL);
  100. if (!mr->mr_sg)
  101. goto out_list_err;
  102. INIT_LIST_HEAD(&mr->mr_list);
  103. sg_init_table(mr->mr_sg, depth);
  104. init_completion(&frwr->fr_linv_done);
  105. return 0;
  106. out_mr_err:
  107. rc = PTR_ERR(frwr->fr_mr);
  108. dprintk("RPC: %s: ib_alloc_mr status %i\n",
  109. __func__, rc);
  110. return rc;
  111. out_list_err:
  112. rc = -ENOMEM;
  113. dprintk("RPC: %s: sg allocation failure\n",
  114. __func__);
  115. ib_dereg_mr(frwr->fr_mr);
  116. return rc;
  117. }
  118. static void
  119. frwr_op_release_mr(struct rpcrdma_mr *mr)
  120. {
  121. int rc;
  122. rc = ib_dereg_mr(mr->frwr.fr_mr);
  123. if (rc)
  124. pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n",
  125. mr, rc);
  126. kfree(mr->mr_sg);
  127. kfree(mr);
  128. }
  129. static int
  130. __frwr_mr_reset(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
  131. {
  132. struct rpcrdma_frwr *frwr = &mr->frwr;
  133. int rc;
  134. rc = ib_dereg_mr(frwr->fr_mr);
  135. if (rc) {
  136. pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n",
  137. rc, mr);
  138. return rc;
  139. }
  140. frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype,
  141. ia->ri_max_frwr_depth);
  142. if (IS_ERR(frwr->fr_mr)) {
  143. pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n",
  144. PTR_ERR(frwr->fr_mr), mr);
  145. return PTR_ERR(frwr->fr_mr);
  146. }
  147. dprintk("RPC: %s: recovered FRWR %p\n", __func__, frwr);
  148. frwr->fr_state = FRWR_IS_INVALID;
  149. return 0;
  150. }
  151. /* Reset of a single FRWR. Generate a fresh rkey by replacing the MR.
  152. */
  153. static void
  154. frwr_op_recover_mr(struct rpcrdma_mr *mr)
  155. {
  156. enum rpcrdma_frwr_state state = mr->frwr.fr_state;
  157. struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
  158. struct rpcrdma_ia *ia = &r_xprt->rx_ia;
  159. int rc;
  160. rc = __frwr_mr_reset(ia, mr);
  161. if (state != FRWR_FLUSHED_LI) {
  162. trace_xprtrdma_dma_unmap(mr);
  163. ib_dma_unmap_sg(ia->ri_device,
  164. mr->mr_sg, mr->mr_nents, mr->mr_dir);
  165. }
  166. if (rc)
  167. goto out_release;
  168. rpcrdma_mr_put(mr);
  169. r_xprt->rx_stats.mrs_recovered++;
  170. return;
  171. out_release:
  172. pr_err("rpcrdma: FRWR reset failed %d, %p released\n", rc, mr);
  173. r_xprt->rx_stats.mrs_orphaned++;
  174. spin_lock(&r_xprt->rx_buf.rb_mrlock);
  175. list_del(&mr->mr_all);
  176. spin_unlock(&r_xprt->rx_buf.rb_mrlock);
  177. frwr_op_release_mr(mr);
  178. }
  179. /* On success, sets:
  180. * ep->rep_attr.cap.max_send_wr
  181. * ep->rep_attr.cap.max_recv_wr
  182. * cdata->max_requests
  183. * ia->ri_max_segs
  184. *
  185. * And these FRWR-related fields:
  186. * ia->ri_max_frwr_depth
  187. * ia->ri_mrtype
  188. */
  189. static int
  190. frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
  191. struct rpcrdma_create_data_internal *cdata)
  192. {
  193. struct ib_device_attr *attrs = &ia->ri_device->attrs;
  194. int max_qp_wr, depth, delta;
  195. ia->ri_mrtype = IB_MR_TYPE_MEM_REG;
  196. if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
  197. ia->ri_mrtype = IB_MR_TYPE_SG_GAPS;
  198. ia->ri_max_frwr_depth =
  199. min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
  200. attrs->max_fast_reg_page_list_len);
  201. dprintk("RPC: %s: device's max FR page list len = %u\n",
  202. __func__, ia->ri_max_frwr_depth);
  203. /* Add room for frwr register and invalidate WRs.
  204. * 1. FRWR reg WR for head
  205. * 2. FRWR invalidate WR for head
  206. * 3. N FRWR reg WRs for pagelist
  207. * 4. N FRWR invalidate WRs for pagelist
  208. * 5. FRWR reg WR for tail
  209. * 6. FRWR invalidate WR for tail
  210. * 7. The RDMA_SEND WR
  211. */
  212. depth = 7;
  213. /* Calculate N if the device max FRWR depth is smaller than
  214. * RPCRDMA_MAX_DATA_SEGS.
  215. */
  216. if (ia->ri_max_frwr_depth < RPCRDMA_MAX_DATA_SEGS) {
  217. delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frwr_depth;
  218. do {
  219. depth += 2; /* FRWR reg + invalidate */
  220. delta -= ia->ri_max_frwr_depth;
  221. } while (delta > 0);
  222. }
  223. max_qp_wr = ia->ri_device->attrs.max_qp_wr;
  224. max_qp_wr -= RPCRDMA_BACKWARD_WRS;
  225. max_qp_wr -= 1;
  226. if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE)
  227. return -ENOMEM;
  228. if (cdata->max_requests > max_qp_wr)
  229. cdata->max_requests = max_qp_wr;
  230. ep->rep_attr.cap.max_send_wr = cdata->max_requests * depth;
  231. if (ep->rep_attr.cap.max_send_wr > max_qp_wr) {
  232. cdata->max_requests = max_qp_wr / depth;
  233. if (!cdata->max_requests)
  234. return -EINVAL;
  235. ep->rep_attr.cap.max_send_wr = cdata->max_requests *
  236. depth;
  237. }
  238. ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS;
  239. ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */
  240. ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
  241. ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
  242. ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */
  243. ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
  244. ia->ri_max_frwr_depth);
  245. return 0;
  246. }
  247. /* FRWR mode conveys a list of pages per chunk segment. The
  248. * maximum length of that list is the FRWR page list depth.
  249. */
  250. static size_t
  251. frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
  252. {
  253. struct rpcrdma_ia *ia = &r_xprt->rx_ia;
  254. return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
  255. RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frwr_depth);
  256. }
  257. static void
  258. __frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr)
  259. {
  260. if (wc->status != IB_WC_WR_FLUSH_ERR)
  261. pr_err("rpcrdma: %s: %s (%u/0x%x)\n",
  262. wr, ib_wc_status_msg(wc->status),
  263. wc->status, wc->vendor_err);
  264. }
  265. /**
  266. * frwr_wc_fastreg - Invoked by RDMA provider for a flushed FastReg WC
  267. * @cq: completion queue (ignored)
  268. * @wc: completed WR
  269. *
  270. */
  271. static void
  272. frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
  273. {
  274. struct ib_cqe *cqe = wc->wr_cqe;
  275. struct rpcrdma_frwr *frwr =
  276. container_of(cqe, struct rpcrdma_frwr, fr_cqe);
  277. /* WARNING: Only wr_cqe and status are reliable at this point */
  278. if (wc->status != IB_WC_SUCCESS) {
  279. frwr->fr_state = FRWR_FLUSHED_FR;
  280. __frwr_sendcompletion_flush(wc, "fastreg");
  281. }
  282. trace_xprtrdma_wc_fastreg(wc, frwr);
  283. }
  284. /**
  285. * frwr_wc_localinv - Invoked by RDMA provider for a flushed LocalInv WC
  286. * @cq: completion queue (ignored)
  287. * @wc: completed WR
  288. *
  289. */
  290. static void
  291. frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
  292. {
  293. struct ib_cqe *cqe = wc->wr_cqe;
  294. struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,
  295. fr_cqe);
  296. /* WARNING: Only wr_cqe and status are reliable at this point */
  297. if (wc->status != IB_WC_SUCCESS) {
  298. frwr->fr_state = FRWR_FLUSHED_LI;
  299. __frwr_sendcompletion_flush(wc, "localinv");
  300. }
  301. trace_xprtrdma_wc_li(wc, frwr);
  302. }
  303. /**
  304. * frwr_wc_localinv_wake - Invoked by RDMA provider for a signaled LocalInv WC
  305. * @cq: completion queue (ignored)
  306. * @wc: completed WR
  307. *
  308. * Awaken anyone waiting for an MR to finish being fenced.
  309. */
  310. static void
  311. frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
  312. {
  313. struct ib_cqe *cqe = wc->wr_cqe;
  314. struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,
  315. fr_cqe);
  316. /* WARNING: Only wr_cqe and status are reliable at this point */
  317. if (wc->status != IB_WC_SUCCESS) {
  318. frwr->fr_state = FRWR_FLUSHED_LI;
  319. __frwr_sendcompletion_flush(wc, "localinv");
  320. }
  321. complete(&frwr->fr_linv_done);
  322. trace_xprtrdma_wc_li_wake(wc, frwr);
  323. }
  324. /* Post a REG_MR Work Request to register a memory region
  325. * for remote access via RDMA READ or RDMA WRITE.
  326. */
  327. static struct rpcrdma_mr_seg *
  328. frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
  329. int nsegs, bool writing, struct rpcrdma_mr **out)
  330. {
  331. struct rpcrdma_ia *ia = &r_xprt->rx_ia;
  332. bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS;
  333. struct rpcrdma_frwr *frwr;
  334. struct rpcrdma_mr *mr;
  335. struct ib_mr *ibmr;
  336. struct ib_reg_wr *reg_wr;
  337. int i, n;
  338. u8 key;
  339. mr = NULL;
  340. do {
  341. if (mr)
  342. rpcrdma_mr_defer_recovery(mr);
  343. mr = rpcrdma_mr_get(r_xprt);
  344. if (!mr)
  345. return ERR_PTR(-EAGAIN);
  346. } while (mr->frwr.fr_state != FRWR_IS_INVALID);
  347. frwr = &mr->frwr;
  348. frwr->fr_state = FRWR_IS_VALID;
  349. if (nsegs > ia->ri_max_frwr_depth)
  350. nsegs = ia->ri_max_frwr_depth;
  351. for (i = 0; i < nsegs;) {
  352. if (seg->mr_page)
  353. sg_set_page(&mr->mr_sg[i],
  354. seg->mr_page,
  355. seg->mr_len,
  356. offset_in_page(seg->mr_offset));
  357. else
  358. sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
  359. seg->mr_len);
  360. ++seg;
  361. ++i;
  362. if (holes_ok)
  363. continue;
  364. if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
  365. offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
  366. break;
  367. }
  368. mr->mr_dir = rpcrdma_data_dir(writing);
  369. mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir);
  370. if (!mr->mr_nents)
  371. goto out_dmamap_err;
  372. trace_xprtrdma_dma_map(mr);
  373. ibmr = frwr->fr_mr;
  374. n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
  375. if (unlikely(n != mr->mr_nents))
  376. goto out_mapmr_err;
  377. key = (u8)(ibmr->rkey & 0x000000FF);
  378. ib_update_fast_reg_key(ibmr, ++key);
  379. reg_wr = &frwr->fr_regwr;
  380. reg_wr->mr = ibmr;
  381. reg_wr->key = ibmr->rkey;
  382. reg_wr->access = writing ?
  383. IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
  384. IB_ACCESS_REMOTE_READ;
  385. mr->mr_handle = ibmr->rkey;
  386. mr->mr_length = ibmr->length;
  387. mr->mr_offset = ibmr->iova;
  388. *out = mr;
  389. return seg;
  390. out_dmamap_err:
  391. pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
  392. mr->mr_sg, i);
  393. frwr->fr_state = FRWR_IS_INVALID;
  394. rpcrdma_mr_put(mr);
  395. return ERR_PTR(-EIO);
  396. out_mapmr_err:
  397. pr_err("rpcrdma: failed to map mr %p (%d/%d)\n",
  398. frwr->fr_mr, n, mr->mr_nents);
  399. rpcrdma_mr_defer_recovery(mr);
  400. return ERR_PTR(-EIO);
  401. }
  402. /* Post Send WR containing the RPC Call message.
  403. *
  404. * For FRMR, chain any FastReg WRs to the Send WR. Only a
  405. * single ib_post_send call is needed to register memory
  406. * and then post the Send WR.
  407. */
  408. static int
  409. frwr_op_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req)
  410. {
  411. struct ib_send_wr *post_wr;
  412. struct rpcrdma_mr *mr;
  413. post_wr = &req->rl_sendctx->sc_wr;
  414. list_for_each_entry(mr, &req->rl_registered, mr_list) {
  415. struct rpcrdma_frwr *frwr;
  416. frwr = &mr->frwr;
  417. frwr->fr_cqe.done = frwr_wc_fastreg;
  418. frwr->fr_regwr.wr.next = post_wr;
  419. frwr->fr_regwr.wr.wr_cqe = &frwr->fr_cqe;
  420. frwr->fr_regwr.wr.num_sge = 0;
  421. frwr->fr_regwr.wr.opcode = IB_WR_REG_MR;
  422. frwr->fr_regwr.wr.send_flags = 0;
  423. post_wr = &frwr->fr_regwr.wr;
  424. }
  425. /* If ib_post_send fails, the next ->send_request for
  426. * @req will queue these MWs for recovery.
  427. */
  428. return ib_post_send(ia->ri_id->qp, post_wr, NULL);
  429. }
  430. /* Handle a remotely invalidated mr on the @mrs list
  431. */
  432. static void
  433. frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
  434. {
  435. struct rpcrdma_mr *mr;
  436. list_for_each_entry(mr, mrs, mr_list)
  437. if (mr->mr_handle == rep->rr_inv_rkey) {
  438. list_del_init(&mr->mr_list);
  439. trace_xprtrdma_remoteinv(mr);
  440. mr->frwr.fr_state = FRWR_IS_INVALID;
  441. rpcrdma_mr_unmap_and_put(mr);
  442. break; /* only one invalidated MR per RPC */
  443. }
  444. }
  445. /* Invalidate all memory regions that were registered for "req".
  446. *
  447. * Sleeps until it is safe for the host CPU to access the
  448. * previously mapped memory regions.
  449. *
  450. * Caller ensures that @mrs is not empty before the call. This
  451. * function empties the list.
  452. */
  453. static void
  454. frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
  455. {
  456. struct ib_send_wr *first, **prev, *last;
  457. const struct ib_send_wr *bad_wr;
  458. struct rpcrdma_ia *ia = &r_xprt->rx_ia;
  459. struct rpcrdma_frwr *frwr;
  460. struct rpcrdma_mr *mr;
  461. int count, rc;
  462. /* ORDER: Invalidate all of the MRs first
  463. *
  464. * Chain the LOCAL_INV Work Requests and post them with
  465. * a single ib_post_send() call.
  466. */
  467. frwr = NULL;
  468. count = 0;
  469. prev = &first;
  470. list_for_each_entry(mr, mrs, mr_list) {
  471. mr->frwr.fr_state = FRWR_IS_INVALID;
  472. frwr = &mr->frwr;
  473. trace_xprtrdma_localinv(mr);
  474. frwr->fr_cqe.done = frwr_wc_localinv;
  475. last = &frwr->fr_invwr;
  476. memset(last, 0, sizeof(*last));
  477. last->wr_cqe = &frwr->fr_cqe;
  478. last->opcode = IB_WR_LOCAL_INV;
  479. last->ex.invalidate_rkey = mr->mr_handle;
  480. count++;
  481. *prev = last;
  482. prev = &last->next;
  483. }
  484. if (!frwr)
  485. goto unmap;
  486. /* Strong send queue ordering guarantees that when the
  487. * last WR in the chain completes, all WRs in the chain
  488. * are complete.
  489. */
  490. last->send_flags = IB_SEND_SIGNALED;
  491. frwr->fr_cqe.done = frwr_wc_localinv_wake;
  492. reinit_completion(&frwr->fr_linv_done);
  493. /* Transport disconnect drains the receive CQ before it
  494. * replaces the QP. The RPC reply handler won't call us
  495. * unless ri_id->qp is a valid pointer.
  496. */
  497. r_xprt->rx_stats.local_inv_needed++;
  498. bad_wr = NULL;
  499. rc = ib_post_send(ia->ri_id->qp, first, &bad_wr);
  500. if (bad_wr != first)
  501. wait_for_completion(&frwr->fr_linv_done);
  502. if (rc)
  503. goto reset_mrs;
  504. /* ORDER: Now DMA unmap all of the MRs, and return
  505. * them to the free MR list.
  506. */
  507. unmap:
  508. while (!list_empty(mrs)) {
  509. mr = rpcrdma_mr_pop(mrs);
  510. rpcrdma_mr_unmap_and_put(mr);
  511. }
  512. return;
  513. reset_mrs:
  514. pr_err("rpcrdma: FRWR invalidate ib_post_send returned %i\n", rc);
  515. /* Find and reset the MRs in the LOCAL_INV WRs that did not
  516. * get posted.
  517. */
  518. while (bad_wr) {
  519. frwr = container_of(bad_wr, struct rpcrdma_frwr,
  520. fr_invwr);
  521. mr = container_of(frwr, struct rpcrdma_mr, frwr);
  522. __frwr_mr_reset(ia, mr);
  523. bad_wr = bad_wr->next;
  524. }
  525. goto unmap;
  526. }
  527. const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
  528. .ro_map = frwr_op_map,
  529. .ro_send = frwr_op_send,
  530. .ro_reminv = frwr_op_reminv,
  531. .ro_unmap_sync = frwr_op_unmap_sync,
  532. .ro_recover_mr = frwr_op_recover_mr,
  533. .ro_open = frwr_op_open,
  534. .ro_maxpages = frwr_op_maxpages,
  535. .ro_init_mr = frwr_op_init_mr,
  536. .ro_release_mr = frwr_op_release_mr,
  537. .ro_displayname = "frwr",
  538. .ro_send_w_inv_ok = RPCRDMA_CMP_F_SND_W_INV_OK,
  539. };