mr.c

/*
 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/slab.h>
#include <rdma/ib_user_verbs.h>

#include "mlx4_ib.h"

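/*
 * Translate IB access flags into the MLX4 MPT permission bits used by the
 * low-level mlx4_core driver.  Local read access is always granted.
 */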
static u32 convert_access(int acc)
{
        return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX4_PERM_ATOMIC       : 0) |
               (acc & IB_ACCESS_REMOTE_WRITE  ? MLX4_PERM_REMOTE_WRITE : 0) |
               (acc & IB_ACCESS_REMOTE_READ   ? MLX4_PERM_REMOTE_READ  : 0) |
               (acc & IB_ACCESS_LOCAL_WRITE   ? MLX4_PERM_LOCAL_WRITE  : 0) |
               (acc & IB_ACCESS_MW_BIND       ? MLX4_PERM_BIND_MW      : 0) |
               MLX4_PERM_LOCAL_READ;
}

static enum mlx4_mw_type to_mlx4_type(enum ib_mw_type type)
{
        switch (type) {
        case IB_MW_TYPE_1:      return MLX4_MW_TYPE_1;
        case IB_MW_TYPE_2:      return MLX4_MW_TYPE_2;
        default:                return -1;
        }
}

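/*
 * Register a DMA memory region spanning the whole address space
 * (start 0, length ~0ull).  No user pages are pinned, so mr->umem
 * stays NULL.
 */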
struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
        struct mlx4_ib_mr *mr;
        int err;

        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        err = mlx4_mr_alloc(to_mdev(pd->device)->dev, to_mpd(pd)->pdn, 0,
                            ~0ull, convert_access(acc), 0, 0, &mr->mmr);
        if (err)
                goto err_free;

        err = mlx4_mr_enable(to_mdev(pd->device)->dev, &mr->mmr);
        if (err)
                goto err_mr;

        mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
        mr->umem = NULL;

        return &mr->ibmr;

err_mr:
        (void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);

err_free:
        kfree(mr);

        return ERR_PTR(err);
}

enum {
        MLX4_MAX_MTT_SHIFT = 31
};

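/*
 * Write the MTT entries for one physically contiguous block.  The start
 * address is aligned down to mtt_size, and the entries are handed to
 * mlx4_write_mtt() in chunks of at most PAGE_SIZE / sizeof(u64) entries.
 */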
static int mlx4_ib_umem_write_mtt_block(struct mlx4_ib_dev *dev,
                                        struct mlx4_mtt *mtt,
                                        u64 mtt_size, u64 mtt_shift, u64 len,
                                        u64 cur_start_addr, u64 *pages,
                                        int *start_index, int *npages)
{
        u64 cur_end_addr = cur_start_addr + len;
        u64 cur_end_addr_aligned = 0;
        u64 mtt_entries;
        int err = 0;
        int k;

        len += (cur_start_addr & (mtt_size - 1ULL));
        cur_end_addr_aligned = round_up(cur_end_addr, mtt_size);
        len += (cur_end_addr_aligned - cur_end_addr);
        if (len & (mtt_size - 1ULL)) {
                pr_warn("write_block: len %llx is not aligned to mtt_size %llx\n",
                        len, mtt_size);
                return -EINVAL;
        }

        mtt_entries = (len >> mtt_shift);

        /*
         * Align the MTT start address to the mtt_size.
         * Required to handle cases when the MR starts in the middle of an MTT
         * record. Was not required in old code since the physical addresses
         * provided by the dma subsystem were page aligned, which was also the
         * MTT size.
         */
        cur_start_addr = round_down(cur_start_addr, mtt_size);
        /* A new block is started ... */
        for (k = 0; k < mtt_entries; ++k) {
                pages[*npages] = cur_start_addr + (mtt_size * k);
                (*npages)++;
                /*
                 * Be friendly to mlx4_write_mtt() and pass it chunks of
                 * appropriate size.
                 */
                if (*npages == PAGE_SIZE / sizeof(u64)) {
                        err = mlx4_write_mtt(dev->dev, mtt, *start_index,
                                             *npages, pages);
                        if (err)
                                return err;

                        (*start_index) += *npages;
                        *npages = 0;
                }
        }

        return 0;
}

static inline u64 alignment_of(u64 ptr)
{
        return ilog2(ptr & (~(ptr - 1)));
}

static int mlx4_ib_umem_calc_block_mtt(u64 next_block_start,
                                       u64 current_block_end,
                                       u64 block_shift)
{
        /*
         * Check whether the new block is as well aligned as the previous
         * one.  A block address must have zeros in its low bits up to the
         * current entity size.
         */
        if ((next_block_start & ((1ULL << block_shift) - 1ULL)) != 0)
                /*
                 * It is not as well aligned as the previous block - reduce
                 * the mtt size accordingly, taking the lowest set bit of
                 * the address.
                 */
                block_shift = alignment_of(next_block_start);

        /*
         * Check whether the end of the previous block is as well aligned
         * as the start of the block.
         */
        if (((current_block_end) & ((1ULL << block_shift) - 1ULL)) != 0)
                /*
                 * It is not as well aligned as the start of the block -
                 * reduce the mtt size accordingly.
                 */
                block_shift = alignment_of(current_block_end);

        return block_shift;
}

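/*
 * Walk the umem scatterlist, merge DMA-contiguous entries into blocks,
 * and write the MTT entries for each block.
 */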
int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
                           struct ib_umem *umem)
{
        u64 *pages;
        u64 len = 0;
        int err = 0;
        u64 mtt_size;
        u64 cur_start_addr = 0;
        u64 mtt_shift;
        int start_index = 0;
        int npages = 0;
        struct scatterlist *sg;
        int i;

        pages = (u64 *) __get_free_page(GFP_KERNEL);
        if (!pages)
                return -ENOMEM;

        mtt_shift = mtt->page_shift;
        mtt_size = 1ULL << mtt_shift;

        for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
                if (cur_start_addr + len == sg_dma_address(sg)) {
                        /* still the same block */
                        len += sg_dma_len(sg);
                        continue;
                }
                /*
                 * A new block is started ...
                 * If len is misaligned, write an extra mtt entry to cover the
                 * misaligned area (round up the division).
                 */
                err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size,
                                                   mtt_shift, len,
                                                   cur_start_addr,
                                                   pages, &start_index,
                                                   &npages);
                if (err)
                        goto out;

                cur_start_addr = sg_dma_address(sg);
                len = sg_dma_len(sg);
        }

        /* Handle the last block */
        if (len > 0) {
                /*
                 * If len is misaligned, write an extra mtt entry to cover
                 * the misaligned area (round up the division).
                 */
                err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size,
                                                   mtt_shift, len,
                                                   cur_start_addr, pages,
                                                   &start_index, &npages);
                if (err)
                        goto out;
        }

        if (npages)
                err = mlx4_write_mtt(dev->dev, mtt, start_index, npages, pages);

out:
        free_page((unsigned long) pages);
        return err;
}

/*
 * Calculate the optimal mtt size based on contiguous pages.
 * The function also accounts for the pages that are not aligned to the
 * calculated mtt_size and adds them to the total number of pages.  For
 * that, the first and last chunk lengths are checked; if they are not
 * aligned to mtt_size, the non-aligned pages are counted as well.  All
 * chunks in the middle are already handled as part of the mtt shift
 * calculation for both their start and end addresses.
 */
int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va,
                                       int *num_of_mtts)
{
        u64 block_shift = MLX4_MAX_MTT_SHIFT;
        u64 min_shift = umem->page_shift;
        u64 last_block_aligned_end = 0;
        u64 current_block_start = 0;
        u64 first_block_start = 0;
        u64 current_block_len = 0;
        u64 last_block_end = 0;
        struct scatterlist *sg;
        u64 current_block_end;
        u64 misalignment_bits;
        u64 next_block_start;
        u64 total_len = 0;
        int i;

        for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
                /*
                 * Initialization - save the first chunk start as the
                 * current_block_start - block means contiguous pages.
                 */
                if (current_block_len == 0 && current_block_start == 0) {
                        current_block_start = sg_dma_address(sg);
                        first_block_start = current_block_start;
                        /*
                         * Find the bits that are different between the physical
                         * address and the virtual address for the start of the
                         * MR.
                         * umem_get aligned the start_va to a page boundary.
                         * Therefore, we need to align the start va to the same
                         * boundary.
                         * misalignment_bits is needed to handle the case of a
                         * single memory region. In this case, the rest of the
                         * logic will not reduce the block size. If we use a
                         * block size which is bigger than the alignment of the
                         * misalignment bits, we might use the virtual page
                         * number instead of the physical page number, resulting
                         * in access to the wrong data.
                         */
                        misalignment_bits =
                                (start_va & (~(((u64)(BIT(umem->page_shift))) - 1ULL)))
                                ^ current_block_start;
                        block_shift = min(alignment_of(misalignment_bits),
                                          block_shift);
                }

                /*
                 * Go over the scatter entries and check if they continue the
                 * previous scatter entry.
                 */
                next_block_start = sg_dma_address(sg);
                current_block_end = current_block_start + current_block_len;
                /* If we have a split (non-contig.) between two blocks */
                if (current_block_end != next_block_start) {
                        block_shift = mlx4_ib_umem_calc_block_mtt
                                        (next_block_start,
                                         current_block_end,
                                         block_shift);

                        /*
                         * If we reached the minimum shift for 4k page we stop
                         * the loop.
                         */
                        if (block_shift <= min_shift)
                                goto end;

                        /*
                         * Add the length of the block that just ended to the
                         * total; it is needed to calculate the number of
                         * non-aligned pages at the end.
                         */
                        total_len += current_block_len;

                        /* Start a new block */
                        current_block_start = next_block_start;
                        current_block_len = sg_dma_len(sg);
                        continue;
                }
                /*
                 * The scatter entry is another part of the current block,
                 * increase the block size.
                 * An entry in the scatter can be larger than 4k (page) as of
                 * dma mapping which merge some blocks together.
                 */
                current_block_len += sg_dma_len(sg);
        }

        /* Account for the last block in the total len */
        total_len += current_block_len;
        /* Add to the first block the misalignment that it suffers from. */
        total_len += (first_block_start & ((1ULL << block_shift) - 1ULL));
        last_block_end = current_block_start + current_block_len;
        last_block_aligned_end = round_up(last_block_end, 1ULL << block_shift);
        total_len += (last_block_aligned_end - last_block_end);

        if (total_len & ((1ULL << block_shift) - 1ULL))
                pr_warn("misaligned total length detected (%llu, %llu)!",
                        total_len, block_shift);

        *num_of_mtts = total_len >> block_shift;
end:
        if (block_shift < min_shift) {
                /*
                 * If shift is less than the min we set a warning and return the
                 * min shift.
                 */
                pr_warn("umem_calc_optimal_mtt_size - unexpected shift %lld\n",
                        block_shift);

                block_shift = min_shift;
        }
        return block_shift;
}

static struct ib_umem *mlx4_get_umem_mr(struct ib_ucontext *context, u64 start,
                                        u64 length, u64 virt_addr,
                                        int access_flags)
{
        /*
         * Force registering the memory as writable if the underlying pages
         * are writable.  This is so rereg can change the access permissions
         * from readable to writable without having to run through ib_umem_get
         * again.
         */
        if (!ib_access_writable(access_flags)) {
                struct vm_area_struct *vma;

                down_read(&current->mm->mmap_sem);
                /*
                 * FIXME: Ideally this would iterate over all the vmas that
                 * cover the memory, but for now it requires a single vma to
                 * entirely cover the MR to support RO mappings.
                 */
                vma = find_vma(current->mm, start);
                if (vma && vma->vm_end >= start + length &&
                    vma->vm_start <= start) {
                        if (vma->vm_flags & VM_WRITE)
                                access_flags |= IB_ACCESS_LOCAL_WRITE;
                } else {
                        access_flags |= IB_ACCESS_LOCAL_WRITE;
                }

                up_read(&current->mm->mmap_sem);
        }

        return ib_umem_get(context, start, length, access_flags, 0);
}

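/*
 * Register a user memory region: pin the pages via ib_umem, pick an
 * optimal MTT page size for the mapping, allocate and write the MTTs,
 * then enable the MR in hardware.
 */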
struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                                  u64 virt_addr, int access_flags,
                                  struct ib_udata *udata)
{
        struct mlx4_ib_dev *dev = to_mdev(pd->device);
        struct mlx4_ib_mr *mr;
        int shift;
        int err;
        int n;

        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        mr->umem = mlx4_get_umem_mr(pd->uobject->context, start, length,
                                    virt_addr, access_flags);
        if (IS_ERR(mr->umem)) {
                err = PTR_ERR(mr->umem);
                goto err_free;
        }

        n = ib_umem_page_count(mr->umem);
        shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start, &n);

        err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
                            convert_access(access_flags), n, shift, &mr->mmr);
        if (err)
                goto err_umem;

        err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
        if (err)
                goto err_mr;

        err = mlx4_mr_enable(dev->dev, &mr->mmr);
        if (err)
                goto err_mr;

        mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
        mr->ibmr.length = length;
        mr->ibmr.iova = virt_addr;
        mr->ibmr.page_size = 1U << shift;

        return &mr->ibmr;

err_mr:
        (void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);

err_umem:
        ib_umem_release(mr->umem);

err_free:
        kfree(mr);

        return ERR_PTR(err);
}

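/*
 * Re-register an existing user MR.  Depending on the flags this changes
 * the PD, the access rights and/or the address translation, by rewriting
 * the MPT entry in place.
 */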
int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
                          u64 start, u64 length, u64 virt_addr,
                          int mr_access_flags, struct ib_pd *pd,
                          struct ib_udata *udata)
{
        struct mlx4_ib_dev *dev = to_mdev(mr->device);
        struct mlx4_ib_mr *mmr = to_mmr(mr);
        struct mlx4_mpt_entry *mpt_entry;
        struct mlx4_mpt_entry **pmpt_entry = &mpt_entry;
        int err;

        /*
         * Since we synchronize this call and mlx4_ib_dereg_mr via uverbs,
         * we assume that the calls can't run concurrently.  Otherwise, a
         * race exists.
         */
        err = mlx4_mr_hw_get_mpt(dev->dev, &mmr->mmr, &pmpt_entry);
        if (err)
                return err;

        if (flags & IB_MR_REREG_PD) {
                err = mlx4_mr_hw_change_pd(dev->dev, *pmpt_entry,
                                           to_mpd(pd)->pdn);
                if (err)
                        goto release_mpt_entry;
        }

        if (flags & IB_MR_REREG_ACCESS) {
                if (ib_access_writable(mr_access_flags) &&
                    !mmr->umem->writable) {
                        err = -EPERM;
                        goto release_mpt_entry;
                }

                err = mlx4_mr_hw_change_access(dev->dev, *pmpt_entry,
                                               convert_access(mr_access_flags));
                if (err)
                        goto release_mpt_entry;
        }

        if (flags & IB_MR_REREG_TRANS) {
                int shift;
                int n;

                mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
                ib_umem_release(mmr->umem);
                mmr->umem =
                        mlx4_get_umem_mr(mr->uobject->context, start, length,
                                         virt_addr, mr_access_flags);
                if (IS_ERR(mmr->umem)) {
                        err = PTR_ERR(mmr->umem);
                        /* Prevent mlx4_ib_dereg_mr from freeing an invalid pointer */
                        mmr->umem = NULL;
                        goto release_mpt_entry;
                }
                n = ib_umem_page_count(mmr->umem);
                shift = mmr->umem->page_shift;

                err = mlx4_mr_rereg_mem_write(dev->dev, &mmr->mmr,
                                              virt_addr, length, n, shift,
                                              *pmpt_entry);
                if (err) {
                        ib_umem_release(mmr->umem);
                        goto release_mpt_entry;
                }
                mmr->mmr.iova = virt_addr;
                mmr->mmr.size = length;

                err = mlx4_ib_umem_write_mtt(dev, &mmr->mmr.mtt, mmr->umem);
                if (err) {
                        mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
                        ib_umem_release(mmr->umem);
                        goto release_mpt_entry;
                }
        }

        /*
         * If we couldn't transfer the MR to the HCA, just remember to
         * return a failure.  But dereg_mr will free the resources.
         */
        err = mlx4_mr_hw_write_mpt(dev->dev, &mmr->mmr, pmpt_entry);
        if (!err && flags & IB_MR_REREG_ACCESS)
                mmr->mmr.access = mr_access_flags;

release_mpt_entry:
        mlx4_mr_hw_put_mpt(dev->dev, pmpt_entry);

        return err;
}

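/*
 * Allocate and DMA-map the page list used for fast registration:
 * mlx4_ib_map_mr_sg() fills it and the device reads it, so it is
 * mapped DMA_TO_DEVICE.
 */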
static int
mlx4_alloc_priv_pages(struct ib_device *device,
                      struct mlx4_ib_mr *mr,
                      int max_pages)
{
        int ret;

        /*
         * Ensure that size is aligned to DMA cacheline
         * requirements.
         * max_pages is limited to MLX4_MAX_FAST_REG_PAGES
         * so page_map_size will never cross PAGE_SIZE.
         */
        mr->page_map_size = roundup(max_pages * sizeof(u64),
                                    MLX4_MR_PAGES_ALIGN);

        /* Prevent cross page boundary allocation. */
        mr->pages = (__be64 *)get_zeroed_page(GFP_KERNEL);
        if (!mr->pages)
                return -ENOMEM;

        mr->page_map = dma_map_single(device->dev.parent, mr->pages,
                                      mr->page_map_size, DMA_TO_DEVICE);

        if (dma_mapping_error(device->dev.parent, mr->page_map)) {
                ret = -ENOMEM;
                goto err;
        }

        return 0;

err:
        free_page((unsigned long)mr->pages);
        return ret;
}

static void
mlx4_free_priv_pages(struct mlx4_ib_mr *mr)
{
        if (mr->pages) {
                struct ib_device *device = mr->ibmr.device;

                dma_unmap_single(device->dev.parent, mr->page_map,
                                 mr->page_map_size, DMA_TO_DEVICE);
                free_page((unsigned long)mr->pages);
                mr->pages = NULL;
        }
}

int mlx4_ib_dereg_mr(struct ib_mr *ibmr)
{
        struct mlx4_ib_mr *mr = to_mmr(ibmr);
        int ret;

        mlx4_free_priv_pages(mr);

        ret = mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
        if (ret)
                return ret;
        if (mr->umem)
                ib_umem_release(mr->umem);
        kfree(mr);

        return 0;
}

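/*
 * Allocate a memory window of the requested type and enable it in
 * hardware.
 */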
struct ib_mw *mlx4_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
                               struct ib_udata *udata)
{
        struct mlx4_ib_dev *dev = to_mdev(pd->device);
        struct mlx4_ib_mw *mw;
        int err;

        mw = kmalloc(sizeof(*mw), GFP_KERNEL);
        if (!mw)
                return ERR_PTR(-ENOMEM);

        err = mlx4_mw_alloc(dev->dev, to_mpd(pd)->pdn,
                            to_mlx4_type(type), &mw->mmw);
        if (err)
                goto err_free;

        err = mlx4_mw_enable(dev->dev, &mw->mmw);
        if (err)
                goto err_mw;

        mw->ibmw.rkey = mw->mmw.key;

        return &mw->ibmw;

err_mw:
        mlx4_mw_free(dev->dev, &mw->mmw);

err_free:
        kfree(mw);

        return ERR_PTR(err);
}

int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
{
        struct mlx4_ib_mw *mw = to_mmw(ibmw);

        mlx4_mw_free(to_mdev(ibmw->device)->dev, &mw->mmw);
        kfree(mw);

        return 0;
}

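/*
 * Allocate a fast-registration MR (IB_MR_TYPE_MEM_REG only) with room
 * for up to max_num_sg pages.
 */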
struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd,
                               enum ib_mr_type mr_type,
                               u32 max_num_sg)
{
        struct mlx4_ib_dev *dev = to_mdev(pd->device);
        struct mlx4_ib_mr *mr;
        int err;

        if (mr_type != IB_MR_TYPE_MEM_REG ||
            max_num_sg > MLX4_MAX_FAST_REG_PAGES)
                return ERR_PTR(-EINVAL);

        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);

        err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0,
                            max_num_sg, 0, &mr->mmr);
        if (err)
                goto err_free;

        err = mlx4_alloc_priv_pages(pd->device, mr, max_num_sg);
        if (err)
                goto err_free_mr;

        mr->max_pages = max_num_sg;
        err = mlx4_mr_enable(dev->dev, &mr->mmr);
        if (err)
                goto err_free_pl;

        mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
        mr->umem = NULL;

        return &mr->ibmr;

err_free_pl:
        mr->ibmr.device = pd->device;
        mlx4_free_priv_pages(mr);
err_free_mr:
        (void) mlx4_mr_free(dev->dev, &mr->mmr);
err_free:
        kfree(mr);
        return ERR_PTR(err);
}

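/*
 * Legacy FMR support follows: allocation, physical-page mapping, unmap
 * (flushed via the SYNC_TPT firmware command) and teardown.
 */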
struct ib_fmr *mlx4_ib_fmr_alloc(struct ib_pd *pd, int acc,
                                 struct ib_fmr_attr *fmr_attr)
{
        struct mlx4_ib_dev *dev = to_mdev(pd->device);
        struct mlx4_ib_fmr *fmr;
        int err = -ENOMEM;

        fmr = kmalloc(sizeof *fmr, GFP_KERNEL);
        if (!fmr)
                return ERR_PTR(-ENOMEM);

        err = mlx4_fmr_alloc(dev->dev, to_mpd(pd)->pdn, convert_access(acc),
                             fmr_attr->max_pages, fmr_attr->max_maps,
                             fmr_attr->page_shift, &fmr->mfmr);
        if (err)
                goto err_free;

        err = mlx4_fmr_enable(to_mdev(pd->device)->dev, &fmr->mfmr);
        if (err)
                goto err_mr;

        fmr->ibfmr.rkey = fmr->ibfmr.lkey = fmr->mfmr.mr.key;

        return &fmr->ibfmr;

err_mr:
        (void) mlx4_mr_free(to_mdev(pd->device)->dev, &fmr->mfmr.mr);

err_free:
        kfree(fmr);

        return ERR_PTR(err);
}

int mlx4_ib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
                         int npages, u64 iova)
{
        struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);
        struct mlx4_ib_dev *dev = to_mdev(ifmr->ibfmr.device);

        return mlx4_map_phys_fmr(dev->dev, &ifmr->mfmr, page_list, npages, iova,
                                 &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey);
}

int mlx4_ib_unmap_fmr(struct list_head *fmr_list)
{
        struct ib_fmr *ibfmr;
        int err;
        struct mlx4_dev *mdev = NULL;

        list_for_each_entry(ibfmr, fmr_list, list) {
                if (mdev && to_mdev(ibfmr->device)->dev != mdev)
                        return -EINVAL;
                mdev = to_mdev(ibfmr->device)->dev;
        }

        if (!mdev)
                return 0;

        list_for_each_entry(ibfmr, fmr_list, list) {
                struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);

                mlx4_fmr_unmap(mdev, &ifmr->mfmr, &ifmr->ibfmr.lkey, &ifmr->ibfmr.rkey);
        }

        /*
         * Make sure all MPT status updates are visible before issuing
         * SYNC_TPT firmware command.
         */
        wmb();

        err = mlx4_SYNC_TPT(mdev);
        if (err)
                pr_warn("SYNC_TPT error %d when unmapping FMRs\n", err);

        return 0;
}

int mlx4_ib_fmr_dealloc(struct ib_fmr *ibfmr)
{
        struct mlx4_ib_fmr *ifmr = to_mfmr(ibfmr);
        struct mlx4_ib_dev *dev = to_mdev(ibfmr->device);
        int err;

        err = mlx4_fmr_free(dev->dev, &ifmr->mfmr);
        if (!err)
                kfree(ifmr);

        return err;
}

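/*
 * ib_sg_to_pages() callback: append one page address, tagged with the
 * MTT "present" flag, to the MR's page list.
 */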
static int mlx4_set_page(struct ib_mr *ibmr, u64 addr)
{
        struct mlx4_ib_mr *mr = to_mmr(ibmr);

        if (unlikely(mr->npages == mr->max_pages))
                return -ENOMEM;

        mr->pages[mr->npages++] = cpu_to_be64(addr | MLX4_MTT_FLAG_PRESENT);

        return 0;
}

int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
                      unsigned int *sg_offset)
{
        struct mlx4_ib_mr *mr = to_mmr(ibmr);
        int rc;

        mr->npages = 0;

        ib_dma_sync_single_for_cpu(ibmr->device, mr->page_map,
                                   mr->page_map_size, DMA_TO_DEVICE);

        rc = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, mlx4_set_page);

        ib_dma_sync_single_for_device(ibmr->device, mr->page_map,
                                      mr->page_map_size, DMA_TO_DEVICE);

        return rc;
}