mr.c

/*
 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/kref.h>
#include <linux/random.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/delay.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
#include <rdma/ib_verbs.h>
#include "mlx5_ib.h"

enum {
	MAX_PENDING_REG_MR = 8,
};

#define MLX5_UMR_ALIGN 2048

static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static int mr_cache_max_order(struct mlx5_ib_dev *dev);
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);

static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
{
	return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
}

static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	/* Wait until all page fault handlers using the mr complete. */
	synchronize_srcu(&dev->mr_srcu);
#endif

	return err;
}
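
/*
 * Map an MR order to its index in dev->cache.ent[].  Entries are created
 * with ent->order = i + 2 (see mlx5_mr_cache_init()), so for example
 * order 2 maps to index 0 and order 3 maps to index 1.
 */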
static int order2idx(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;

	if (order < cache->ent[0].order)
		return 0;
	else
		return order - cache->ent[0].order;
}

static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
{
	return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >=
		length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
}

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
static void update_odp_mr(struct mlx5_ib_mr *mr)
{
	if (mr->umem->odp_data) {
		/*
		 * This barrier prevents the compiler from moving the
		 * setting of umem->odp_data->private to point to our
		 * MR, before reg_umr finished, to ensure that the MR
		 * initialization has finished before starting to
		 * handle invalidations.
		 */
		smp_wmb();
		mr->umem->odp_data->private = mr;
		/*
		 * Make sure we will see the new
		 * umem->odp_data->private value in the invalidation
		 * routines, before we can get page faults on the
		 * MR. Page faults can happen once we put the MR in
		 * the tree, below this line. Without the barrier,
		 * there can be a fault handling and an invalidation
		 * before umem->odp_data->private == mr is visible to
		 * the invalidation handler.
		 */
		smp_wmb();
	}
}
#endif
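
/*
 * Completion callback for the asynchronous CREATE_MKEY commands issued by
 * add_keys().  On success the new mkey is added to its cache entry and to
 * the device-wide mkey radix tree; on failure the MR is freed and cache
 * filling is throttled for about a second via dev->fill_delay.
 */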
static void reg_mr_callback(int status, void *context)
{
	struct mlx5_ib_mr *mr = context;
	struct mlx5_ib_dev *dev = mr->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int c = order2idx(dev, mr->order);
	struct mlx5_cache_ent *ent = &cache->ent[c];
	u8 key;
	unsigned long flags;
	struct mlx5_mkey_table *table = &dev->mdev->priv.mkey_table;
	int err;

	spin_lock_irqsave(&ent->lock, flags);
	ent->pending--;
	spin_unlock_irqrestore(&ent->lock, flags);
	if (status) {
		mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
		kfree(mr);
		dev->fill_delay = 1;
		mod_timer(&dev->delay_timer, jiffies + HZ);
		return;
	}

	mr->mmkey.type = MLX5_MKEY_MR;
	spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
	key = dev->mdev->priv.mkey_key++;
	spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
	mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key;

	cache->last_add = jiffies;

	spin_lock_irqsave(&ent->lock, flags);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	ent->size++;
	spin_unlock_irqrestore(&ent->lock, flags);

	write_lock_irqsave(&table->lock, flags);
	err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmkey.key),
				&mr->mmkey);
	if (err)
		pr_err("Error inserting to mkey tree. 0x%x\n", -err);
	write_unlock_irqrestore(&table->lock, flags);

	if (!completion_done(&ent->compl))
		complete(&ent->compl);
}
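
/*
 * Queue up to 'num' asynchronous mkey creations for cache entry 'c'.  Each
 * entry is limited to MAX_PENDING_REG_MR outstanding commands; once that
 * limit is reached the function returns -EAGAIN and the caller retries
 * later.
 */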
static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	struct mlx5_ib_mr *mr;
	void *mkc;
	u32 *in;
	int err = 0;
	int i;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	for (i = 0; i < num; i++) {
		if (ent->pending >= MAX_PENDING_REG_MR) {
			err = -EAGAIN;
			break;
		}

		mr = kzalloc(sizeof(*mr), GFP_KERNEL);
		if (!mr) {
			err = -ENOMEM;
			break;
		}
		mr->order = ent->order;
		mr->allocated_from_cache = 1;
		mr->dev = dev;

		MLX5_SET(mkc, mkc, free, 1);
		MLX5_SET(mkc, mkc, umr_en, 1);
		MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
		MLX5_SET(mkc, mkc, access_mode_4_2,
			 (ent->access_mode >> 2) & 0x7);
		MLX5_SET(mkc, mkc, qpn, 0xffffff);
		MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
		MLX5_SET(mkc, mkc, log_page_size, ent->page);

		spin_lock_irq(&ent->lock);
		ent->pending++;
		spin_unlock_irq(&ent->lock);
		err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey,
					       in, inlen,
					       mr->out, sizeof(mr->out),
					       reg_mr_callback, mr);
		if (err) {
			spin_lock_irq(&ent->lock);
			ent->pending--;
			spin_unlock_irq(&ent->lock);
			mlx5_ib_warn(dev, "create mkey failed %d\n", err);
			kfree(mr);
			break;
		}
	}

	kfree(in);
	return err;
}
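
/*
 * Remove up to 'num' MRs from cache entry 'c' and destroy their mkeys.
 * When on-demand paging is enabled, the MR structs are only kfree()d after
 * synchronize_srcu(), so that page fault handlers that may still reference
 * them have finished.
 */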
static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *tmp_mr;
	struct mlx5_ib_mr *mr;
	LIST_HEAD(del_list);
	int i;

	for (i = 0; i < num; i++) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			break;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_move(&mr->list, &del_list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
	}

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	synchronize_srcu(&dev->mr_srcu);
#endif

	list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
		list_del(&mr->list);
		kfree(mr);
	}
}
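
/*
 * debugfs interface: each cache entry exposes "size" and "limit" files.
 * Writing "size" grows or shrinks the entry via add_keys()/remove_keys();
 * writing "limit" adjusts the low-water mark and refills the entry if it
 * is currently below the new limit.
 */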
static ssize_t size_write(struct file *filp, const char __user *buf,
			  size_t count, loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	struct mlx5_ib_dev *dev = ent->dev;
	char lbuf[20] = {0};
	u32 var;
	int err;
	int c;

	count = min(count, sizeof(lbuf) - 1);
	if (copy_from_user(lbuf, buf, count))
		return -EFAULT;

	c = order2idx(dev, ent->order);

	if (sscanf(lbuf, "%u", &var) != 1)
		return -EINVAL;

	if (var < ent->limit)
		return -EINVAL;

	if (var > ent->size) {
		do {
			err = add_keys(dev, c, var - ent->size);
			if (err && err != -EAGAIN)
				return err;

			usleep_range(3000, 5000);
		} while (err);
	} else if (var < ent->size) {
		remove_keys(dev, c, ent->size - var);
	}

	return count;
}

static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
			 loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	char lbuf[20];
	int err;

	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
	if (err < 0)
		return err;

	return simple_read_from_buffer(buf, count, pos, lbuf, err);
}

static const struct file_operations size_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.write	= size_write,
	.read	= size_read,
};

static ssize_t limit_write(struct file *filp, const char __user *buf,
			   size_t count, loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	struct mlx5_ib_dev *dev = ent->dev;
	char lbuf[20] = {0};
	u32 var;
	int err;
	int c;

	count = min(count, sizeof(lbuf) - 1);
	if (copy_from_user(lbuf, buf, count))
		return -EFAULT;

	c = order2idx(dev, ent->order);

	if (sscanf(lbuf, "%u", &var) != 1)
		return -EINVAL;

	if (var > ent->size)
		return -EINVAL;

	ent->limit = var;

	if (ent->cur < ent->limit) {
		err = add_keys(dev, c, 2 * ent->limit - ent->cur);
		if (err)
			return err;
	}

	return count;
}

static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
			  loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	char lbuf[20];
	int err;

	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
	if (err < 0)
		return err;

	return simple_read_from_buffer(buf, count, pos, lbuf, err);
}

static const struct file_operations limit_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.write	= limit_write,
	.read	= limit_read,
};
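
/*
 * Background maintenance of a cache entry.  The work function refills an
 * entry up to 2 * ent->limit while filling is not throttled, and shrinks it
 * back down one MR at a time, but only after 300 seconds without new
 * additions and only when the CPU has nothing better to do.
 */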
static int someone_adding(struct mlx5_mr_cache *cache)
{
	int i;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		if (cache->ent[i].cur < cache->ent[i].limit)
			return 1;
	}

	return 0;
}

static void __cache_work_func(struct mlx5_cache_ent *ent)
{
	struct mlx5_ib_dev *dev = ent->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int i = order2idx(dev, ent->order);
	int err;

	if (cache->stopped)
		return;

	ent = &dev->cache.ent[i];
	if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
		err = add_keys(dev, i, 1);
		if (ent->cur < 2 * ent->limit) {
			if (err == -EAGAIN) {
				mlx5_ib_dbg(dev, "returned eagain, order %d\n",
					    i + 2);
				queue_delayed_work(cache->wq, &ent->dwork,
						   msecs_to_jiffies(3));
			} else if (err) {
				mlx5_ib_warn(dev, "command failed order %d, err %d\n",
					     i + 2, err);
				queue_delayed_work(cache->wq, &ent->dwork,
						   msecs_to_jiffies(1000));
			} else {
				queue_work(cache->wq, &ent->work);
			}
		}
	} else if (ent->cur > 2 * ent->limit) {
		/*
		 * The remove_keys() logic is performed as a garbage
		 * collection task.  Such a task is intended to be run when
		 * no other active processes are running.
		 *
		 * The need_resched() will return TRUE if there are user
		 * tasks to be activated in the near future.
		 *
		 * In such a case, we don't execute remove_keys() and
		 * postpone the garbage collection work to the next cycle,
		 * in order to free CPU resources to other tasks.
		 */
		if (!need_resched() && !someone_adding(cache) &&
		    time_after(jiffies, cache->last_add + 300 * HZ)) {
			remove_keys(dev, i, 1);
			if (ent->cur > ent->limit)
				queue_work(cache->wq, &ent->work);
		} else {
			queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
		}
	}
}

static void delayed_cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, dwork.work);
	__cache_work_func(ent);
}

static void cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, work);
	__cache_work_func(ent);
}
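
/*
 * Allocate an MR from a specific cache entry (by index, not by order).  If
 * the entry is empty, more keys are requested and the caller blocks on
 * ent->compl until an asynchronously created mkey becomes available.
 */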
struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	struct mlx5_ib_mr *mr;
	int err;

	if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_err(dev, "cache entry %d is out of range\n", entry);
		return ERR_PTR(-EINVAL);
	}

	ent = &cache->ent[entry];
	while (1) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);

			err = add_keys(dev, entry, 1);
			if (err && err != -EAGAIN)
				return ERR_PTR(err);

			wait_for_completion(&ent->compl);
		} else {
			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
					      list);
			list_del(&mr->list);
			ent->cur--;
			spin_unlock_irq(&ent->lock);
			if (ent->cur < ent->limit)
				queue_work(cache->wq, &ent->work);
			return mr;
		}
	}
}

static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_ib_mr *mr = NULL;
	struct mlx5_cache_ent *ent;
	int last_umr_cache_entry;
	int c;
	int i;

	c = order2idx(dev, order);
	last_umr_cache_entry = order2idx(dev, mr_cache_max_order(dev));
	if (c < 0 || c > last_umr_cache_entry) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
		return NULL;
	}

	for (i = c; i <= last_umr_cache_entry; i++) {
		ent = &cache->ent[i];

		mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);

		spin_lock_irq(&ent->lock);
		if (!list_empty(&ent->head)) {
			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
					      list);
			list_del(&mr->list);
			ent->cur--;
			spin_unlock_irq(&ent->lock);
			if (ent->cur < ent->limit)
				queue_work(cache->wq, &ent->work);
			break;
		}
		spin_unlock_irq(&ent->lock);

		queue_work(cache->wq, &ent->work);
	}

	if (!mr)
		cache->ent[c].miss++;

	return mr;
}
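
/*
 * Return a cache-allocated MR to its entry.  The mkey is first invalidated
 * with a UMR "disable MR" WQE; if that fails the mkey is destroyed instead
 * of being recycled.  An entry that grows past 2 * ent->limit is handed to
 * the cache work queue to be shrunk.
 */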
void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int shrink = 0;
	int c;

	if (!mr->allocated_from_cache)
		return;

	c = order2idx(dev, mr->order);
	WARN_ON(c < 0 || c >= MAX_MR_CACHE_ENTRIES);

	if (unreg_umr(dev, mr)) {
		mr->allocated_from_cache = false;
		destroy_mkey(dev, mr);
		ent = &cache->ent[c];
		if (ent->cur < ent->limit)
			queue_work(cache->wq, &ent->work);
		return;
	}

	ent = &cache->ent[c];
	spin_lock_irq(&ent->lock);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	if (ent->cur > 2 * ent->limit)
		shrink = 1;
	spin_unlock_irq(&ent->lock);

	if (shrink)
		queue_work(cache->wq, &ent->work);
}

static void clean_keys(struct mlx5_ib_dev *dev, int c)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *tmp_mr;
	struct mlx5_ib_mr *mr;
	LIST_HEAD(del_list);

	cancel_delayed_work(&ent->dwork);
	while (1) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			break;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_move(&mr->list, &del_list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
	}

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	synchronize_srcu(&dev->mr_srcu);
#endif

	list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
		list_del(&mr->list);
		kfree(mr);
	}
}

static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
{
	if (!mlx5_debugfs_root || dev->rep)
		return;

	debugfs_remove_recursive(dev->cache.root);
	dev->cache.root = NULL;
}

static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int i;

	if (!mlx5_debugfs_root || dev->rep)
		return 0;

	cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
	if (!cache->root)
		return -ENOMEM;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];
		sprintf(ent->name, "%d", ent->order);
		ent->dir = debugfs_create_dir(ent->name, cache->root);
		if (!ent->dir)
			goto err;

		ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
						 &size_fops);
		if (!ent->fsize)
			goto err;

		ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
						  &limit_fops);
		if (!ent->flimit)
			goto err;

		ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
					       &ent->cur);
		if (!ent->fcur)
			goto err;

		ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
						&ent->miss);
		if (!ent->fmiss)
			goto err;
	}

	return 0;
err:
	mlx5_mr_cache_debugfs_cleanup(dev);

	return -ENOMEM;
}

static void delay_time_func(struct timer_list *t)
{
	struct mlx5_ib_dev *dev = from_timer(dev, t, delay_timer);

	dev->fill_delay = 0;
}
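
/*
 * Set up the MR cache: one ordered work queue plus MAX_MR_CACHE_ENTRIES
 * entries.  Entry i holds MRs of order i + 2, i.e. entry 0 covers 4-page
 * regions, entry 1 covers 8-page regions, and so on; entries above
 * MR_CACHE_LAST_STD_ENTRY are set up for ODP by
 * mlx5_odp_init_mr_cache_entry().
 */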
int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int err;
	int i;

	mutex_init(&dev->slow_path_mutex);
	cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
	if (!cache->wq) {
		mlx5_ib_warn(dev, "failed to create work queue\n");
		return -ENOMEM;
	}

	timer_setup(&dev->delay_timer, delay_time_func, 0);
	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];
		INIT_LIST_HEAD(&ent->head);
		spin_lock_init(&ent->lock);
		ent->order = i + 2;
		ent->dev = dev;
		ent->limit = 0;

		init_completion(&ent->compl);
		INIT_WORK(&ent->work, cache_work_func);
		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);

		if (i > MR_CACHE_LAST_STD_ENTRY) {
			mlx5_odp_init_mr_cache_entry(ent);
			continue;
		}

		if (ent->order > mr_cache_max_order(dev))
			continue;

		ent->page = PAGE_SHIFT;
		ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) /
			   MLX5_IB_UMR_OCTOWORD;
		ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
		if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
		    !dev->rep &&
		    mlx5_core_is_pf(dev->mdev))
			ent->limit = dev->mdev->profile->mr_cache[i].limit;
		else
			ent->limit = 0;
		queue_work(cache->wq, &ent->work);
	}

	err = mlx5_mr_cache_debugfs_init(dev);
	if (err)
		mlx5_ib_warn(dev, "cache debugfs failure\n");

	/*
	 * We don't want to fail the driver if debugfs failed to initialize,
	 * so we are not forwarding the error to the user.
	 */

	return 0;
}

static void wait_for_async_commands(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int total = 0;
	int i;
	int j;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];
		for (j = 0; j < 1000; j++) {
			if (!ent->pending)
				break;
			msleep(50);
		}
	}
	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];
		total += ent->pending;
	}

	if (total)
		mlx5_ib_warn(dev, "aborted while there are %d pending mr requests\n", total);
	else
		mlx5_ib_warn(dev, "done with all pending requests\n");
}

int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
{
	int i;

	if (!dev->cache.wq)
		return 0;

	dev->cache.stopped = 1;
	flush_workqueue(dev->cache.wq);

	mlx5_mr_cache_debugfs_cleanup(dev);

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
		clean_keys(dev, i);

	destroy_workqueue(dev->cache.wq);
	wait_for_async_commands(dev);
	del_timer_sync(&dev->delay_timer);

	return 0;
}

struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	struct mlx5_core_dev *mdev = dev->mdev;
	struct mlx5_ib_mr *mr;
	void *mkc;
	u32 *in;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
	MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
	MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
	MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
	MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
	MLX5_SET(mkc, mkc, lr, 1);

	MLX5_SET(mkc, mkc, length64, 1);
	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET64(mkc, mkc, start_addr, 0);

	err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
	if (err)
		goto err_in;

	kfree(in);
	mr->mmkey.type = MLX5_MKEY_MR;
	mr->ibmr.lkey = mr->mmkey.key;
	mr->ibmr.rkey = mr->mmkey.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_in:
	kfree(in);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}
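
/*
 * Number of 16-byte "octowords" needed to hold the MTT page list for a
 * region of 'len' bytes starting at 'addr' with the given page shift.  Two
 * 8-byte MTT entries fit per octoword, hence the (npages + 1) / 2.  For
 * example, a page-aligned 1 MB region at a 4 KB page shift needs 256 MTTs,
 * i.e. 128 octowords.
 */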
static int get_octo_len(u64 addr, u64 len, int page_shift)
{
	u64 page_size = 1ULL << page_shift;
	u64 offset;
	int npages;

	offset = addr & (page_size - 1);
	npages = ALIGN(len + offset, page_size) >> page_shift;
	return (npages + 1) / 2;
}

static int mr_cache_max_order(struct mlx5_ib_dev *dev)
{
	if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
		return MR_CACHE_LAST_STD_ENTRY + 2;
	return MLX5_MAX_UMR_SHIFT;
}
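
/*
 * Pin the user buffer with ib_umem_get() and let mlx5_ib_cont_pages() work
 * out how it can be mapped: the total page count, the largest page shift
 * the HCA can use for it, the number of compound pages at that shift
 * (ncont), and the corresponding cache order.  Zero-length regions are
 * rejected.
 */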
static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length,
		       int access_flags, struct ib_umem **umem,
		       int *npages, int *page_shift, int *ncont,
		       int *order)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct ib_umem *u;
	int err;

	*umem = NULL;

	u = ib_umem_get(pd->uobject->context, start, length, access_flags, 0);
	err = PTR_ERR_OR_ZERO(u);
	if (err) {
		mlx5_ib_dbg(dev, "umem get failed (%d)\n", err);
		return err;
	}

	mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages,
			   page_shift, ncont, order);
	if (!*npages) {
		mlx5_ib_warn(dev, "avoid zero region\n");
		ib_umem_release(u);
		return -EINVAL;
	}

	*umem = u;

	mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
		    *npages, *ncont, *order, *page_shift);

	return 0;
}

static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct mlx5_ib_umr_context *context =
		container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);

	context->status = wc->status;
	complete(&context->done);
}

static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
{
	context->cqe.done = mlx5_ib_umr_done;
	context->status = -1;
	init_completion(&context->done);
}
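
/*
 * Post a UMR work request on the driver's internal UMR QP and block until
 * its completion is reported by mlx5_ib_umr_done().  The umrc semaphore
 * bounds the number of UMR WQEs outstanding at any time.
 */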
static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
				  struct mlx5_umr_wr *umrwr)
{
	struct umr_common *umrc = &dev->umrc;
	const struct ib_send_wr *bad;
	int err;
	struct mlx5_ib_umr_context umr_context;

	mlx5_ib_init_umr_context(&umr_context);
	umrwr->wr.wr_cqe = &umr_context.cqe;

	down(&umrc->sem);
	err = ib_post_send(umrc->qp, &umrwr->wr, &bad);
	if (err) {
		mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
	} else {
		wait_for_completion(&umr_context.done);
		if (umr_context.status != IB_WC_SUCCESS) {
			mlx5_ib_warn(dev, "reg umr failed (%u)\n",
				     umr_context.status);
			err = -EFAULT;
		}
	}
	up(&umrc->sem);
	return err;
}

static struct mlx5_ib_mr *alloc_mr_from_cache(
				  struct ib_pd *pd, struct ib_umem *umem,
				  u64 virt_addr, u64 len, int npages,
				  int page_shift, int order, int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr;
	int err = 0;
	int i;

	for (i = 0; i < 1; i++) {
		mr = alloc_cached_mr(dev, order);
		if (mr)
			break;

		err = add_keys(dev, order2idx(dev, order), 1);
		if (err && err != -EAGAIN) {
			mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
			break;
		}
	}

	if (!mr)
		return ERR_PTR(-EAGAIN);

	mr->ibmr.pd = pd;
	mr->umem = umem;
	mr->access_flags = access_flags;
	mr->desc_size = sizeof(struct mlx5_mtt);
	mr->mmkey.iova = virt_addr;
	mr->mmkey.size = len;
	mr->mmkey.pd = to_mpd(pd)->pdn;

	return mr;
}

static inline int populate_xlt(struct mlx5_ib_mr *mr, int idx, int npages,
			       void *xlt, int page_shift, size_t size,
			       int flags)
{
	struct mlx5_ib_dev *dev = mr->dev;
	struct ib_umem *umem = mr->umem;

	if (flags & MLX5_IB_UPD_XLT_INDIRECT) {
		if (!umr_can_use_indirect_mkey(dev))
			return -EPERM;
		mlx5_odp_populate_klm(xlt, idx, npages, mr, flags);
		return npages;
	}

	npages = min_t(size_t, npages, ib_umem_num_pages(umem) - idx);

	if (!(flags & MLX5_IB_UPD_XLT_ZAP)) {
		__mlx5_ib_populate_pas(dev, umem, page_shift,
				       idx, npages, xlt,
				       MLX5_IB_MTT_PRESENT);
		/*
		 * Clear padding after the pages brought from the umem.
		 */
		memset(xlt + (npages * sizeof(struct mlx5_mtt)), 0,
		       size - npages * sizeof(struct mlx5_mtt));
	}

	return npages;
}

#define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \
			    MLX5_UMR_MTT_ALIGNMENT)
#define MLX5_SPARE_UMR_CHUNK 0x10000
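
/*
 * Update a window of an MR's translation table (XLT) through UMR.  The
 * descriptors are staged in a DMA-mapped bounce buffer of at most
 * MLX5_MAX_UMR_CHUNK bytes (falling back to MLX5_SPARE_UMR_CHUNK or to the
 * single emergency page under memory pressure) and posted chunk by chunk;
 * the final chunk optionally enables the MR and updates PD, access flags
 * and translation.
 */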
int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
		       int page_shift, int flags)
{
	struct mlx5_ib_dev *dev = mr->dev;
	struct device *ddev = dev->ib_dev.dev.parent;
	int size;
	void *xlt;
	dma_addr_t dma;
	struct mlx5_umr_wr wr;
	struct ib_sge sg;
	int err = 0;
	int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
			? sizeof(struct mlx5_klm)
			: sizeof(struct mlx5_mtt);
	const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
	const int page_mask = page_align - 1;
	size_t pages_mapped = 0;
	size_t pages_to_map = 0;
	size_t pages_iter = 0;
	gfp_t gfp;
	bool use_emergency_page = false;

	if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
	    !umr_can_use_indirect_mkey(dev))
		return -EPERM;

	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
	 * so we need to align the offset and length accordingly
	 */
	if (idx & page_mask) {
		npages += idx & page_mask;
		idx &= ~page_mask;
	}

	gfp = flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : GFP_KERNEL;
	gfp |= __GFP_ZERO | __GFP_NOWARN;

	pages_to_map = ALIGN(npages, page_align);
	size = desc_size * pages_to_map;
	size = min_t(int, size, MLX5_MAX_UMR_CHUNK);

	xlt = (void *)__get_free_pages(gfp, get_order(size));
	if (!xlt && size > MLX5_SPARE_UMR_CHUNK) {
		mlx5_ib_dbg(dev, "Failed to allocate %d bytes of order %d. fallback to spare UMR allocation of %d bytes\n",
			    size, get_order(size), MLX5_SPARE_UMR_CHUNK);

		size = MLX5_SPARE_UMR_CHUNK;
		xlt = (void *)__get_free_pages(gfp, get_order(size));
	}

	if (!xlt) {
		mlx5_ib_warn(dev, "Using XLT emergency buffer\n");
		xlt = (void *)mlx5_ib_get_xlt_emergency_page();
		size = PAGE_SIZE;
		memset(xlt, 0, size);
		use_emergency_page = true;
	}
	pages_iter = size / desc_size;
	dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE);
	if (dma_mapping_error(ddev, dma)) {
		mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
		err = -ENOMEM;
		goto free_xlt;
	}

	sg.addr = dma;
	sg.lkey = dev->umrc.pd->local_dma_lkey;

	memset(&wr, 0, sizeof(wr));
	wr.wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT;
	if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
		wr.wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE;
	wr.wr.sg_list = &sg;
	wr.wr.num_sge = 1;
	wr.wr.opcode = MLX5_IB_WR_UMR;

	wr.pd = mr->ibmr.pd;
	wr.mkey = mr->mmkey.key;
	wr.length = mr->mmkey.size;
	wr.virt_addr = mr->mmkey.iova;
	wr.access_flags = mr->access_flags;
	wr.page_shift = page_shift;

	for (pages_mapped = 0;
	     pages_mapped < pages_to_map && !err;
	     pages_mapped += pages_iter, idx += pages_iter) {
		npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
		dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
		npages = populate_xlt(mr, idx, npages, xlt,
				      page_shift, size, flags);

		dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);

		sg.length = ALIGN(npages * desc_size,
				  MLX5_UMR_MTT_ALIGNMENT);

		if (pages_mapped + pages_iter >= pages_to_map) {
			if (flags & MLX5_IB_UPD_XLT_ENABLE)
				wr.wr.send_flags |=
					MLX5_IB_SEND_UMR_ENABLE_MR |
					MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS |
					MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
			if (flags & MLX5_IB_UPD_XLT_PD ||
			    flags & MLX5_IB_UPD_XLT_ACCESS)
				wr.wr.send_flags |=
					MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
			if (flags & MLX5_IB_UPD_XLT_ADDR)
				wr.wr.send_flags |=
					MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
		}

		wr.offset = idx * desc_size;
		wr.xlt_size = sg.length;

		err = mlx5_ib_post_send_wait(dev, &wr);
	}
	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);

free_xlt:
	if (use_emergency_page)
		mlx5_ib_put_xlt_emergency_page();
	else
		free_pages((unsigned long)xlt, get_order(size));

	return err;
}

/*
 * If ibmr is NULL it will be allocated by reg_create.
 * Else, the given ibmr will be used.
 */
static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
				     u64 virt_addr, u64 length,
				     struct ib_umem *umem, int npages,
				     int page_shift, int access_flags,
				     bool populate)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr;
	__be64 *pas;
	void *mkc;
	int inlen;
	u32 *in;
	int err;
	bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));

	mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mr->ibmr.pd = pd;
	mr->access_flags = access_flags;

	inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	if (populate)
		inlen += sizeof(*pas) * roundup(npages, 2);

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_1;
	}
	pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
	if (populate && !(access_flags & IB_ACCESS_ON_DEMAND))
		mlx5_ib_populate_pas(dev, umem, page_shift, pas,
				     pg_cap ? MLX5_IB_MTT_PRESENT : 0);

	/*
	 * The pg_access bit allows setting the access flags
	 * in the page list submitted with the command.
	 */
	MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, free, !populate);
	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
	MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
	MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
	MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
	MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
	MLX5_SET(mkc, mkc, lr, 1);
	MLX5_SET(mkc, mkc, umr_en, 1);

	MLX5_SET64(mkc, mkc, start_addr, virt_addr);
	MLX5_SET64(mkc, mkc, len, length);
	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
	MLX5_SET(mkc, mkc, bsf_octword_size, 0);
	MLX5_SET(mkc, mkc, translations_octword_size,
		 get_octo_len(virt_addr, length, page_shift));
	MLX5_SET(mkc, mkc, log_page_size, page_shift);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	if (populate) {
		MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
			 get_octo_len(virt_addr, length, page_shift));
	}

	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
	if (err) {
		mlx5_ib_warn(dev, "create mkey failed\n");
		goto err_2;
	}
	mr->mmkey.type = MLX5_MKEY_MR;
	mr->desc_size = sizeof(struct mlx5_mtt);
	mr->dev = dev;
	kvfree(in);

	mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);

	return mr;

err_2:
	kvfree(in);

err_1:
	if (!ibmr)
		kfree(mr);

	return ERR_PTR(err);
}

static void set_mr_fileds(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
			  int npages, u64 length, int access_flags)
{
	mr->npages = npages;
	atomic_add(npages, &dev->mdev->priv.reg_pages);
	mr->ibmr.lkey = mr->mmkey.key;
	mr->ibmr.rkey = mr->mmkey.key;
	mr->ibmr.length = length;
	mr->access_flags = access_flags;
}

static struct ib_mr *mlx5_ib_get_memic_mr(struct ib_pd *pd, u64 memic_addr,
					  u64 length, int acc)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	struct mlx5_core_dev *mdev = dev->mdev;
	struct mlx5_ib_mr *mr;
	void *mkc;
	u32 *in;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MEMIC & 0x3);
	MLX5_SET(mkc, mkc, access_mode_4_2,
		 (MLX5_MKC_ACCESS_MODE_MEMIC >> 2) & 0x7);
	MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
	MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
	MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
	MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
	MLX5_SET(mkc, mkc, lr, 1);

	MLX5_SET64(mkc, mkc, len, length);
	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET64(mkc, mkc, start_addr,
		   memic_addr - pci_resource_start(dev->mdev->pdev, 0));

	err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
	if (err)
		goto err_in;

	kfree(in);

	mr->umem = NULL;
	set_mr_fileds(dev, mr, 0, length, acc);

	return &mr->ibmr;

err_in:
	kfree(in);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
				struct ib_dm_mr_attr *attr,
				struct uverbs_attr_bundle *attrs)
{
	struct mlx5_ib_dm *mdm = to_mdm(dm);
	u64 memic_addr;

	if (attr->access_flags & ~MLX5_IB_DM_ALLOWED_ACCESS)
		return ERR_PTR(-EINVAL);

	memic_addr = mdm->dev_addr + attr->offset;

	return mlx5_ib_get_memic_mr(pd, memic_addr, attr->length,
				    attr->access_flags);
}
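
/*
 * Main entry point for user MR registration (ibv_reg_mr).  Implicit ODP
 * registrations (start == 0, length == U64_MAX) are diverted to
 * mlx5_ib_alloc_implicit_mr().  Otherwise the MR is taken from the cache
 * and finalized with a UMR translation update when the hardware allows it,
 * or created synchronously through reg_create() on the slow path.
 */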
struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 virt_addr, int access_flags,
				  struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr = NULL;
	bool use_umr;
	struct ib_umem *umem;
	int page_shift;
	int npages;
	int ncont;
	int order;
	int err;

	if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
		return ERR_PTR(-EOPNOTSUPP);

	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
		    start, virt_addr, length, access_flags);

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	if (!start && length == U64_MAX) {
		if (!(access_flags & IB_ACCESS_ON_DEMAND) ||
		    !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
			return ERR_PTR(-EINVAL);

		mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags);
		if (IS_ERR(mr))
			return ERR_CAST(mr);
		return &mr->ibmr;
	}
#endif

	err = mr_umem_get(pd, start, length, access_flags, &umem, &npages,
			  &page_shift, &ncont, &order);
	if (err < 0)
		return ERR_PTR(err);

	use_umr = !MLX5_CAP_GEN(dev->mdev, umr_modify_entity_size_disabled) &&
		  (!MLX5_CAP_GEN(dev->mdev, umr_modify_atomic_disabled) ||
		   !MLX5_CAP_GEN(dev->mdev, atomic));

	if (order <= mr_cache_max_order(dev) && use_umr) {
		mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont,
					 page_shift, order, access_flags);
		if (PTR_ERR(mr) == -EAGAIN) {
			mlx5_ib_dbg(dev, "cache empty for order %d\n", order);
			mr = NULL;
		}
	} else if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) {
		if (access_flags & IB_ACCESS_ON_DEMAND) {
			err = -EINVAL;
			pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n");
			goto error;
		}
		use_umr = false;
	}

	if (!mr) {
		mutex_lock(&dev->slow_path_mutex);
		mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
				page_shift, access_flags, !use_umr);
		mutex_unlock(&dev->slow_path_mutex);
	}

	if (IS_ERR(mr)) {
		err = PTR_ERR(mr);
		goto error;
	}

	mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);

	mr->umem = umem;
	set_mr_fileds(dev, mr, npages, length, access_flags);

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	update_odp_mr(mr);
#endif

	if (use_umr) {
		int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE;

		if (access_flags & IB_ACCESS_ON_DEMAND)
			update_xlt_flags |= MLX5_IB_UPD_XLT_ZAP;

		err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift,
					 update_xlt_flags);
		if (err) {
			dereg_mr(dev, mr);
			return ERR_PTR(err);
		}
	}

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	mr->live = 1;
#endif
	return &mr->ibmr;
error:
	ib_umem_release(umem);
	return ERR_PTR(err);
}
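
/*
 * unreg_umr() posts a UMR WQE that disables the mkey (returning it to the
 * "free" state) so a cached MR can be recycled; it is skipped when the
 * device is in an internal error state.  rereg_umr() posts a UMR WQE that
 * updates only the PD and/or access flags of an existing mkey.
 */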
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct mlx5_core_dev *mdev = dev->mdev;
	struct mlx5_umr_wr umrwr = {};

	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
		return 0;

	umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR |
			      MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
	umrwr.wr.opcode = MLX5_IB_WR_UMR;
	umrwr.pd = dev->umrc.pd;
	umrwr.mkey = mr->mmkey.key;
	umrwr.ignore_free_state = 1;

	return mlx5_ib_post_send_wait(dev, &umrwr);
}

static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr,
		     int access_flags, int flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_umr_wr umrwr = {};
	int err;

	umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE;

	umrwr.wr.opcode = MLX5_IB_WR_UMR;
	umrwr.mkey = mr->mmkey.key;

	if (flags & IB_MR_REREG_PD || flags & IB_MR_REREG_ACCESS) {
		umrwr.pd = pd;
		umrwr.access_flags = access_flags;
		umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
	}

	err = mlx5_ib_post_send_wait(dev, &umrwr);

	return err;
}

int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
			  u64 length, u64 virt_addr, int new_access_flags,
			  struct ib_pd *new_pd, struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
	struct mlx5_ib_mr *mr = to_mmr(ib_mr);
	struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd;
	int access_flags = flags & IB_MR_REREG_ACCESS ?
			    new_access_flags :
			    mr->access_flags;
	int page_shift = 0;
	int upd_flags = 0;
	int npages = 0;
	int ncont = 0;
	int order = 0;
	u64 addr, len;
	int err;

	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
		    start, virt_addr, length, access_flags);

	atomic_sub(mr->npages, &dev->mdev->priv.reg_pages);

	if (!mr->umem)
		return -EINVAL;

	if (flags & IB_MR_REREG_TRANS) {
		addr = virt_addr;
		len = length;
	} else {
		addr = mr->umem->address;
		len = mr->umem->length;
	}

	if (flags != IB_MR_REREG_PD) {
		/*
		 * Replace umem. This needs to be done whether or not UMR is
		 * used.
		 */
		flags |= IB_MR_REREG_TRANS;
		ib_umem_release(mr->umem);
		mr->umem = NULL;
		err = mr_umem_get(pd, addr, len, access_flags, &mr->umem,
				  &npages, &page_shift, &ncont, &order);
		if (err)
			goto err;
	}

	if (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len)) {
		/*
		 * UMR can't be used - MKey needs to be replaced.
		 */
		if (mr->allocated_from_cache)
			err = unreg_umr(dev, mr);
		else
			err = destroy_mkey(dev, mr);
		if (err)
			goto err;

		mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont,
				page_shift, access_flags, true);

		if (IS_ERR(mr)) {
			err = PTR_ERR(mr);
			mr = to_mmr(ib_mr);
			goto err;
		}

		mr->allocated_from_cache = 0;
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
		mr->live = 1;
#endif
	} else {
		/*
		 * Send a UMR WQE
		 */
		mr->ibmr.pd = pd;
		mr->access_flags = access_flags;
		mr->mmkey.iova = addr;
		mr->mmkey.size = len;
		mr->mmkey.pd = to_mpd(pd)->pdn;

		if (flags & IB_MR_REREG_TRANS) {
			upd_flags = MLX5_IB_UPD_XLT_ADDR;
			if (flags & IB_MR_REREG_PD)
				upd_flags |= MLX5_IB_UPD_XLT_PD;
			if (flags & IB_MR_REREG_ACCESS)
				upd_flags |= MLX5_IB_UPD_XLT_ACCESS;
			err = mlx5_ib_update_xlt(mr, 0, npages, page_shift,
						 upd_flags);
		} else {
			err = rereg_umr(pd, mr, access_flags, flags);
		}

		if (err)
			goto err;
	}

	set_mr_fileds(dev, mr, npages, len, access_flags);

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	update_odp_mr(mr);
#endif
	return 0;

err:
	if (mr->umem) {
		ib_umem_release(mr->umem);
		mr->umem = NULL;
	}
	clean_mr(dev, mr);
	return err;
}

static int
mlx5_alloc_priv_descs(struct ib_device *device,
		      struct mlx5_ib_mr *mr,
		      int ndescs,
		      int desc_size)
{
	int size = ndescs * desc_size;
	int add_size;
	int ret;

	add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0);

	mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL);
	if (!mr->descs_alloc)
		return -ENOMEM;

	mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);

	mr->desc_map = dma_map_single(device->dev.parent, mr->descs,
				      size, DMA_TO_DEVICE);
	if (dma_mapping_error(device->dev.parent, mr->desc_map)) {
		ret = -ENOMEM;
		goto err;
	}

	return 0;
err:
	kfree(mr->descs_alloc);

	return ret;
}

static void
mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
{
	if (mr->descs) {
		struct ib_device *device = mr->ibmr.device;
		int size = mr->max_descs * mr->desc_size;

		dma_unmap_single(device->dev.parent, mr->desc_map,
				 size, DMA_TO_DEVICE);
		kfree(mr->descs_alloc);
		mr->descs = NULL;
	}
}

static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	int allocated_from_cache = mr->allocated_from_cache;

	if (mr->sig) {
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_memory.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
				     mr->sig->psv_memory.psv_idx);
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_wire.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
				     mr->sig->psv_wire.psv_idx);
		kfree(mr->sig);
		mr->sig = NULL;
	}

	if (!allocated_from_cache) {
		destroy_mkey(dev, mr);
		mlx5_free_priv_descs(mr);
	}
}

static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	int npages = mr->npages;
	struct ib_umem *umem = mr->umem;

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	if (umem && umem->odp_data) {
		/* Prevent new page faults from succeeding */
		mr->live = 0;
		/* Wait for all running page-fault handlers to finish. */
		synchronize_srcu(&dev->mr_srcu);
		/* Destroy all page mappings */
		if (umem->odp_data->page_list)
			mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
						 ib_umem_end(umem));
		else
			mlx5_ib_free_implicit_mr(mr);
		/*
		 * We kill the umem before the MR for ODP,
		 * so that there will not be any invalidations in
		 * flight, looking at the *mr struct.
		 */
		ib_umem_release(umem);
		atomic_sub(npages, &dev->mdev->priv.reg_pages);

		/* Avoid double-freeing the umem. */
		umem = NULL;
	}
#endif

	clean_mr(dev, mr);

	/*
	 * We should unregister the DMA address from the HCA before
	 * we remove the DMA mapping.
	 */
	mlx5_mr_cache_free(dev, mr);
	if (umem) {
		ib_umem_release(umem);
		atomic_sub(npages, &dev->mdev->priv.reg_pages);
	}

	if (!mr->allocated_from_cache)
		kfree(mr);
}

int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
{
	dereg_mr(to_mdev(ibmr->device), to_mmr(ibmr));

	return 0;
}
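
/*
 * Kernel MR allocation (ib_alloc_mr).  IB_MR_TYPE_MEM_REG uses MTT
 * descriptors, IB_MR_TYPE_SG_GAPS uses KLMs, and IB_MR_TYPE_SIGNATURE
 * additionally allocates a pair of PSVs (memory and wire) for signature
 * offload.  A private, MLX5_UMR_ALIGN-aligned descriptor buffer is
 * DMA-mapped for the first two types via mlx5_alloc_priv_descs().
 */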
struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
			       enum ib_mr_type mr_type,
			       u32 max_num_sg)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	int ndescs = ALIGN(max_num_sg, 4);
	struct mlx5_ib_mr *mr;
	void *mkc;
	u32 *in;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	MLX5_SET(mkc, mkc, free, 1);
	MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);

	if (mr_type == IB_MR_TYPE_MEM_REG) {
		mr->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
		MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
		err = mlx5_alloc_priv_descs(pd->device, mr,
					    ndescs, sizeof(struct mlx5_mtt));
		if (err)
			goto err_free_in;

		mr->desc_size = sizeof(struct mlx5_mtt);
		mr->max_descs = ndescs;
	} else if (mr_type == IB_MR_TYPE_SG_GAPS) {
		mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
		err = mlx5_alloc_priv_descs(pd->device, mr,
					    ndescs, sizeof(struct mlx5_klm));
		if (err)
			goto err_free_in;

		mr->desc_size = sizeof(struct mlx5_klm);
		mr->max_descs = ndescs;
	} else if (mr_type == IB_MR_TYPE_SIGNATURE) {
		u32 psv_index[2];

		MLX5_SET(mkc, mkc, bsf_en, 1);
		MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
		mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
		if (!mr->sig) {
			err = -ENOMEM;
			goto err_free_in;
		}

		/* create mem & wire PSVs */
		err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn,
					   2, psv_index);
		if (err)
			goto err_free_sig;

		mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS;
		mr->sig->psv_memory.psv_idx = psv_index[0];
		mr->sig->psv_wire.psv_idx = psv_index[1];

		mr->sig->sig_status_checked = true;
		mr->sig->sig_err_exists = false;
		/* Next UMR, Arm SIGERR */
		++mr->sig->sigerr_count;
	} else {
		mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type);
		err = -EINVAL;
		goto err_free_in;
	}

	MLX5_SET(mkc, mkc, access_mode_1_0, mr->access_mode & 0x3);
	MLX5_SET(mkc, mkc, access_mode_4_2, (mr->access_mode >> 2) & 0x7);
	MLX5_SET(mkc, mkc, umr_en, 1);

	mr->ibmr.device = pd->device;
	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
	if (err)
		goto err_destroy_psv;

	mr->mmkey.type = MLX5_MKEY_MR;
	mr->ibmr.lkey = mr->mmkey.key;
	mr->ibmr.rkey = mr->mmkey.key;
	mr->umem = NULL;
	kfree(in);

	return &mr->ibmr;

err_destroy_psv:
	if (mr->sig) {
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_memory.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
				     mr->sig->psv_memory.psv_idx);
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_wire.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
				     mr->sig->psv_wire.psv_idx);
	}
	mlx5_free_priv_descs(mr);
err_free_sig:
	kfree(mr->sig);
err_free_in:
	kfree(in);
err_free:
	kfree(mr);
	return ERR_PTR(err);
}
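
/*
 * Allocate a memory window (the ib_alloc_mw() verb): create a KLM-based,
 * UMR-enabled mkey sized to the number of KLMs requested by userspace and
 * report the result back through udata.
 */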
struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
			       struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	struct mlx5_ib_mw *mw = NULL;
	u32 *in = NULL;
	void *mkc;
	int ndescs;
	int err;
	struct mlx5_ib_alloc_mw req = {};
	struct {
		__u32	comp_mask;
		__u32	response_length;
	} resp = {};

	err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
	if (err)
		return ERR_PTR(err);

	if (req.comp_mask || req.reserved1 || req.reserved2)
		return ERR_PTR(-EOPNOTSUPP);

	if (udata->inlen > sizeof(req) &&
	    !ib_is_udata_cleared(udata, sizeof(req),
				 udata->inlen - sizeof(req)))
		return ERR_PTR(-EOPNOTSUPP);

	ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);

	mw = kzalloc(sizeof(*mw), GFP_KERNEL);
	in = kzalloc(inlen, GFP_KERNEL);
	if (!mw || !in) {
		err = -ENOMEM;
		goto free;
	}

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

	MLX5_SET(mkc, mkc, free, 1);
	MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
	MLX5_SET(mkc, mkc, umr_en, 1);
	MLX5_SET(mkc, mkc, lr, 1);
	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS);
	MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2)));
	MLX5_SET(mkc, mkc, qpn, 0xffffff);

	err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, inlen);
	if (err)
		goto free;

	mw->mmkey.type = MLX5_MKEY_MW;
	mw->ibmw.rkey = mw->mmkey.key;
	mw->ndescs = ndescs;

	resp.response_length = min(offsetof(typeof(resp), response_length) +
				   sizeof(resp.response_length), udata->outlen);
	if (resp.response_length) {
		err = ib_copy_to_udata(udata, &resp, resp.response_length);
		if (err) {
			mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
			goto free;
		}
	}

	kfree(in);
	return &mw->ibmw;

free:
	kfree(mw);
	kfree(in);
	return ERR_PTR(err);
}
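
/* Destroy a memory window's mkey and free it (the ib_dealloc_mw() verb). */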
int mlx5_ib_dealloc_mw(struct ib_mw *mw)
{
	struct mlx5_ib_mw *mmw = to_mmw(mw);
	int err;

	err = mlx5_core_destroy_mkey((to_mdev(mw->device))->mdev,
				     &mmw->mmkey);
	if (!err)
		kfree(mmw);
	return err;
}
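
/*
 * Report signature error status for an MR (the ib_check_mr_status() verb):
 * if a signature error was latched for this MR, copy it into mr_status and
 * clear the pending error so it is reported only once.
 */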
int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
			    struct ib_mr_status *mr_status)
{
	struct mlx5_ib_mr *mmr = to_mmr(ibmr);
	int ret = 0;

	if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
		pr_err("Invalid status check mask\n");
		ret = -EINVAL;
		goto done;
	}

	mr_status->fail_status = 0;
	if (check_mask & IB_MR_CHECK_SIG_STATUS) {
		if (!mmr->sig) {
			ret = -EINVAL;
			pr_err("signature status check requested on a non-signature enabled MR\n");
			goto done;
		}

		mmr->sig->sig_status_checked = true;
		if (!mmr->sig->sig_err_exists)
			goto done;

		if (ibmr->lkey == mmr->sig->err_item.key)
			memcpy(&mr_status->sig_err, &mmr->sig->err_item,
			       sizeof(mr_status->sig_err));
		else {
			mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
			mr_status->sig_err.sig_err_offset = 0;
			mr_status->sig_err.key = mmr->sig->err_item.key;
		}

		mmr->sig->sig_err_exists = false;
		mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
	}

done:
	return ret;
}
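
/*
 * Translate a scatterlist into the MR's KLM descriptor array, honouring an
 * initial offset into the first entry. Returns the number of scatterlist
 * entries that were mapped, which may be fewer than sg_nents if the MR runs
 * out of descriptors.
 */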
static int
mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
		   struct scatterlist *sgl,
		   unsigned short sg_nents,
		   unsigned int *sg_offset_p)
{
	struct scatterlist *sg = sgl;
	struct mlx5_klm *klms = mr->descs;
	unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
	u32 lkey = mr->ibmr.pd->local_dma_lkey;
	int i;

	mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
	mr->ibmr.length = 0;

	for_each_sg(sgl, sg, sg_nents, i) {
		if (unlikely(i >= mr->max_descs))
			break;
		klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset);
		klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset);
		klms[i].key = cpu_to_be32(lkey);
		mr->ibmr.length += sg_dma_len(sg) - sg_offset;

		sg_offset = 0;
	}
	mr->ndescs = i;

	if (sg_offset_p)
		*sg_offset_p = sg_offset;

	return i;
}
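
/*
 * ib_sg_to_pages() callback: append one page address to the MR's MTT
 * descriptor array, marking the entry as readable and writable.
 */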
static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	__be64 *descs;

	if (unlikely(mr->ndescs == mr->max_descs))
		return -ENOMEM;

	descs = mr->descs;
	descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);

	return 0;
}
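
/*
 * Map a scatterlist into a fast-registration MR (the ib_map_mr_sg() verb):
 * sync the descriptor buffer for CPU access, fill it with KLMs or MTT
 * entries depending on the MR's access mode, then sync it back for the
 * device.
 */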
int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
		      unsigned int *sg_offset)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	int n;

	mr->ndescs = 0;

	ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map,
				   mr->desc_size * mr->max_descs,
				   DMA_TO_DEVICE);

	if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
		n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset);
	else
		n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
				   mlx5_set_page);

	ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
				      mr->desc_size * mr->max_descs,
				      DMA_TO_DEVICE);

	return n;
}