pagealloc.c
/*
 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <linux/highmem.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/cmd.h>
#include "mlx5_core.h"

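/* op_mod values for the MANAGE_PAGES command */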
enum {
	MLX5_PAGES_CANT_GIVE	= 0,
	MLX5_PAGES_GIVE		= 1,
	MLX5_PAGES_TAKE		= 2
};

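/* A deferred page request from firmware, serviced by the page
 * allocator workqueue.  npages > 0 asks the driver to give pages to
 * the device; npages < 0 asks it to reclaim them.
 */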
struct mlx5_pages_req {
	struct mlx5_core_dev *dev;
	u16	func_id;
	s32	npages;
	struct work_struct work;
};

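/* Tracks one system page given to firmware.  A system page is carved
 * into MLX5_NUM_4K_IN_PAGE chunks of MLX5_ADAPTER_PAGE_SIZE bytes;
 * bitmask and free_count record which of those 4K chunks are unused.
 */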
struct fw_page {
	struct rb_node		rb_node;
	u64			addr;
	struct page	       *page;
	u16			func_id;
	unsigned long		bitmask;
	struct list_head	list;
	unsigned		free_count;
};

enum {
	MAX_RECLAIM_TIME_MSECS	= 5000,
	MAX_RECLAIM_VFS_PAGES_TIME_MSECS = 2 * 1000 * 60,
};

enum {
	MLX5_MAX_RECLAIM_TIME_MILI	= 5000,
	MLX5_NUM_4K_IN_PAGE		= PAGE_SIZE / MLX5_ADAPTER_PAGE_SIZE,
};

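/* Insert a newly mapped page into the per-device rb-tree, keyed by
 * its DMA address, and put it on the free list with all of its 4K
 * chunks marked available.
 */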
static int insert_page(struct mlx5_core_dev *dev, u64 addr, struct page *page, u16 func_id)
{
	struct rb_root *root = &dev->priv.page_root;
	struct rb_node **new = &root->rb_node;
	struct rb_node *parent = NULL;
	struct fw_page *nfp;
	struct fw_page *tfp;
	int i;

	while (*new) {
		parent = *new;
		tfp = rb_entry(parent, struct fw_page, rb_node);
		if (tfp->addr < addr)
			new = &parent->rb_right;
		else if (tfp->addr > addr)
			new = &parent->rb_left;
		else
			return -EEXIST;
	}

	nfp = kzalloc(sizeof(*nfp), GFP_KERNEL);
	if (!nfp)
		return -ENOMEM;

	nfp->addr = addr;
	nfp->page = page;
	nfp->func_id = func_id;
	nfp->free_count = MLX5_NUM_4K_IN_PAGE;
	for (i = 0; i < MLX5_NUM_4K_IN_PAGE; i++)
		set_bit(i, &nfp->bitmask);

	rb_link_node(&nfp->rb_node, parent, new);
	rb_insert_color(&nfp->rb_node, root);
	list_add(&nfp->list, &dev->priv.free_list);

	return 0;
}

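/* Look up the fw_page tracking the given page-aligned DMA address. */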
static struct fw_page *find_fw_page(struct mlx5_core_dev *dev, u64 addr)
{
	struct rb_root *root = &dev->priv.page_root;
	struct rb_node *tmp = root->rb_node;
	struct fw_page *result = NULL;
	struct fw_page *tfp;

	while (tmp) {
		tfp = rb_entry(tmp, struct fw_page, rb_node);
		if (tfp->addr < addr) {
			tmp = tmp->rb_right;
		} else if (tfp->addr > addr) {
			tmp = tmp->rb_left;
		} else {
			result = tfp;
			break;
		}
	}

	return result;
}

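/* Query how many pages firmware needs for the given stage (boot or
 * init) and which function the request is for.
 */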
static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
				s32 *npages, int boot)
{
	u32 out[MLX5_ST_SZ_DW(query_pages_out)] = {0};
	u32 in[MLX5_ST_SZ_DW(query_pages_in)] = {0};
	int err;

	MLX5_SET(query_pages_in, in, opcode, MLX5_CMD_OP_QUERY_PAGES);
	MLX5_SET(query_pages_in, in, op_mod, boot ?
		 MLX5_QUERY_PAGES_IN_OP_MOD_BOOT_PAGES :
		 MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES);

	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
	if (err)
		return err;

	*npages = MLX5_GET(query_pages_out, out, num_pages);
	*func_id = MLX5_GET(query_pages_out, out, function_id);

	return err;
}

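/* Hand out one free 4K chunk from the head of the free list.
 * Returns -ENOMEM when no tracked page has a free chunk left, in
 * which case the caller allocates a fresh system page.
 */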
static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr)
{
	struct fw_page *fp;
	unsigned n;

	if (list_empty(&dev->priv.free_list))
		return -ENOMEM;

	fp = list_entry(dev->priv.free_list.next, struct fw_page, list);
	n = find_first_bit(&fp->bitmask, 8 * sizeof(fp->bitmask));
	if (n >= MLX5_NUM_4K_IN_PAGE) {
		mlx5_core_warn(dev, "alloc 4k bug\n");
		return -ENOENT;
	}
	clear_bit(n, &fp->bitmask);
	fp->free_count--;
	if (!fp->free_count)
		list_del(&fp->list);

	*addr = fp->addr + n * MLX5_ADAPTER_PAGE_SIZE;

	return 0;
}

#define MLX5_U64_4K_PAGE_MASK ((~(u64)0U) << PAGE_SHIFT)

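/* Return a 4K chunk to its tracking page.  Once every chunk of the
 * system page is free again, the page is unmapped and freed.
 */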
static void free_4k(struct mlx5_core_dev *dev, u64 addr)
{
	struct fw_page *fwp;
	int n;

	fwp = find_fw_page(dev, addr & MLX5_U64_4K_PAGE_MASK);
	if (!fwp) {
		mlx5_core_warn(dev, "page not found\n");
		return;
	}

	n = (addr & ~MLX5_U64_4K_PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT;
	fwp->free_count++;
	set_bit(n, &fwp->bitmask);
	if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) {
		rb_erase(&fwp->rb_node, &dev->priv.page_root);
		if (fwp->free_count != 1)
			list_del(&fwp->list);
		dma_unmap_page(&dev->pdev->dev, addr & MLX5_U64_4K_PAGE_MASK,
			       PAGE_SIZE, DMA_BIDIRECTIONAL);
		__free_page(fwp->page);
		kfree(fwp);
	} else if (fwp->free_count == 1) {
		list_add(&fwp->list, &dev->priv.free_list);
	}
}

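/* Allocate and DMA-map one system page (NUMA-local to the device) and
 * start tracking it.  Firmware cannot use a page at bus address 0, so
 * such a mapping is set aside and the mapping is retried; the zero
 * mapping is released on the way out.
 */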
static int alloc_system_page(struct mlx5_core_dev *dev, u16 func_id)
{
	struct page *page;
	u64 zero_addr = 1;
	u64 addr;
	int err;
	int nid = dev_to_node(&dev->pdev->dev);

	page = alloc_pages_node(nid, GFP_HIGHUSER, 0);
	if (!page) {
		mlx5_core_warn(dev, "failed to allocate page\n");
		return -ENOMEM;
	}
map:
	addr = dma_map_page(&dev->pdev->dev, page, 0,
			    PAGE_SIZE, DMA_BIDIRECTIONAL);
	if (dma_mapping_error(&dev->pdev->dev, addr)) {
		mlx5_core_warn(dev, "failed dma mapping page\n");
		err = -ENOMEM;
		goto err_mapping;
	}

	/* Firmware doesn't support page with physical address 0 */
	if (addr == 0) {
		zero_addr = addr;
		goto map;
	}

	err = insert_page(dev, addr, page, func_id);
	if (err) {
		mlx5_core_err(dev, "failed to track allocated page\n");
		dma_unmap_page(&dev->pdev->dev, addr, PAGE_SIZE,
			       DMA_BIDIRECTIONAL);
	}

err_mapping:
	if (err)
		__free_page(page);

	if (zero_addr == 0)
		dma_unmap_page(&dev->pdev->dev, zero_addr, PAGE_SIZE,
			       DMA_BIDIRECTIONAL);

	return err;
}

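/* Tell firmware we cannot supply the pages it asked for. */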
static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id)
{
	u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0};
	u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0};
	int err;

	MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
	MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_CANT_GIVE);
	MLX5_SET(manage_pages_in, in, function_id, func_id);

	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
	if (err)
		mlx5_core_warn(dev, "page notify failed func_id(%d) err(%d)\n",
			       func_id, err);
}

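/* Allocate npages 4K chunks, post their addresses to firmware with a
 * MANAGE_PAGES(GIVE) command and update the page counters.  On
 * failure, all chunks allocated so far are freed and, if requested,
 * firmware is notified that no pages can be given.
 */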
static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
		      int notify_fail)
{
	u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0};
	int inlen = MLX5_ST_SZ_BYTES(manage_pages_in);
	u64 addr;
	int err;
	u32 *in;
	int i;

	inlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_in, pas[0]);
	in = mlx5_vzalloc(inlen);
	if (!in) {
		err = -ENOMEM;
		mlx5_core_warn(dev, "vzalloc failed %d\n", inlen);
		goto out_free;
	}

	for (i = 0; i < npages; i++) {
retry:
		err = alloc_4k(dev, &addr);
		if (err) {
			if (err == -ENOMEM)
				err = alloc_system_page(dev, func_id);
			if (err)
				goto out_4k;

			goto retry;
		}
		MLX5_ARRAY_SET64(manage_pages_in, in, pas, i, addr);
	}

	MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
	MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_GIVE);
	MLX5_SET(manage_pages_in, in, function_id, func_id);
	MLX5_SET(manage_pages_in, in, input_num_entries, npages);

	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
	if (err) {
		mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n",
			       func_id, npages, err);
		goto out_4k;
	}

	dev->priv.fw_pages += npages;
	if (func_id)
		dev->priv.vfs_pages += npages;

	mlx5_core_dbg(dev, "err %d\n", err);
	kvfree(in);
	return 0;

out_4k:
	for (i--; i >= 0; i--)
		free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i]));
out_free:
	kvfree(in);
	if (notify_fail)
		page_notify_fail(dev, func_id);
	return err;
}

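/* Execute a MANAGE_PAGES(TAKE) command.  If the device is in internal
 * error state, the command interface is unusable, so instead walk the
 * rb-tree and synthesize the reply locally so the pages can still be
 * freed.
 */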
static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
			     u32 *in, int in_size, u32 *out, int out_size)
{
	struct fw_page *fwp;
	struct rb_node *p;
	u32 func_id;
	u32 npages;
	u32 i = 0;

	if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR)
		return mlx5_cmd_exec(dev, in, in_size, out, out_size);

	/* No hard feelings, we want our pages back! */
	npages = MLX5_GET(manage_pages_in, in, input_num_entries);
	func_id = MLX5_GET(manage_pages_in, in, function_id);

	p = rb_first(&dev->priv.page_root);
	while (p && i < npages) {
		fwp = rb_entry(p, struct fw_page, rb_node);
		p = rb_next(p);
		if (fwp->func_id != func_id)
			continue;

		MLX5_ARRAY_SET64(manage_pages_out, out, pas, i, fwp->addr);
		i++;
	}

	MLX5_SET(manage_pages_out, out, output_num_entries, i);
	return 0;
}

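/* Reclaim up to npages pages from firmware for the given function and
 * free the corresponding 4K chunks.  The number actually returned by
 * firmware is reported through nclaimed.
 */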
static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
			 int *nclaimed)
{
	int outlen = MLX5_ST_SZ_BYTES(manage_pages_out);
	u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0};
	int num_claimed;
	u32 *out;
	int err;
	int i;

	if (nclaimed)
		*nclaimed = 0;

	outlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]);
	out = mlx5_vzalloc(outlen);
	if (!out)
		return -ENOMEM;

	MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
	MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_TAKE);
	MLX5_SET(manage_pages_in, in, function_id, func_id);
	MLX5_SET(manage_pages_in, in, input_num_entries, npages);

	mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen);
	err = reclaim_pages_cmd(dev, in, sizeof(in), out, outlen);
	if (err) {
		mlx5_core_err(dev, "failed reclaiming pages: err %d\n", err);
		goto out_free;
	}

	num_claimed = MLX5_GET(manage_pages_out, out, output_num_entries);
	if (num_claimed > npages) {
		mlx5_core_warn(dev, "fw returned %d, driver asked %d => corruption\n",
			       num_claimed, npages);
		err = -EINVAL;
		goto out_free;
	}

	for (i = 0; i < num_claimed; i++)
		free_4k(dev, MLX5_GET64(manage_pages_out, out, pas[i]));

	if (nclaimed)
		*nclaimed = num_claimed;

	dev->priv.fw_pages -= num_claimed;
	if (func_id)
		dev->priv.vfs_pages -= num_claimed;

out_free:
	kvfree(out);
	return err;
}

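/* Workqueue handler: a negative page count means reclaim pages from
 * firmware, a positive one means give pages to it.
 */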
static void pages_work_handler(struct work_struct *work)
{
	struct mlx5_pages_req *req = container_of(work, struct mlx5_pages_req, work);
	struct mlx5_core_dev *dev = req->dev;
	int err = 0;

	if (req->npages < 0)
		err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL);
	else if (req->npages > 0)
		err = give_pages(dev, req->func_id, req->npages, 1);

	if (err)
		mlx5_core_warn(dev, "%s fail %d\n",
			       req->npages < 0 ? "reclaim" : "give", err);

	kfree(req);
}

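/* Queue a firmware page request for deferred handling.  May be called
 * from atomic context (page request events), hence GFP_ATOMIC.
 */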
void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id,
				 s32 npages)
{
	struct mlx5_pages_req *req;

	req = kzalloc(sizeof(*req), GFP_ATOMIC);
	if (!req) {
		mlx5_core_warn(dev, "failed to allocate pages request\n");
		return;
	}

	req->dev = dev;
	req->func_id = func_id;
	req->npages = npages;
	INIT_WORK(&req->work, pages_work_handler);
	queue_work(dev->priv.pg_wq, &req->work);
}

int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot)
{
	u16 uninitialized_var(func_id);
	s32 uninitialized_var(npages);
	int err;

	err = mlx5_cmd_query_pages(dev, &func_id, &npages, boot);
	if (err)
		return err;

	mlx5_core_dbg(dev, "requested %d %s pages for func_id 0x%x\n",
		      npages, boot ? "boot" : "init", func_id);

	return give_pages(dev, func_id, npages, 0);
}

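/* Estimate how many page addresses fit in one reclaim command reply:
 * the inline output area plus MLX5_BLKS_FOR_RECLAIM_PAGES mailbox
 * blocks, minus the fixed MANAGE_PAGES output header, divided by the
 * size of one address entry.
 */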
enum {
	MLX5_BLKS_FOR_RECLAIM_PAGES = 12
};

static int optimal_reclaimed_pages(void)
{
	struct mlx5_cmd_prot_block *block;
	struct mlx5_cmd_layout *lay;
	int ret;

	ret = (sizeof(lay->out) + MLX5_BLKS_FOR_RECLAIM_PAGES * sizeof(block->data) -
	       MLX5_ST_SZ_BYTES(manage_pages_out)) /
	       MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]);

	return ret;
}

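/* On teardown, keep reclaiming batches of pages until the rb-tree is
 * empty.  The timeout is restarted whenever firmware makes progress,
 * so only a completely stalled reclaim gives up.
 */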
int mlx5_reclaim_startup_pages(struct mlx5_core_dev *dev)
{
	unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
	struct fw_page *fwp;
	struct rb_node *p;
	int nclaimed = 0;
	int err = 0;

	do {
		p = rb_first(&dev->priv.page_root);
		if (p) {
			fwp = rb_entry(p, struct fw_page, rb_node);
			err = reclaim_pages(dev, fwp->func_id,
					    optimal_reclaimed_pages(),
					    &nclaimed);
			if (err) {
				mlx5_core_warn(dev, "failed reclaiming pages (%d)\n",
					       err);
				return err;
			}
			if (nclaimed)
				end = jiffies + msecs_to_jiffies(MAX_RECLAIM_TIME_MSECS);
		}
		if (time_after(jiffies, end)) {
			mlx5_core_warn(dev, "FW did not return all pages. giving up...\n");
			break;
		}
	} while (p);

	WARN(dev->priv.fw_pages,
	     "FW pages counter is %d after reclaiming all pages\n",
	     dev->priv.fw_pages);
	WARN(dev->priv.vfs_pages,
	     "VFs FW pages counter is %d after reclaiming all pages\n",
	     dev->priv.vfs_pages);

	return 0;
}

void mlx5_pagealloc_init(struct mlx5_core_dev *dev)
{
	dev->priv.page_root = RB_ROOT;
	INIT_LIST_HEAD(&dev->priv.free_list);
}

void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev)
{
	/* nothing */
}

int mlx5_pagealloc_start(struct mlx5_core_dev *dev)
{
	dev->priv.pg_wq = create_singlethread_workqueue("mlx5_page_allocator");
	if (!dev->priv.pg_wq)
		return -ENOMEM;

	return 0;
}

void mlx5_pagealloc_stop(struct mlx5_core_dev *dev)
{
	destroy_workqueue(dev->priv.pg_wq);
}

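/* Wait for all VF pages to be returned, extending the timeout
 * whenever the outstanding count drops.  Skipped entirely on internal
 * error, where the pages are freed manually later.
 */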
int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev)
{
	unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS);
	int prev_vfs_pages = dev->priv.vfs_pages;

	/* In case of internal error we will free the pages manually later */
	if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
		mlx5_core_warn(dev, "Skipping wait for vf pages stage");
		return 0;
	}

	mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_vfs_pages,
		      dev->priv.name);
	while (dev->priv.vfs_pages) {
		if (time_after(jiffies, end)) {
			mlx5_core_warn(dev, "aborting while there are %d pending pages\n", dev->priv.vfs_pages);
			return -ETIMEDOUT;
		}
		if (dev->priv.vfs_pages < prev_vfs_pages) {
			end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS);
			prev_vfs_pages = dev->priv.vfs_pages;
		}
		msleep(50);
	}

	mlx5_core_dbg(dev, "All pages received from %s\n", dev->priv.name);
	return 0;
}