eq.c
/*
 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials
 *   provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/mlx5/driver.h>
#include <linux/mlx5/cmd.h>
#ifdef CONFIG_RFS_ACCEL
#include <linux/cpu_rmap.h>
#endif
#include "mlx5_core.h"
#include "fpga/core.h"
#include "eswitch.h"
#include "lib/clock.h"
#include "diag/fw_tracer.h"

enum {
	MLX5_EQE_SIZE = sizeof(struct mlx5_eqe),
	MLX5_EQE_OWNER_INIT_VAL = 0x1,
};

enum {
	MLX5_EQ_STATE_ARMED = 0x9,
	MLX5_EQ_STATE_FIRED = 0xa,
	MLX5_EQ_STATE_ALWAYS_ARMED = 0xb,
};

enum {
	MLX5_NUM_SPARE_EQE = 0x80,
	MLX5_NUM_ASYNC_EQE = 0x1000,
	MLX5_NUM_CMD_EQE = 32,
	MLX5_NUM_PF_DRAIN = 64,
};

enum {
	MLX5_EQ_DOORBEL_OFFSET = 0x40,
};

#define MLX5_ASYNC_EVENT_MASK ((1ull << MLX5_EVENT_TYPE_PATH_MIG) | \
			       (1ull << MLX5_EVENT_TYPE_COMM_EST) | \
			       (1ull << MLX5_EVENT_TYPE_SQ_DRAINED) | \
			       (1ull << MLX5_EVENT_TYPE_CQ_ERROR) | \
			       (1ull << MLX5_EVENT_TYPE_WQ_CATAS_ERROR) | \
			       (1ull << MLX5_EVENT_TYPE_PATH_MIG_FAILED) | \
			       (1ull << MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR) | \
			       (1ull << MLX5_EVENT_TYPE_WQ_ACCESS_ERROR) | \
			       (1ull << MLX5_EVENT_TYPE_PORT_CHANGE) | \
			       (1ull << MLX5_EVENT_TYPE_SRQ_CATAS_ERROR) | \
			       (1ull << MLX5_EVENT_TYPE_SRQ_LAST_WQE) | \
			       (1ull << MLX5_EVENT_TYPE_SRQ_RQ_LIMIT))

struct map_eq_in {
	u64 mask;
	u32 reserved;
	u32 unmap_eqn;
};

struct cre_des_eq {
	u8 reserved[15];
	u8 eqn;
};

static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn)
{
	u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0};
	u32 in[MLX5_ST_SZ_DW(destroy_eq_in)] = {0};

	MLX5_SET(destroy_eq_in, in, opcode, MLX5_CMD_OP_DESTROY_EQ);
	MLX5_SET(destroy_eq_in, in, eq_number, eqn);
	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}

static struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry)
{
	return mlx5_buf_offset(&eq->buf, entry * MLX5_EQE_SIZE);
}
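
/* Return the EQE at the current consumer index if it is software-owned,
 * or NULL if it is still owned by hardware. Ownership alternates on each
 * pass over the queue, so the owner bit is compared against the wrap
 * parity derived from cons_index.
 */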
static struct mlx5_eqe *next_eqe_sw(struct mlx5_eq *eq)
{
	struct mlx5_eqe *eqe = get_eqe(eq, eq->cons_index & (eq->nent - 1));

	return ((eqe->owner & 1) ^ !!(eq->cons_index & eq->nent)) ? NULL : eqe;
}

static const char *eqe_type_str(u8 type)
{
	switch (type) {
	case MLX5_EVENT_TYPE_COMP:
		return "MLX5_EVENT_TYPE_COMP";
	case MLX5_EVENT_TYPE_PATH_MIG:
		return "MLX5_EVENT_TYPE_PATH_MIG";
	case MLX5_EVENT_TYPE_COMM_EST:
		return "MLX5_EVENT_TYPE_COMM_EST";
	case MLX5_EVENT_TYPE_SQ_DRAINED:
		return "MLX5_EVENT_TYPE_SQ_DRAINED";
	case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
		return "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
	case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
		return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
	case MLX5_EVENT_TYPE_CQ_ERROR:
		return "MLX5_EVENT_TYPE_CQ_ERROR";
	case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
		return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
	case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
		return "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
	case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
		return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
	case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
		return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
	case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
		return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
	case MLX5_EVENT_TYPE_INTERNAL_ERROR:
		return "MLX5_EVENT_TYPE_INTERNAL_ERROR";
	case MLX5_EVENT_TYPE_PORT_CHANGE:
		return "MLX5_EVENT_TYPE_PORT_CHANGE";
	case MLX5_EVENT_TYPE_GPIO_EVENT:
		return "MLX5_EVENT_TYPE_GPIO_EVENT";
	case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
		return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
	case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
		return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT";
	case MLX5_EVENT_TYPE_REMOTE_CONFIG:
		return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
	case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
		return "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
	case MLX5_EVENT_TYPE_STALL_EVENT:
		return "MLX5_EVENT_TYPE_STALL_EVENT";
	case MLX5_EVENT_TYPE_CMD:
		return "MLX5_EVENT_TYPE_CMD";
	case MLX5_EVENT_TYPE_PAGE_REQUEST:
		return "MLX5_EVENT_TYPE_PAGE_REQUEST";
	case MLX5_EVENT_TYPE_PAGE_FAULT:
		return "MLX5_EVENT_TYPE_PAGE_FAULT";
	case MLX5_EVENT_TYPE_PPS_EVENT:
		return "MLX5_EVENT_TYPE_PPS_EVENT";
	case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
		return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
	case MLX5_EVENT_TYPE_FPGA_ERROR:
		return "MLX5_EVENT_TYPE_FPGA_ERROR";
	case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
		return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
	case MLX5_EVENT_TYPE_GENERAL_EVENT:
		return "MLX5_EVENT_TYPE_GENERAL_EVENT";
	case MLX5_EVENT_TYPE_DEVICE_TRACER:
		return "MLX5_EVENT_TYPE_DEVICE_TRACER";
	default:
		return "Unrecognized event";
	}
}

static enum mlx5_dev_event port_subtype_event(u8 subtype)
{
	switch (subtype) {
	case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
		return MLX5_DEV_EVENT_PORT_DOWN;
	case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
		return MLX5_DEV_EVENT_PORT_UP;
	case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
		return MLX5_DEV_EVENT_PORT_INITIALIZED;
	case MLX5_PORT_CHANGE_SUBTYPE_LID:
		return MLX5_DEV_EVENT_LID_CHANGE;
	case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
		return MLX5_DEV_EVENT_PKEY_CHANGE;
	case MLX5_PORT_CHANGE_SUBTYPE_GUID:
		return MLX5_DEV_EVENT_GUID_CHANGE;
	case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
		return MLX5_DEV_EVENT_CLIENT_REREG;
	}
	return -1;
}
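
/* Write the 24-bit consumer index (tagged with the EQ number) to the EQ
 * doorbell. With arm != 0 the arm register is used, re-enabling event
 * generation; otherwise only the consumer index is updated.
 */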
static void eq_update_ci(struct mlx5_eq *eq, int arm)
{
	__be32 __iomem *addr = eq->doorbell + (arm ? 0 : 2);
	u32 val = (eq->cons_index & 0xffffff) | (eq->eqn << 24);

	__raw_writel((__force u32)cpu_to_be32(val), addr);
	/* We still want ordering, just not swabbing, so add a barrier */
	mb();
}

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
static void eqe_pf_action(struct work_struct *work)
{
	struct mlx5_pagefault *pfault = container_of(work,
						     struct mlx5_pagefault,
						     work);
	struct mlx5_eq *eq = pfault->eq;

	mlx5_core_page_fault(eq->dev, pfault);
	mempool_free(pfault, eq->pf_ctx.pool);
}
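
/* Drain page-fault EQEs: for each entry, allocate a pagefault descriptor
 * from the EQ's mempool, decode the RDMA or WQE variant and hand it to the
 * ordered workqueue. If the pool is empty, defer to eq_pf_action(), which
 * refills the pool and resumes processing.
 */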
static void eq_pf_process(struct mlx5_eq *eq)
{
	struct mlx5_core_dev *dev = eq->dev;
	struct mlx5_eqe_page_fault *pf_eqe;
	struct mlx5_pagefault *pfault;
	struct mlx5_eqe *eqe;
	int set_ci = 0;

	while ((eqe = next_eqe_sw(eq))) {
		pfault = mempool_alloc(eq->pf_ctx.pool, GFP_ATOMIC);
		if (!pfault) {
			schedule_work(&eq->pf_ctx.work);
			break;
		}

		dma_rmb();
		pf_eqe = &eqe->data.page_fault;
		pfault->event_subtype = eqe->sub_type;
		pfault->bytes_committed = be32_to_cpu(pf_eqe->bytes_committed);

		mlx5_core_dbg(dev,
			      "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x\n",
			      eqe->sub_type, pfault->bytes_committed);

		switch (eqe->sub_type) {
		case MLX5_PFAULT_SUBTYPE_RDMA:
			/* RDMA based event */
			pfault->type =
				be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24;
			pfault->token =
				be32_to_cpu(pf_eqe->rdma.pftype_token) &
				MLX5_24BIT_MASK;
			pfault->rdma.r_key =
				be32_to_cpu(pf_eqe->rdma.r_key);
			pfault->rdma.packet_size =
				be16_to_cpu(pf_eqe->rdma.packet_length);
			pfault->rdma.rdma_op_len =
				be32_to_cpu(pf_eqe->rdma.rdma_op_len);
			pfault->rdma.rdma_va =
				be64_to_cpu(pf_eqe->rdma.rdma_va);
			mlx5_core_dbg(dev,
				      "PAGE_FAULT: type:0x%x, token: 0x%06x, r_key: 0x%08x\n",
				      pfault->type, pfault->token,
				      pfault->rdma.r_key);
			mlx5_core_dbg(dev,
				      "PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n",
				      pfault->rdma.rdma_op_len,
				      pfault->rdma.rdma_va);
			break;

		case MLX5_PFAULT_SUBTYPE_WQE:
			/* WQE based event */
			pfault->type =
				(be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7;
			pfault->token =
				be32_to_cpu(pf_eqe->wqe.token);
			pfault->wqe.wq_num =
				be32_to_cpu(pf_eqe->wqe.pftype_wq) &
				MLX5_24BIT_MASK;
			pfault->wqe.wqe_index =
				be16_to_cpu(pf_eqe->wqe.wqe_index);
			pfault->wqe.packet_size =
				be16_to_cpu(pf_eqe->wqe.packet_length);
			mlx5_core_dbg(dev,
				      "PAGE_FAULT: type:0x%x, token: 0x%06x, wq_num: 0x%06x, wqe_index: 0x%04x\n",
				      pfault->type, pfault->token,
				      pfault->wqe.wq_num,
				      pfault->wqe.wqe_index);
			break;

		default:
			mlx5_core_warn(dev,
				       "Unsupported page fault event sub-type: 0x%02hhx\n",
				       eqe->sub_type);
			/* Unsupported page faults should still be
			 * resolved by the page fault handler
			 */
		}

		pfault->eq = eq;
		INIT_WORK(&pfault->work, eqe_pf_action);
		queue_work(eq->pf_ctx.wq, &pfault->work);

		++eq->cons_index;
		++set_ci;

		if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
			eq_update_ci(eq, 0);
			set_ci = 0;
		}
	}

	eq_update_ci(eq, 1);
}

static irqreturn_t mlx5_eq_pf_int(int irq, void *eq_ptr)
{
	struct mlx5_eq *eq = eq_ptr;
	unsigned long flags;

	if (spin_trylock_irqsave(&eq->pf_ctx.lock, flags)) {
		eq_pf_process(eq);
		spin_unlock_irqrestore(&eq->pf_ctx.lock, flags);
	} else {
		schedule_work(&eq->pf_ctx.work);
	}

	return IRQ_HANDLED;
}

/* mempool_refill() was proposed but unfortunately wasn't accepted
 * http://lkml.iu.edu/hypermail/linux/kernel/1512.1/05073.html
 * Cheap workaround.
 */
static void mempool_refill(mempool_t *pool)
{
	while (pool->curr_nr < pool->min_nr)
		mempool_free(mempool_alloc(pool, GFP_KERNEL), pool);
}

static void eq_pf_action(struct work_struct *work)
{
	struct mlx5_eq *eq = container_of(work, struct mlx5_eq, pf_ctx.work);

	mempool_refill(eq->pf_ctx.pool);

	spin_lock_irq(&eq->pf_ctx.lock);
	eq_pf_process(eq);
	spin_unlock_irq(&eq->pf_ctx.lock);
}

static int init_pf_ctx(struct mlx5_eq_pagefault *pf_ctx, const char *name)
{
	spin_lock_init(&pf_ctx->lock);
	INIT_WORK(&pf_ctx->work, eq_pf_action);

	pf_ctx->wq = alloc_ordered_workqueue(name,
					     WQ_MEM_RECLAIM);
	if (!pf_ctx->wq)
		return -ENOMEM;

	pf_ctx->pool = mempool_create_kmalloc_pool
		(MLX5_NUM_PF_DRAIN, sizeof(struct mlx5_pagefault));
	if (!pf_ctx->pool)
		goto err_wq;

	return 0;
err_wq:
	destroy_workqueue(pf_ctx->wq);
	return -ENOMEM;
}

int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 token,
				u32 wq_num, u8 type, int error)
{
	u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = {0};
	u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = {0};

	MLX5_SET(page_fault_resume_in, in, opcode,
		 MLX5_CMD_OP_PAGE_FAULT_RESUME);
	MLX5_SET(page_fault_resume_in, in, error, !!error);
	MLX5_SET(page_fault_resume_in, in, page_fault_type, type);
	MLX5_SET(page_fault_resume_in, in, wq_number, wq_num);
	MLX5_SET(page_fault_resume_in, in, token, token);

	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume);
#endif

static void general_event_handler(struct mlx5_core_dev *dev,
				  struct mlx5_eqe *eqe)
{
	switch (eqe->sub_type) {
	case MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT:
		if (dev->event)
			dev->event(dev, MLX5_DEV_EVENT_DELAY_DROP_TIMEOUT, 0);
		break;
	default:
		mlx5_core_dbg(dev, "General event with unrecognized subtype: sub_type %d\n",
			      eqe->sub_type);
	}
}

static void mlx5_temp_warning_event(struct mlx5_core_dev *dev,
				    struct mlx5_eqe *eqe)
{
	u64 value_lsb;
	u64 value_msb;

	value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
	value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);

	mlx5_core_warn(dev,
		       "High temperature on sensors with bit set %llx %llx",
		       value_msb, value_lsb);
}

/* caller must eventually call mlx5_cq_put on the returned cq */
static struct mlx5_core_cq *mlx5_eq_cq_get(struct mlx5_eq *eq, u32 cqn)
{
	struct mlx5_cq_table *table = &eq->cq_table;
	struct mlx5_core_cq *cq = NULL;

	spin_lock(&table->lock);
	cq = radix_tree_lookup(&table->tree, cqn);
	if (likely(cq))
		mlx5_cq_hold(cq);
	spin_unlock(&table->lock);

	return cq;
}

static void mlx5_eq_cq_completion(struct mlx5_eq *eq, u32 cqn)
{
	struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);

	if (unlikely(!cq)) {
		mlx5_core_warn(eq->dev, "Completion event for bogus CQ 0x%x\n", cqn);
		return;
	}

	++cq->arm_sn;

	cq->comp(cq);

	mlx5_cq_put(cq);
}

static void mlx5_eq_cq_event(struct mlx5_eq *eq, u32 cqn, int event_type)
{
	struct mlx5_core_cq *cq = mlx5_eq_cq_get(eq, cqn);

	if (unlikely(!cq)) {
		mlx5_core_warn(eq->dev, "Async event for bogus CQ 0x%x\n", cqn);
		return;
	}

	cq->event(cq, event_type);

	mlx5_cq_put(cq);
}
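
/* Interrupt handler for the completion and async EQs: walks all
 * software-owned EQEs, dispatches them by type (completions, QP/SRQ/CQ
 * errors, command completions, port and module events, page requests,
 * FPGA and tracer events), updates the consumer index periodically to
 * avoid overflow, and finally re-arms the EQ.
 */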
static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
{
	struct mlx5_eq *eq = eq_ptr;
	struct mlx5_core_dev *dev = eq->dev;
	struct mlx5_eqe *eqe;
	int set_ci = 0;
	u32 cqn = -1;
	u32 rsn;
	u8 port;

	while ((eqe = next_eqe_sw(eq))) {
		/*
		 * Make sure we read EQ entry contents after we've
		 * checked the ownership bit.
		 */
		dma_rmb();

		mlx5_core_dbg(eq->dev, "eqn %d, eqe type %s\n",
			      eq->eqn, eqe_type_str(eqe->type));
		switch (eqe->type) {
		case MLX5_EVENT_TYPE_COMP:
			cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
			mlx5_eq_cq_completion(eq, cqn);
			break;
		case MLX5_EVENT_TYPE_DCT_DRAINED:
			rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
			rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN);
			mlx5_rsc_event(dev, rsn, eqe->type);
			break;
		case MLX5_EVENT_TYPE_PATH_MIG:
		case MLX5_EVENT_TYPE_COMM_EST:
		case MLX5_EVENT_TYPE_SQ_DRAINED:
		case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
		case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
		case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
		case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
		case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
			rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
			rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN);
			mlx5_core_dbg(dev, "event %s(%d) arrived on resource 0x%x\n",
				      eqe_type_str(eqe->type), eqe->type, rsn);
			mlx5_rsc_event(dev, rsn, eqe->type);
			break;

		case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
		case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
			rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
			mlx5_core_dbg(dev, "SRQ event %s(%d): srqn 0x%x\n",
				      eqe_type_str(eqe->type), eqe->type, rsn);
			mlx5_srq_event(dev, rsn, eqe->type);
			break;

		case MLX5_EVENT_TYPE_CMD:
			mlx5_cmd_comp_handler(dev, be32_to_cpu(eqe->data.cmd.vector), false);
			break;

		case MLX5_EVENT_TYPE_PORT_CHANGE:
			port = (eqe->data.port.port >> 4) & 0xf;
			switch (eqe->sub_type) {
			case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
			case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
			case MLX5_PORT_CHANGE_SUBTYPE_LID:
			case MLX5_PORT_CHANGE_SUBTYPE_PKEY:
			case MLX5_PORT_CHANGE_SUBTYPE_GUID:
			case MLX5_PORT_CHANGE_SUBTYPE_CLIENT_REREG:
			case MLX5_PORT_CHANGE_SUBTYPE_INITIALIZED:
				if (dev->event)
					dev->event(dev, port_subtype_event(eqe->sub_type),
						   (unsigned long)port);
				break;
			default:
				mlx5_core_warn(dev, "Port event with unrecognized subtype: port %d, sub_type %d\n",
					       port, eqe->sub_type);
			}
			break;

		case MLX5_EVENT_TYPE_CQ_ERROR:
			cqn = be32_to_cpu(eqe->data.cq_err.cqn) & 0xffffff;
			mlx5_core_warn(dev, "CQ error on CQN 0x%x, syndrome 0x%x\n",
				       cqn, eqe->data.cq_err.syndrome);
			mlx5_eq_cq_event(eq, cqn, eqe->type);
			break;

		case MLX5_EVENT_TYPE_PAGE_REQUEST:
			{
				u16 func_id = be16_to_cpu(eqe->data.req_pages.func_id);
				s32 npages = be32_to_cpu(eqe->data.req_pages.num_pages);

				mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n",
					      func_id, npages);
				mlx5_core_req_pages_handler(dev, func_id, npages);
			}
			break;

		case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
			mlx5_eswitch_vport_event(dev->priv.eswitch, eqe);
			break;

		case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
			mlx5_port_module_event(dev, eqe);
			break;

		case MLX5_EVENT_TYPE_PPS_EVENT:
			mlx5_pps_event(dev, eqe);
			break;

		case MLX5_EVENT_TYPE_FPGA_ERROR:
		case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
			mlx5_fpga_event(dev, eqe->type, &eqe->data.raw);
			break;

		case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
			mlx5_temp_warning_event(dev, eqe);
			break;

		case MLX5_EVENT_TYPE_GENERAL_EVENT:
			general_event_handler(dev, eqe);
			break;

		case MLX5_EVENT_TYPE_DEVICE_TRACER:
			mlx5_fw_tracer_event(dev, eqe);
			break;

		default:
			mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n",
				       eqe->type, eq->eqn);
			break;
		}

		++eq->cons_index;
		++set_ci;

		/* The HCA will think the queue has overflowed if we
		 * don't tell it we've been processing events. We
		 * create our EQs with MLX5_NUM_SPARE_EQE extra
		 * entries, so we must update our consumer index at
		 * least that often.
		 */
		if (unlikely(set_ci >= MLX5_NUM_SPARE_EQE)) {
			eq_update_ci(eq, 0);
			set_ci = 0;
		}
	}

	eq_update_ci(eq, 1);

	if (cqn != -1)
		tasklet_schedule(&eq->tasklet_ctx.task);

	return IRQ_HANDLED;
}

/* Some architectures don't latch interrupts when they are disabled, so using
 * mlx5_eq_poll_irq_disabled could end up losing interrupts while trying to
 * avoid losing them. It is not recommended to use it, unless this is the last
 * resort.
 */
u32 mlx5_eq_poll_irq_disabled(struct mlx5_eq *eq)
{
	u32 count_eqe;

	disable_irq(eq->irqn);
	count_eqe = eq->cons_index;
	mlx5_eq_int(eq->irqn, eq);
	count_eqe = eq->cons_index - count_eqe;
	enable_irq(eq->irqn);

	return count_eqe;
}
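
/* Mark every EQE as hardware-owned so that, starting from cons_index 0,
 * next_eqe_sw() only returns entries the device has actually written.
 */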
static void init_eq_buf(struct mlx5_eq *eq)
{
	struct mlx5_eqe *eqe;
	int i;

	for (i = 0; i < eq->nent; i++) {
		eqe = get_eqe(eq, i);
		eqe->owner = MLX5_EQE_OWNER_INIT_VAL;
	}
}
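
/* Allocate the EQ buffer, issue CREATE_EQ to the firmware, request the IRQ
 * for the given vector and register the EQ with debugfs. Page-fault EQs get
 * their own workqueue/mempool context; all other EQs get a tasklet used to
 * batch CQ completions.
 */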
int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
		       int nent, u64 mask, const char *name,
		       enum mlx5_eq_type type)
{
	struct mlx5_cq_table *cq_table = &eq->cq_table;
	u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
	struct mlx5_priv *priv = &dev->priv;
	irq_handler_t handler;
	__be64 *pas;
	void *eqc;
	int inlen;
	u32 *in;
	int err;

	/* Init CQ table */
	memset(cq_table, 0, sizeof(*cq_table));
	spin_lock_init(&cq_table->lock);
	INIT_RADIX_TREE(&cq_table->tree, GFP_ATOMIC);

	eq->type = type;
	eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE);
	eq->cons_index = 0;
	err = mlx5_buf_alloc(dev, eq->nent * MLX5_EQE_SIZE, &eq->buf);
	if (err)
		return err;

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	if (type == MLX5_EQ_TYPE_PF)
		handler = mlx5_eq_pf_int;
	else
#endif
		handler = mlx5_eq_int;

	init_eq_buf(eq);

	inlen = MLX5_ST_SZ_BYTES(create_eq_in) +
		MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->buf.npages;

	in = kvzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_buf;
	}

	pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas);
	mlx5_fill_page_array(&eq->buf, pas);

	MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ);
	MLX5_SET64(create_eq_in, in, event_bitmask, mask);

	eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry);
	MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent));
	MLX5_SET(eqc, eqc, uar_page, priv->uar->index);
	MLX5_SET(eqc, eqc, intr, vecidx);
	MLX5_SET(eqc, eqc, log_page_size,
		 eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);

	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
	if (err)
		goto err_in;

	snprintf(priv->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s",
		 name, pci_name(dev->pdev));

	eq->eqn = MLX5_GET(create_eq_out, out, eq_number);
	eq->irqn = pci_irq_vector(dev->pdev, vecidx);
	eq->dev = dev;
	eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET;
	err = request_irq(eq->irqn, handler, 0,
			  priv->irq_info[vecidx].name, eq);
	if (err)
		goto err_eq;

	err = mlx5_debug_eq_add(dev, eq);
	if (err)
		goto err_irq;

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	if (type == MLX5_EQ_TYPE_PF) {
		err = init_pf_ctx(&eq->pf_ctx, name);
		if (err)
			goto err_irq;
	} else
#endif
	{
		INIT_LIST_HEAD(&eq->tasklet_ctx.list);
		INIT_LIST_HEAD(&eq->tasklet_ctx.process_list);
		spin_lock_init(&eq->tasklet_ctx.lock);
		tasklet_init(&eq->tasklet_ctx.task, mlx5_cq_tasklet_cb,
			     (unsigned long)&eq->tasklet_ctx);
	}

	/* EQs are created in ARMED state */
	eq_update_ci(eq, 1);

	kvfree(in);
	return 0;

err_irq:
	free_irq(eq->irqn, eq);

err_eq:
	mlx5_cmd_destroy_eq(dev, eq->eqn);

err_in:
	kvfree(in);

err_buf:
	mlx5_buf_free(dev, &eq->buf);
	return err;
}
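
/* Tear down an EQ: free its IRQ, issue DESTROY_EQ to the firmware, then
 * release the tasklet or page-fault context and the EQ buffer.
 */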
int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
{
	int err;

	mlx5_debug_eq_remove(dev, eq);
	free_irq(eq->irqn, eq);
	err = mlx5_cmd_destroy_eq(dev, eq->eqn);
	if (err)
		mlx5_core_warn(dev, "failed to destroy a previously created eq: eqn %d\n",
			       eq->eqn);
	synchronize_irq(eq->irqn);

	if (eq->type == MLX5_EQ_TYPE_COMP) {
		tasklet_disable(&eq->tasklet_ctx.task);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	} else if (eq->type == MLX5_EQ_TYPE_PF) {
		cancel_work_sync(&eq->pf_ctx.work);
		destroy_workqueue(eq->pf_ctx.wq);
		mempool_destroy(eq->pf_ctx.pool);
#endif
	}
	mlx5_buf_free(dev, &eq->buf);

	return err;
}
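
/* The per-EQ radix tree maps CQ numbers to their mlx5_core_cq so completion
 * and async events can be routed back to the right CQ.
 */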
int mlx5_eq_add_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
{
	struct mlx5_cq_table *table = &eq->cq_table;
	int err;

	spin_lock_irq(&table->lock);
	err = radix_tree_insert(&table->tree, cq->cqn, cq);
	spin_unlock_irq(&table->lock);

	return err;
}

int mlx5_eq_del_cq(struct mlx5_eq *eq, struct mlx5_core_cq *cq)
{
	struct mlx5_cq_table *table = &eq->cq_table;
	struct mlx5_core_cq *tmp;

	spin_lock_irq(&table->lock);
	tmp = radix_tree_delete(&table->tree, cq->cqn);
	spin_unlock_irq(&table->lock);

	if (!tmp) {
		mlx5_core_warn(eq->dev, "cq 0x%x not found in eq 0x%x tree\n", cq->cqn, eq->eqn);
		return -ENOENT;
	}

	if (tmp != cq) {
		mlx5_core_warn(eq->dev, "corruption on cqn 0x%x in eq 0x%x\n", cq->cqn, eq->eqn);
		return -EINVAL;
	}

	return 0;
}

int mlx5_eq_init(struct mlx5_core_dev *dev)
{
	int err;

	spin_lock_init(&dev->priv.eq_table.lock);

	err = mlx5_eq_debugfs_init(dev);

	return err;
}

void mlx5_eq_cleanup(struct mlx5_core_dev *dev)
{
	mlx5_eq_debugfs_cleanup(dev);
}
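
/* Create the command, async and pages EQs (plus the page-fault EQ when
 * on-demand paging is supported), building the async event mask from the
 * device capabilities, and switch the command interface to event mode.
 */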
int mlx5_start_eqs(struct mlx5_core_dev *dev)
{
	struct mlx5_eq_table *table = &dev->priv.eq_table;
	u64 async_event_mask = MLX5_ASYNC_EVENT_MASK;
	int err;

	if (MLX5_VPORT_MANAGER(dev))
		async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE);

	if (MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_ETH &&
	    MLX5_CAP_GEN(dev, general_notification_event))
		async_event_mask |= (1ull << MLX5_EVENT_TYPE_GENERAL_EVENT);

	if (MLX5_CAP_GEN(dev, port_module_event))
		async_event_mask |= (1ull << MLX5_EVENT_TYPE_PORT_MODULE_EVENT);
	else
		mlx5_core_dbg(dev, "port_module_event is not set\n");

	if (MLX5_PPS_CAP(dev))
		async_event_mask |= (1ull << MLX5_EVENT_TYPE_PPS_EVENT);

	if (MLX5_CAP_GEN(dev, fpga))
		async_event_mask |= (1ull << MLX5_EVENT_TYPE_FPGA_ERROR) |
				    (1ull << MLX5_EVENT_TYPE_FPGA_QP_ERROR);

	if (MLX5_CAP_GEN_MAX(dev, dct))
		async_event_mask |= (1ull << MLX5_EVENT_TYPE_DCT_DRAINED);

	if (MLX5_CAP_GEN(dev, temp_warn_event))
		async_event_mask |= (1ull << MLX5_EVENT_TYPE_TEMP_WARN_EVENT);

	if (MLX5_CAP_MCAM_REG(dev, tracer_registers))
		async_event_mask |= (1ull << MLX5_EVENT_TYPE_DEVICE_TRACER);

	err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
				 MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
				 "mlx5_cmd_eq", MLX5_EQ_TYPE_ASYNC);
	if (err) {
		mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err);
		return err;
	}

	mlx5_cmd_use_events(dev);

	err = mlx5_create_map_eq(dev, &table->async_eq, MLX5_EQ_VEC_ASYNC,
				 MLX5_NUM_ASYNC_EQE, async_event_mask,
				 "mlx5_async_eq", MLX5_EQ_TYPE_ASYNC);
	if (err) {
		mlx5_core_warn(dev, "failed to create async EQ %d\n", err);
		goto err1;
	}

	err = mlx5_create_map_eq(dev, &table->pages_eq,
				 MLX5_EQ_VEC_PAGES,
				 /* TODO: sriov max_vf + */ 1,
				 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq",
				 MLX5_EQ_TYPE_ASYNC);
	if (err) {
		mlx5_core_warn(dev, "failed to create pages EQ %d\n", err);
		goto err2;
	}

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	if (MLX5_CAP_GEN(dev, pg)) {
		err = mlx5_create_map_eq(dev, &table->pfault_eq,
					 MLX5_EQ_VEC_PFAULT,
					 MLX5_NUM_ASYNC_EQE,
					 1 << MLX5_EVENT_TYPE_PAGE_FAULT,
					 "mlx5_page_fault_eq",
					 MLX5_EQ_TYPE_PF);
		if (err) {
			mlx5_core_warn(dev, "failed to create page fault EQ %d\n",
				       err);
			goto err3;
		}
	}

	return err;
err3:
	mlx5_destroy_unmap_eq(dev, &table->pages_eq);
#else
	return err;
#endif

err2:
	mlx5_destroy_unmap_eq(dev, &table->async_eq);

err1:
	mlx5_cmd_use_polling(dev);
	mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
	return err;
}
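
/* Destroy the EQs in reverse creation order, switching the command
 * interface back to polling mode before the command EQ goes away.
 */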
void mlx5_stop_eqs(struct mlx5_core_dev *dev)
{
	struct mlx5_eq_table *table = &dev->priv.eq_table;
	int err;

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	if (MLX5_CAP_GEN(dev, pg)) {
		err = mlx5_destroy_unmap_eq(dev, &table->pfault_eq);
		if (err)
			mlx5_core_err(dev, "failed to destroy page fault eq, err(%d)\n",
				      err);
	}
#endif

	err = mlx5_destroy_unmap_eq(dev, &table->pages_eq);
	if (err)
		mlx5_core_err(dev, "failed to destroy pages eq, err(%d)\n",
			      err);

	err = mlx5_destroy_unmap_eq(dev, &table->async_eq);
	if (err)
		mlx5_core_err(dev, "failed to destroy async eq, err(%d)\n",
			      err);

	mlx5_cmd_use_polling(dev);

	err = mlx5_destroy_unmap_eq(dev, &table->cmd_eq);
	if (err)
		mlx5_core_err(dev, "failed to destroy command eq, err(%d)\n",
			      err);
}

int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
		       u32 *out, int outlen)
{
	u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {0};

	MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ);
	MLX5_SET(query_eq_in, in, eq_number, eq->eqn);
	return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
}

/* This function should only be called after mlx5_cmd_force_teardown_hca */
void mlx5_core_eq_free_irqs(struct mlx5_core_dev *dev)
{
	struct mlx5_eq_table *table = &dev->priv.eq_table;
	struct mlx5_eq *eq;

#ifdef CONFIG_RFS_ACCEL
	if (dev->rmap) {
		free_irq_cpu_rmap(dev->rmap);
		dev->rmap = NULL;
	}
#endif
	list_for_each_entry(eq, &table->comp_eqs_list, list)
		free_irq(eq->irqn, eq);

	free_irq(table->pages_eq.irqn, &table->pages_eq);
	free_irq(table->async_eq.irqn, &table->async_eq);
	free_irq(table->cmd_eq.irqn, &table->cmd_eq);
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	if (MLX5_CAP_GEN(dev, pg))
		free_irq(table->pfault_eq.irqn, &table->pfault_eq);
#endif
	pci_free_irq_vectors(dev->pdev);
}