fault.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385
  1. /*
  2. * Copyright(c) 2018 Intel Corporation.
  3. *
  4. * This file is provided under a dual BSD/GPLv2 license. When using or
  5. * redistributing this file, you may do so under either license.
  6. *
  7. * GPL LICENSE SUMMARY
  8. *
  9. * This program is free software; you can redistribute it and/or modify
  10. * it under the terms of version 2 of the GNU General Public License as
  11. * published by the Free Software Foundation.
  12. *
  13. * This program is distributed in the hope that it will be useful, but
  14. * WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. * General Public License for more details.
  17. *
  18. * BSD LICENSE
  19. *
  20. * Redistribution and use in source and binary forms, with or without
  21. * modification, are permitted provided that the following conditions
  22. * are met:
  23. *
  24. * - Redistributions of source code must retain the above copyright
  25. * notice, this list of conditions and the following disclaimer.
  26. * - Redistributions in binary form must reproduce the above copyright
  27. * notice, this list of conditions and the following disclaimer in
  28. * the documentation and/or other materials provided with the
  29. * distribution.
  30. * - Neither the name of Intel Corporation nor the names of its
  31. * contributors may be used to endorse or promote products derived
  32. * from this software without specific prior written permission.
  33. *
  34. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  35. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  36. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  37. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  38. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  39. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  40. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  41. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  42. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  43. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  44. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  45. *
  46. */
  47. #include <linux/debugfs.h>
  48. #include <linux/seq_file.h>
  49. #include <linux/kernel.h>
  50. #include <linux/module.h>
  51. #include <linux/types.h>
  52. #include <linux/bitmap.h>
  53. #include "debugfs.h"
  54. #include "fault.h"
  55. #include "trace.h"
/*
 * Direction bit flags for fault injection. The configured
 * fault->direction is tested as a mask against one of these in
 * __hfi1_should_fault(); the TX and RX entry points pass
 * HFI1_FAULT_DIR_TX and HFI1_FAULT_DIR_RX respectively, and the
 * default set at init time is HFI1_FAULT_DIR_TXRX (both).
 */
#define HFI1_FAULT_DIR_TX BIT(0)
#define HFI1_FAULT_DIR_RX BIT(1)
#define HFI1_FAULT_DIR_TXRX (HFI1_FAULT_DIR_TX | HFI1_FAULT_DIR_RX)
  59. static void *_fault_stats_seq_start(struct seq_file *s, loff_t *pos)
  60. {
  61. struct hfi1_opcode_stats_perctx *opstats;
  62. if (*pos >= ARRAY_SIZE(opstats->stats))
  63. return NULL;
  64. return pos;
  65. }
  66. static void *_fault_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
  67. {
  68. struct hfi1_opcode_stats_perctx *opstats;
  69. ++*pos;
  70. if (*pos >= ARRAY_SIZE(opstats->stats))
  71. return NULL;
  72. return pos;
  73. }
  74. static void _fault_stats_seq_stop(struct seq_file *s, void *v)
  75. {
  76. }
  77. static int _fault_stats_seq_show(struct seq_file *s, void *v)
  78. {
  79. loff_t *spos = v;
  80. loff_t i = *spos, j;
  81. u64 n_packets = 0, n_bytes = 0;
  82. struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
  83. struct hfi1_devdata *dd = dd_from_dev(ibd);
  84. struct hfi1_ctxtdata *rcd;
  85. for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) {
  86. rcd = hfi1_rcd_get_by_index(dd, j);
  87. if (rcd) {
  88. n_packets += rcd->opstats->stats[i].n_packets;
  89. n_bytes += rcd->opstats->stats[i].n_bytes;
  90. }
  91. hfi1_rcd_put(rcd);
  92. }
  93. for_each_possible_cpu(j) {
  94. struct hfi1_opcode_stats_perctx *sp =
  95. per_cpu_ptr(dd->tx_opstats, j);
  96. n_packets += sp->stats[i].n_packets;
  97. n_bytes += sp->stats[i].n_bytes;
  98. }
  99. if (!n_packets && !n_bytes)
  100. return SEQ_SKIP;
  101. if (!ibd->fault->n_rxfaults[i] && !ibd->fault->n_txfaults[i])
  102. return SEQ_SKIP;
  103. seq_printf(s, "%02llx %llu/%llu (faults rx:%llu faults: tx:%llu)\n", i,
  104. (unsigned long long)n_packets,
  105. (unsigned long long)n_bytes,
  106. (unsigned long long)ibd->fault->n_rxfaults[i],
  107. (unsigned long long)ibd->fault->n_txfaults[i]);
  108. return 0;
  109. }
/*
 * Instantiate the debugfs plumbing for the "fault_stats" file.
 * NOTE(review): these macros are defined outside this file (presumably
 * debugfs.h); they appear to build the seq_operations, open handler and
 * file_operations from the _fault_stats_seq_* callbacks above, which
 * DEBUGFS_SEQ_FILE_CREATE(fault_stats, ...) later relies on - confirm
 * against the macro definitions.
 */
DEBUGFS_SEQ_FILE_OPS(fault_stats);
DEBUGFS_SEQ_FILE_OPEN(fault_stats);
DEBUGFS_FILE_OPS(fault_stats);
  113. static int fault_opcodes_open(struct inode *inode, struct file *file)
  114. {
  115. file->private_data = inode->i_private;
  116. return nonseekable_open(inode, file);
  117. }
  118. static ssize_t fault_opcodes_write(struct file *file, const char __user *buf,
  119. size_t len, loff_t *pos)
  120. {
  121. ssize_t ret = 0;
  122. /* 1280 = 256 opcodes * 4 chars/opcode + 255 commas + NULL */
  123. size_t copy, datalen = 1280;
  124. char *data, *token, *ptr, *end;
  125. struct fault *fault = file->private_data;
  126. data = kcalloc(datalen, sizeof(*data), GFP_KERNEL);
  127. if (!data)
  128. return -ENOMEM;
  129. copy = min(len, datalen - 1);
  130. if (copy_from_user(data, buf, copy)) {
  131. ret = -EFAULT;
  132. goto free_data;
  133. }
  134. ret = debugfs_file_get(file->f_path.dentry);
  135. if (unlikely(ret))
  136. goto free_data;
  137. ptr = data;
  138. token = ptr;
  139. for (ptr = data; *ptr; ptr = end + 1, token = ptr) {
  140. char *dash;
  141. unsigned long range_start, range_end, i;
  142. bool remove = false;
  143. unsigned long bound = 1U << BITS_PER_BYTE;
  144. end = strchr(ptr, ',');
  145. if (end)
  146. *end = '\0';
  147. if (token[0] == '-') {
  148. remove = true;
  149. token++;
  150. }
  151. dash = strchr(token, '-');
  152. if (dash)
  153. *dash = '\0';
  154. if (kstrtoul(token, 0, &range_start))
  155. break;
  156. if (dash) {
  157. token = dash + 1;
  158. if (kstrtoul(token, 0, &range_end))
  159. break;
  160. } else {
  161. range_end = range_start;
  162. }
  163. if (range_start == range_end && range_start == -1UL) {
  164. bitmap_zero(fault->opcodes, sizeof(fault->opcodes) *
  165. BITS_PER_BYTE);
  166. break;
  167. }
  168. /* Check the inputs */
  169. if (range_start >= bound || range_end >= bound)
  170. break;
  171. for (i = range_start; i <= range_end; i++) {
  172. if (remove)
  173. clear_bit(i, fault->opcodes);
  174. else
  175. set_bit(i, fault->opcodes);
  176. }
  177. if (!end)
  178. break;
  179. }
  180. ret = len;
  181. debugfs_file_put(file->f_path.dentry);
  182. free_data:
  183. kfree(data);
  184. return ret;
  185. }
  186. static ssize_t fault_opcodes_read(struct file *file, char __user *buf,
  187. size_t len, loff_t *pos)
  188. {
  189. ssize_t ret = 0;
  190. char *data;
  191. size_t datalen = 1280, size = 0; /* see fault_opcodes_write() */
  192. unsigned long bit = 0, zero = 0;
  193. struct fault *fault = file->private_data;
  194. size_t bitsize = sizeof(fault->opcodes) * BITS_PER_BYTE;
  195. data = kcalloc(datalen, sizeof(*data), GFP_KERNEL);
  196. if (!data)
  197. return -ENOMEM;
  198. ret = debugfs_file_get(file->f_path.dentry);
  199. if (unlikely(ret))
  200. goto free_data;
  201. bit = find_first_bit(fault->opcodes, bitsize);
  202. while (bit < bitsize) {
  203. zero = find_next_zero_bit(fault->opcodes, bitsize, bit);
  204. if (zero - 1 != bit)
  205. size += snprintf(data + size,
  206. datalen - size - 1,
  207. "0x%lx-0x%lx,", bit, zero - 1);
  208. else
  209. size += snprintf(data + size,
  210. datalen - size - 1, "0x%lx,",
  211. bit);
  212. bit = find_next_bit(fault->opcodes, bitsize, zero);
  213. }
  214. debugfs_file_put(file->f_path.dentry);
  215. data[size - 1] = '\n';
  216. data[size] = '\0';
  217. ret = simple_read_from_buffer(buf, len, pos, data, size);
  218. free_data:
  219. kfree(data);
  220. return ret;
  221. }
  222. static const struct file_operations __fault_opcodes_fops = {
  223. .owner = THIS_MODULE,
  224. .open = fault_opcodes_open,
  225. .read = fault_opcodes_read,
  226. .write = fault_opcodes_write,
  227. .llseek = no_llseek
  228. };
  229. void hfi1_fault_exit_debugfs(struct hfi1_ibdev *ibd)
  230. {
  231. if (ibd->fault)
  232. debugfs_remove_recursive(ibd->fault->dir);
  233. kfree(ibd->fault);
  234. ibd->fault = NULL;
  235. }
  236. int hfi1_fault_init_debugfs(struct hfi1_ibdev *ibd)
  237. {
  238. struct dentry *parent = ibd->hfi1_ibdev_dbg;
  239. ibd->fault = kzalloc(sizeof(*ibd->fault), GFP_KERNEL);
  240. if (!ibd->fault)
  241. return -ENOMEM;
  242. ibd->fault->attr.interval = 1;
  243. ibd->fault->attr.require_end = ULONG_MAX;
  244. ibd->fault->attr.stacktrace_depth = 32;
  245. ibd->fault->attr.dname = NULL;
  246. ibd->fault->attr.verbose = 0;
  247. ibd->fault->enable = false;
  248. ibd->fault->opcode = false;
  249. ibd->fault->fault_skip = 0;
  250. ibd->fault->skip = 0;
  251. ibd->fault->direction = HFI1_FAULT_DIR_TXRX;
  252. ibd->fault->suppress_err = false;
  253. bitmap_zero(ibd->fault->opcodes,
  254. sizeof(ibd->fault->opcodes) * BITS_PER_BYTE);
  255. ibd->fault->dir =
  256. fault_create_debugfs_attr("fault", parent,
  257. &ibd->fault->attr);
  258. if (IS_ERR(ibd->fault->dir)) {
  259. kfree(ibd->fault);
  260. ibd->fault = NULL;
  261. return -ENOENT;
  262. }
  263. DEBUGFS_SEQ_FILE_CREATE(fault_stats, ibd->fault->dir, ibd);
  264. if (!debugfs_create_bool("enable", 0600, ibd->fault->dir,
  265. &ibd->fault->enable))
  266. goto fail;
  267. if (!debugfs_create_bool("suppress_err", 0600,
  268. ibd->fault->dir,
  269. &ibd->fault->suppress_err))
  270. goto fail;
  271. if (!debugfs_create_bool("opcode_mode", 0600, ibd->fault->dir,
  272. &ibd->fault->opcode))
  273. goto fail;
  274. if (!debugfs_create_file("opcodes", 0600, ibd->fault->dir,
  275. ibd->fault, &__fault_opcodes_fops))
  276. goto fail;
  277. if (!debugfs_create_u64("skip_pkts", 0600,
  278. ibd->fault->dir,
  279. &ibd->fault->fault_skip))
  280. goto fail;
  281. if (!debugfs_create_u64("skip_usec", 0600,
  282. ibd->fault->dir,
  283. &ibd->fault->fault_skip_usec))
  284. goto fail;
  285. if (!debugfs_create_u8("direction", 0600, ibd->fault->dir,
  286. &ibd->fault->direction))
  287. goto fail;
  288. return 0;
  289. fail:
  290. hfi1_fault_exit_debugfs(ibd);
  291. return -ENOMEM;
  292. }
  293. bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
  294. {
  295. if (ibd->fault)
  296. return ibd->fault->suppress_err;
  297. return false;
  298. }
  299. static bool __hfi1_should_fault(struct hfi1_ibdev *ibd, u32 opcode,
  300. u8 direction)
  301. {
  302. bool ret = false;
  303. if (!ibd->fault || !ibd->fault->enable)
  304. return false;
  305. if (!(ibd->fault->direction & direction))
  306. return false;
  307. if (ibd->fault->opcode) {
  308. if (bitmap_empty(ibd->fault->opcodes,
  309. (sizeof(ibd->fault->opcodes) *
  310. BITS_PER_BYTE)))
  311. return false;
  312. if (!(test_bit(opcode, ibd->fault->opcodes)))
  313. return false;
  314. }
  315. if (ibd->fault->fault_skip_usec &&
  316. time_before(jiffies, ibd->fault->skip_usec))
  317. return false;
  318. if (ibd->fault->fault_skip && ibd->fault->skip) {
  319. ibd->fault->skip--;
  320. return false;
  321. }
  322. ret = should_fail(&ibd->fault->attr, 1);
  323. if (ret) {
  324. ibd->fault->skip = ibd->fault->fault_skip;
  325. ibd->fault->skip_usec = jiffies +
  326. usecs_to_jiffies(ibd->fault->fault_skip_usec);
  327. }
  328. return ret;
  329. }
  330. bool hfi1_dbg_should_fault_tx(struct rvt_qp *qp, u32 opcode)
  331. {
  332. struct hfi1_ibdev *ibd = to_idev(qp->ibqp.device);
  333. if (__hfi1_should_fault(ibd, opcode, HFI1_FAULT_DIR_TX)) {
  334. trace_hfi1_fault_opcode(qp, opcode);
  335. ibd->fault->n_txfaults[opcode]++;
  336. return true;
  337. }
  338. return false;
  339. }
  340. bool hfi1_dbg_should_fault_rx(struct hfi1_packet *packet)
  341. {
  342. struct hfi1_ibdev *ibd = &packet->rcd->dd->verbs_dev;
  343. if (__hfi1_should_fault(ibd, packet->opcode, HFI1_FAULT_DIR_RX)) {
  344. trace_hfi1_fault_packet(packet);
  345. ibd->fault->n_rxfaults[packet->opcode]++;
  346. return true;
  347. }
  348. return false;
  349. }