spurious.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 1992, 1998-2004 Linus Torvalds, Ingo Molnar
 *
 * This file contains spurious interrupt handling.
 */

#include <linux/jiffies.h>
#include <linux/irq.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/moduleparam.h>
#include <linux/timer.h>

#include "internals.h"
static int irqfixup __read_mostly;

#define POLL_SPURIOUS_IRQ_INTERVAL (HZ/10)
static void poll_spurious_irqs(struct timer_list *unused);
static DEFINE_TIMER(poll_spurious_irq_timer, poll_spurious_irqs);
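/*
 * irq_poll_cpu is the CPU currently running the spurious poll;
 * irq_poll_active makes sure only one poller runs at a time.
 */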
static int irq_poll_cpu;
static atomic_t irq_poll_active;
/*
 * We wait here for a poller to finish.
 *
 * If the poll runs on this CPU, then we yell loudly and return
 * false. That will leave the interrupt line disabled in the worst
 * case, but it should never happen.
 *
 * We wait until the poller is done and then recheck disabled and
 * action (about to be disabled). Only if it's still active, we return
 * true and let the handler run.
 */
bool irq_wait_for_poll(struct irq_desc *desc)
{
	if (WARN_ONCE(irq_poll_cpu == smp_processor_id(),
		      "irq poll in progress on cpu %d for irq %d\n",
		      smp_processor_id(), desc->irq_data.irq))
		return false;

#ifdef CONFIG_SMP
	do {
		raw_spin_unlock(&desc->lock);
		while (irqd_irq_inprogress(&desc->irq_data))
			cpu_relax();
		raw_spin_lock(&desc->lock);
	} while (irqd_irq_inprogress(&desc->irq_data));
	/* Might have been disabled in meantime */
	return !irqd_irq_disabled(&desc->irq_data) && desc->action;
#else
	return false;
#endif
}
/*
 * Recovery handler for misrouted interrupts.
 */
static int try_one_irq(struct irq_desc *desc, bool force)
{
	irqreturn_t ret = IRQ_NONE;
	struct irqaction *action;

	raw_spin_lock(&desc->lock);

	/*
	 * PER_CPU, nested thread interrupts and interrupts explicitly
	 * marked polled are excluded from polling.
	 */
	if (irq_settings_is_per_cpu(desc) ||
	    irq_settings_is_nested_thread(desc) ||
	    irq_settings_is_polled(desc))
		goto out;

	/*
	 * Do not poll disabled interrupts unless the spurious
	 * disabled poller asks explicitly.
	 */
	if (irqd_irq_disabled(&desc->irq_data) && !force)
		goto out;

	/*
	 * All handlers must agree on IRQF_SHARED, so we test just the
	 * first.
	 */
	action = desc->action;
	if (!action || !(action->flags & IRQF_SHARED) ||
	    (action->flags & __IRQF_TIMER))
		goto out;

	/* Already running on another processor */
	if (irqd_irq_inprogress(&desc->irq_data)) {
		/*
		 * Already running: If it is shared get the other
		 * CPU to go looking for our mystery interrupt too
		 */
		desc->istate |= IRQS_PENDING;
		goto out;
	}

	/* Mark it poll in progress */
	desc->istate |= IRQS_POLL_INPROGRESS;
	do {
		if (handle_irq_event(desc) == IRQ_HANDLED)
			ret = IRQ_HANDLED;
		/* Make sure that there is still a valid action */
		action = desc->action;
	} while ((desc->istate & IRQS_PENDING) && action);
	desc->istate &= ~IRQS_POLL_INPROGRESS;
out:
	raw_spin_unlock(&desc->lock);
	return ret == IRQ_HANDLED;
}
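/*
 * Poll all interrupt lines except irq 0 and the one that just
 * misfired. Returns 1 if some other line's handler claimed an
 * interrupt, so the caller can adjust its unhandled count.
 */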
static int misrouted_irq(int irq)
{
	struct irq_desc *desc;
	int i, ok = 0;

	if (atomic_inc_return(&irq_poll_active) != 1)
		goto out;

	irq_poll_cpu = smp_processor_id();

	for_each_irq_desc(i, desc) {
		if (!i)
			continue;

		if (i == irq)	/* Already tried */
			continue;

		if (try_one_irq(desc, false))
			ok = 1;
	}
out:
	atomic_dec(&irq_poll_active);
	/* So the caller can adjust the irq error counts */
	return ok;
}
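/*
 * Timer callback: periodically re-poll all interrupt lines that the
 * spurious detector has disabled, then re-arm the poll timer.
 */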
static void poll_spurious_irqs(struct timer_list *unused)
{
	struct irq_desc *desc;
	int i;

	if (atomic_inc_return(&irq_poll_active) != 1)
		goto out;
	irq_poll_cpu = smp_processor_id();
	for_each_irq_desc(i, desc) {
		unsigned int state;

		if (!i)
			continue;

		/* Racy but it doesn't matter */
		state = desc->istate;
		barrier();
		if (!(state & IRQS_SPURIOUS_DISABLED))
			continue;

		local_irq_disable();
		try_one_irq(desc, true);
		local_irq_enable();
	}
out:
	atomic_dec(&irq_poll_active);
	mod_timer(&poll_spurious_irq_timer,
		  jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
}
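/*
 * A valid handler return value is a combination of IRQ_NONE,
 * IRQ_HANDLED and IRQ_WAKE_THREAD; anything else is bogus.
 */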
static inline int bad_action_ret(irqreturn_t action_ret)
{
	unsigned int r = action_ret;

	if (likely(r <= (IRQ_HANDLED | IRQ_WAKE_THREAD)))
		return 0;
	return 1;
}
/*
 * If 99,900 of the previous 100,000 interrupts have not been handled
 * then assume that the IRQ is stuck in some manner. Drop a diagnostic
 * and try to turn the IRQ off.
 *
 * (The other 100-of-100,000 interrupts may have been a correctly
 *  functioning device sharing an IRQ with the failing one)
 */
static void __report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
{
	unsigned int irq = irq_desc_get_irq(desc);
	struct irqaction *action;
	unsigned long flags;

	if (bad_action_ret(action_ret)) {
		printk(KERN_ERR "irq event %d: bogus return value %x\n",
				irq, action_ret);
	} else {
		printk(KERN_ERR "irq %d: nobody cared (try booting with "
				"the \"irqpoll\" option)\n", irq);
	}
	dump_stack();
	printk(KERN_ERR "handlers:\n");

	/*
	 * We need to take desc->lock here. note_interrupt() is called
	 * w/o desc->lock held, but IRQ_PROGRESS set. We might race
	 * with something else removing an action. It's ok to take
	 * desc->lock here. See synchronize_irq().
	 */
	raw_spin_lock_irqsave(&desc->lock, flags);
	for_each_action_of_desc(desc, action) {
		printk(KERN_ERR "[<%p>] %pf", action->handler, action->handler);
		if (action->thread_fn)
			printk(KERN_CONT " threaded [<%p>] %pf",
					action->thread_fn, action->thread_fn);
		printk(KERN_CONT "\n");
	}
	raw_spin_unlock_irqrestore(&desc->lock, flags);
}
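/*
 * Rate limited variant of __report_bad_irq(): only the first 100
 * reports are emitted to avoid flooding the log.
 */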
static void report_bad_irq(struct irq_desc *desc, irqreturn_t action_ret)
{
	static int count = 100;

	if (count > 0) {
		count--;
		__report_bad_irq(desc, action_ret);
	}
}
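/*
 * Decide whether an interrupt should trigger a poll for misrouted
 * interrupts: always for unhandled ones when irqfixup is set, and
 * with irqfixup == 2 also for handled timer/IRQF_IRQPOLL interrupts.
 */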
static inline int
try_misrouted_irq(unsigned int irq, struct irq_desc *desc,
		  irqreturn_t action_ret)
{
	struct irqaction *action;

	if (!irqfixup)
		return 0;

	/* We didn't actually handle the IRQ - see if it was misrouted? */
	if (action_ret == IRQ_NONE)
		return 1;

	/*
	 * But for 'irqfixup == 2' we also do it for handled interrupts if
	 * they are marked as IRQF_IRQPOLL (or for irq zero, which is the
	 * traditional PC timer interrupt.. Legacy)
	 */
	if (irqfixup < 2)
		return 0;

	if (!irq)
		return 1;

	/*
	 * Since we don't get the descriptor lock, "action" can
	 * change under us. We don't really care, but we don't
	 * want to follow a NULL pointer. So tell the compiler to
	 * just load it once by using a barrier.
	 */
	action = desc->action;
	barrier();
	return action && (action->flags & IRQF_IRQPOLL);
}
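/*
 * Bit 31 of threads_handled_last flags that spurious detection for a
 * threaded handler has been deferred to the next hardware interrupt.
 */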
#define SPURIOUS_DEFERRED	0x80000000
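/*
 * Track the handled/unhandled statistics of an interrupt line and
 * disable it once 99,900 of the last 100,000 interrupts went
 * unhandled.
 */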
void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret)
{
	unsigned int irq;

	if (desc->istate & IRQS_POLL_INPROGRESS ||
	    irq_settings_is_polled(desc))
		return;

	if (bad_action_ret(action_ret)) {
		report_bad_irq(desc, action_ret);
		return;
	}

	/*
	 * We cannot call note_interrupt from the threaded handler
	 * because we need to look at the compound of all handlers
	 * (primary and threaded). Aside of that in the threaded
	 * shared case we have no serialization against an incoming
	 * hardware interrupt while we are dealing with a threaded
	 * result.
	 *
	 * So in case a thread is woken, we just note the fact and
	 * defer the analysis to the next hardware interrupt.
	 *
	 * The threaded handlers store whether they successfully
	 * handled an interrupt and we check whether that number
	 * changed versus the last invocation.
	 *
	 * We could handle all interrupts with the delayed by one
	 * mechanism, but for the non forced threaded case we'd just
	 * add pointless overhead to the straight hardirq interrupts
	 * for the sake of a few lines less code.
	 */
	if (action_ret & IRQ_WAKE_THREAD) {
		/*
		 * There is a thread woken. Check whether one of the
		 * shared primary handlers returned IRQ_HANDLED. If
		 * not we defer the spurious detection to the next
		 * interrupt.
		 */
		if (action_ret == IRQ_WAKE_THREAD) {
			int handled;
			/*
			 * We use bit 31 of threads_handled_last to
			 * denote the deferred spurious detection
			 * active. No locking necessary as
			 * threads_handled_last is only accessed here
			 * and we have the guarantee that hard
			 * interrupts are not reentrant.
			 */
			if (!(desc->threads_handled_last & SPURIOUS_DEFERRED)) {
				desc->threads_handled_last |= SPURIOUS_DEFERRED;
				return;
			}
			/*
			 * Check whether one of the threaded handlers
			 * returned IRQ_HANDLED since the last
			 * interrupt happened.
			 *
			 * For simplicity we just set bit 31, as it is
			 * set in threads_handled_last as well. So we
			 * avoid extra masking. And we really do not
			 * care about the high bits of the handled
			 * count. We just care about the count being
			 * different than the one we saw before.
			 */
			handled = atomic_read(&desc->threads_handled);
			handled |= SPURIOUS_DEFERRED;
			if (handled != desc->threads_handled_last) {
				action_ret = IRQ_HANDLED;
				/*
				 * Note: We keep the SPURIOUS_DEFERRED
				 * bit set. We are handling the
				 * previous invocation right now.
				 * Keep it for the current one, so the
				 * next hardware interrupt will
				 * account for it.
				 */
				desc->threads_handled_last = handled;
			} else {
				/*
				 * None of the threaded handlers felt
				 * responsible for the last interrupt
				 *
				 * We keep the SPURIOUS_DEFERRED bit
				 * set in threads_handled_last as we
				 * need to account for the current
				 * interrupt as well.
				 */
				action_ret = IRQ_NONE;
			}
		} else {
			/*
			 * One of the primary handlers returned
			 * IRQ_HANDLED. So we don't care about the
			 * threaded handlers on the same line. Clear
			 * the deferred detection bit.
			 *
			 * In theory we could/should check whether the
			 * deferred bit is set and take the result of
			 * the previous run into account here as
			 * well. But it's really not worth the
			 * trouble. If every other interrupt is
			 * handled we never trigger the spurious
			 * detector. And if this is just the one out
			 * of 100k unhandled ones which is handled
			 * then we merely delay the spurious detection
			 * by one hard interrupt. Not a real problem.
			 */
			desc->threads_handled_last &= ~SPURIOUS_DEFERRED;
		}
	}

	if (unlikely(action_ret == IRQ_NONE)) {
		/*
		 * If we are seeing only the odd spurious IRQ caused by
		 * bus asynchronicity then don't eventually trigger an error,
		 * otherwise the counter becomes a doomsday timer for otherwise
		 * working systems
		 */
		if (time_after(jiffies, desc->last_unhandled + HZ/10))
			desc->irqs_unhandled = 1;
		else
			desc->irqs_unhandled++;
		desc->last_unhandled = jiffies;
	}

	irq = irq_desc_get_irq(desc);
	if (unlikely(try_misrouted_irq(irq, desc, action_ret))) {
		int ok = misrouted_irq(irq);
		if (action_ret == IRQ_NONE)
			desc->irqs_unhandled -= ok;
	}

	desc->irq_count++;
	if (likely(desc->irq_count < 100000))
		return;

	desc->irq_count = 0;
	if (unlikely(desc->irqs_unhandled > 99900)) {
		/*
		 * The interrupt is stuck
		 */
		__report_bad_irq(desc, action_ret);
		/*
		 * Now kill the IRQ
		 */
		printk(KERN_EMERG "Disabling IRQ #%d\n", irq);
		desc->istate |= IRQS_SPURIOUS_DISABLED;
		desc->depth++;
		irq_disable(desc);

		mod_timer(&poll_spurious_irq_timer,
			  jiffies + POLL_SPURIOUS_IRQ_INTERVAL);
	}
	desc->irqs_unhandled = 0;
}
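/*
 * Boot options: "noirqdebug" disables the spurious interrupt/lockup
 * detection above, "irqfixup" enables polling for misrouted
 * interrupts, and "irqpoll" additionally polls on handled timer and
 * IRQF_IRQPOLL interrupts.
 */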
bool noirqdebug __read_mostly;

int noirqdebug_setup(char *str)
{
	noirqdebug = 1;
	printk(KERN_INFO "IRQ lockup detection disabled\n");

	return 1;
}

__setup("noirqdebug", noirqdebug_setup);
module_param(noirqdebug, bool, 0644);
MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true");

static int __init irqfixup_setup(char *str)
{
	irqfixup = 1;
	printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
	printk(KERN_WARNING "This may impact system performance.\n");

	return 1;
}

__setup("irqfixup", irqfixup_setup);
module_param(irqfixup, int, 0644);

static int __init irqpoll_setup(char *str)
{
	irqfixup = 2;
	printk(KERN_WARNING "Misrouted IRQ fixup and polling support "
		"enabled\n");
	printk(KERN_WARNING "This may significantly impact system "
		"performance\n");
	return 1;
}

__setup("irqpoll", irqpoll_setup);