eventfd.c

/*
 * kvm eventfd support - use eventfd objects to signal various KVM events
 *
 * Copyright 2009 Novell. All Rights Reserved.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Author:
 *	Gregory Haskins <ghaskins@novell.com>
 *
 * This file is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/kvm_irqfd.h>
#include <linux/workqueue.h>
#include <linux/syscalls.h>
#include <linux/wait.h>
#include <linux/poll.h>
#include <linux/file.h>
#include <linux/list.h>
#include <linux/eventfd.h>
#include <linux/kernel.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <linux/seqlock.h>
#include <linux/irqbypass.h>
#include <trace/events/kvm.h>

#include <kvm/iodev.h>

#ifdef CONFIG_HAVE_KVM_IRQFD

static struct workqueue_struct *irqfd_cleanup_wq;
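
/*
 * Architectures may veto a particular irqfd request; the weak default
 * below accepts everything.
 */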
bool __attribute__((weak))
kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args)
{
	return true;
}

static void
irqfd_inject(struct work_struct *work)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(work, struct kvm_kernel_irqfd, inject);
	struct kvm *kvm = irqfd->kvm;

	if (!irqfd->resampler) {
		kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1,
			    false);
		kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0,
			    false);
	} else
		kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
			    irqfd->gsi, 1, false);
}

/*
 * Since resampler irqfds share an IRQ source ID, we de-assert once
 * then notify all of the resampler irqfds using this GSI. We can't
 * do multiple de-asserts or we risk racing with incoming re-asserts.
 */
static void
irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
{
	struct kvm_kernel_irqfd_resampler *resampler;
	struct kvm *kvm;
	struct kvm_kernel_irqfd *irqfd;
	int idx;

	resampler = container_of(kian,
			struct kvm_kernel_irqfd_resampler, notifier);
	kvm = resampler->kvm;

	kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
		    resampler->notifier.gsi, 0, false);

	idx = srcu_read_lock(&kvm->irq_srcu);

	list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link)
		eventfd_signal(irqfd->resamplefd, 1);

	srcu_read_unlock(&kvm->irq_srcu, idx);
}

static void
irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd)
{
	struct kvm_kernel_irqfd_resampler *resampler = irqfd->resampler;
	struct kvm *kvm = resampler->kvm;

	mutex_lock(&kvm->irqfds.resampler_lock);

	list_del_rcu(&irqfd->resampler_link);
	synchronize_srcu(&kvm->irq_srcu);

	if (list_empty(&resampler->list)) {
		list_del(&resampler->link);
		kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier);
		kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
			    resampler->notifier.gsi, 0, false);
		kfree(resampler);
	}

	mutex_unlock(&kvm->irqfds.resampler_lock);
}

/*
 * Race-free decouple logic (ordering is critical)
 */
static void
irqfd_shutdown(struct work_struct *work)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(work, struct kvm_kernel_irqfd, shutdown);
	struct kvm *kvm = irqfd->kvm;
	u64 cnt;

	/* Make sure irqfd has been initialized in assign path. */
	synchronize_srcu(&kvm->irq_srcu);

	/*
	 * Synchronize with the wait-queue and unhook ourselves to prevent
	 * further events.
	 */
	eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt);

	/*
	 * We know no new events will be scheduled at this point, so block
	 * until all previously outstanding events have completed
	 */
	flush_work(&irqfd->inject);

	if (irqfd->resampler) {
		irqfd_resampler_shutdown(irqfd);
		eventfd_ctx_put(irqfd->resamplefd);
	}

	/*
	 * It is now safe to release the object's resources
	 */
#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
	irq_bypass_unregister_consumer(&irqfd->consumer);
#endif
	eventfd_ctx_put(irqfd->eventfd);
	kfree(irqfd);
}

/* assumes kvm->irqfds.lock is held */
static bool
irqfd_is_active(struct kvm_kernel_irqfd *irqfd)
{
	return !list_empty(&irqfd->list);
}

/*
 * Mark the irqfd as inactive and schedule it for removal
 *
 * assumes kvm->irqfds.lock is held
 */
static void
irqfd_deactivate(struct kvm_kernel_irqfd *irqfd)
{
	BUG_ON(!irqfd_is_active(irqfd));

	list_del_init(&irqfd->list);

	queue_work(irqfd_cleanup_wq, &irqfd->shutdown);
}

int __attribute__((weak)) kvm_arch_set_irq_inatomic(
				struct kvm_kernel_irq_routing_entry *irq,
				struct kvm *kvm, int irq_source_id,
				int level,
				bool line_status)
{
	return -EWOULDBLOCK;
}

/*
 * Called with wqh->lock held and interrupts disabled
 */
static int
irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(wait, struct kvm_kernel_irqfd, wait);
	__poll_t flags = key_to_poll(key);
	struct kvm_kernel_irq_routing_entry irq;
	struct kvm *kvm = irqfd->kvm;
	unsigned seq;
	int idx;

	if (flags & EPOLLIN) {
		idx = srcu_read_lock(&kvm->irq_srcu);
		do {
			seq = read_seqcount_begin(&irqfd->irq_entry_sc);
			irq = irqfd->irq_entry;
		} while (read_seqcount_retry(&irqfd->irq_entry_sc, seq));
		/* An event has been signaled, inject an interrupt */
		if (kvm_arch_set_irq_inatomic(&irq, kvm,
					      KVM_USERSPACE_IRQ_SOURCE_ID, 1,
					      false) == -EWOULDBLOCK)
			schedule_work(&irqfd->inject);
		srcu_read_unlock(&kvm->irq_srcu, idx);
	}

	if (flags & EPOLLHUP) {
		/* The eventfd is closing, detach from KVM */
		unsigned long flags;

		spin_lock_irqsave(&kvm->irqfds.lock, flags);

		/*
		 * We must check if someone deactivated the irqfd before
		 * we could acquire the irqfds.lock since the item is
		 * deactivated from the KVM side before it is unhooked from
		 * the wait-queue. If it is already deactivated, we can
		 * simply return knowing the other side will cleanup for us.
		 * We cannot race against the irqfd going away since the
		 * other side is required to acquire wqh->lock, which we hold
		 */
		if (irqfd_is_active(irqfd))
			irqfd_deactivate(irqfd);

		spin_unlock_irqrestore(&kvm->irqfds.lock, flags);
	}

	return 0;
}
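
/*
 * Callback from vfs_poll(): add our wait-queue entry to the eventfd's
 * wait queue so that irqfd_wakeup() runs when the eventfd is signaled.
 */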
static void
irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
			poll_table *pt)
{
	struct kvm_kernel_irqfd *irqfd =
		container_of(pt, struct kvm_kernel_irqfd, pt);
	add_wait_queue(wqh, &irqfd->wait);
}

/* Must be called under irqfds.lock */
static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd)
{
	struct kvm_kernel_irq_routing_entry *e;
	struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
	int n_entries;

	n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi);

	write_seqcount_begin(&irqfd->irq_entry_sc);

	e = entries;
	if (n_entries == 1)
		irqfd->irq_entry = *e;
	else
		irqfd->irq_entry.type = 0;

	write_seqcount_end(&irqfd->irq_entry_sc);
}

#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
void __attribute__((weak)) kvm_arch_irq_bypass_stop(
				struct irq_bypass_consumer *cons)
{
}

void __attribute__((weak)) kvm_arch_irq_bypass_start(
				struct irq_bypass_consumer *cons)
{
}

int __attribute__((weak)) kvm_arch_update_irqfd_routing(
				struct kvm *kvm, unsigned int host_irq,
				uint32_t guest_irq, bool set)
{
	return 0;
}
#endif
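
/*
 * kvm_irqfd_assign() wires the eventfd named by args->fd to GSI args->gsi so
 * that a signal on the eventfd injects an interrupt into the guest.
 * Illustrative userspace sketch (assumes a VM fd and an eventfd obtained
 * elsewhere):
 *
 *	struct kvm_irqfd irqfd = {
 *		.fd  = efd,	// from eventfd(0, EFD_CLOEXEC)
 *		.gsi = gsi,	// guest interrupt line to drive
 *	};
 *	ioctl(vm_fd, KVM_IRQFD, &irqfd);
 */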
static int
kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
{
	struct kvm_kernel_irqfd *irqfd, *tmp;
	struct fd f;
	struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
	int ret;
	__poll_t events;
	int idx;

	if (!kvm_arch_intc_initialized(kvm))
		return -EAGAIN;

	if (!kvm_arch_irqfd_allowed(kvm, args))
		return -EINVAL;

	irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
	if (!irqfd)
		return -ENOMEM;

	irqfd->kvm = kvm;
	irqfd->gsi = args->gsi;
	INIT_LIST_HEAD(&irqfd->list);
	INIT_WORK(&irqfd->inject, irqfd_inject);
	INIT_WORK(&irqfd->shutdown, irqfd_shutdown);
	seqcount_init(&irqfd->irq_entry_sc);

	f = fdget(args->fd);
	if (!f.file) {
		ret = -EBADF;
		goto out;
	}

	eventfd = eventfd_ctx_fileget(f.file);
	if (IS_ERR(eventfd)) {
		ret = PTR_ERR(eventfd);
		goto fail;
	}

	irqfd->eventfd = eventfd;

	if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) {
		struct kvm_kernel_irqfd_resampler *resampler;

		resamplefd = eventfd_ctx_fdget(args->resamplefd);
		if (IS_ERR(resamplefd)) {
			ret = PTR_ERR(resamplefd);
			goto fail;
		}

		irqfd->resamplefd = resamplefd;
		INIT_LIST_HEAD(&irqfd->resampler_link);

		mutex_lock(&kvm->irqfds.resampler_lock);

		list_for_each_entry(resampler,
				    &kvm->irqfds.resampler_list, link) {
			if (resampler->notifier.gsi == irqfd->gsi) {
				irqfd->resampler = resampler;
				break;
			}
		}

		if (!irqfd->resampler) {
			resampler = kzalloc(sizeof(*resampler), GFP_KERNEL);
			if (!resampler) {
				ret = -ENOMEM;
				mutex_unlock(&kvm->irqfds.resampler_lock);
				goto fail;
			}

			resampler->kvm = kvm;
			INIT_LIST_HEAD(&resampler->list);
			resampler->notifier.gsi = irqfd->gsi;
			resampler->notifier.irq_acked = irqfd_resampler_ack;
			INIT_LIST_HEAD(&resampler->link);

			list_add(&resampler->link, &kvm->irqfds.resampler_list);
			kvm_register_irq_ack_notifier(kvm,
						      &resampler->notifier);
			irqfd->resampler = resampler;
		}

		list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list);
		synchronize_srcu(&kvm->irq_srcu);

		mutex_unlock(&kvm->irqfds.resampler_lock);
	}

	/*
	 * Install our own custom wake-up handling so we are notified via
	 * a callback whenever someone signals the underlying eventfd
	 */
	init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
	init_poll_funcptr(&irqfd->pt, irqfd_ptable_queue_proc);

	spin_lock_irq(&kvm->irqfds.lock);

	ret = 0;
	list_for_each_entry(tmp, &kvm->irqfds.items, list) {
		if (irqfd->eventfd != tmp->eventfd)
			continue;
		/* This fd is used for another irq already. */
		ret = -EBUSY;
		spin_unlock_irq(&kvm->irqfds.lock);
		goto fail;
	}

	idx = srcu_read_lock(&kvm->irq_srcu);
	irqfd_update(kvm, irqfd);

	list_add_tail(&irqfd->list, &kvm->irqfds.items);

	spin_unlock_irq(&kvm->irqfds.lock);

	/*
	 * Check if there was an event already pending on the eventfd
	 * before we registered, and trigger it as if we didn't miss it.
	 */
	events = vfs_poll(f.file, &irqfd->pt);

	if (events & EPOLLIN)
		schedule_work(&irqfd->inject);

#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
	if (kvm_arch_has_irq_bypass()) {
		irqfd->consumer.token = (void *)irqfd->eventfd;
		irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer;
		irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer;
		irqfd->consumer.stop = kvm_arch_irq_bypass_stop;
		irqfd->consumer.start = kvm_arch_irq_bypass_start;
		ret = irq_bypass_register_consumer(&irqfd->consumer);
		if (ret)
			pr_info("irq bypass consumer (token %p) registration fails: %d\n",
				irqfd->consumer.token, ret);
	}
#endif

	srcu_read_unlock(&kvm->irq_srcu, idx);

	/*
	 * do not drop the file until the irqfd is fully initialized, otherwise
	 * we might race against the EPOLLHUP
	 */
	fdput(f);
	return 0;

fail:
	if (irqfd->resampler)
		irqfd_resampler_shutdown(irqfd);

	if (resamplefd && !IS_ERR(resamplefd))
		eventfd_ctx_put(resamplefd);

	if (eventfd && !IS_ERR(eventfd))
		eventfd_ctx_put(eventfd);

	fdput(f);

out:
	kfree(irqfd);
	return ret;
}

bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
	struct kvm_irq_ack_notifier *kian;
	int gsi, idx;

	idx = srcu_read_lock(&kvm->irq_srcu);
	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
	if (gsi != -1)
		hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
					 link)
			if (kian->gsi == gsi) {
				srcu_read_unlock(&kvm->irq_srcu, idx);
				return true;
			}

	srcu_read_unlock(&kvm->irq_srcu, idx);

	return false;
}
EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);

void kvm_notify_acked_gsi(struct kvm *kvm, int gsi)
{
	struct kvm_irq_ack_notifier *kian;

	hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
				 link)
		if (kian->gsi == gsi)
			kian->irq_acked(kian);
}

void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
	int gsi, idx;

	trace_kvm_ack_irq(irqchip, pin);

	idx = srcu_read_lock(&kvm->irq_srcu);
	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
	if (gsi != -1)
		kvm_notify_acked_gsi(kvm, gsi);
	srcu_read_unlock(&kvm->irq_srcu, idx);
}

void kvm_register_irq_ack_notifier(struct kvm *kvm,
				   struct kvm_irq_ack_notifier *kian)
{
	mutex_lock(&kvm->irq_lock);
	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
	mutex_unlock(&kvm->irq_lock);
	kvm_arch_post_irq_ack_notifier_list_update(kvm);
}

void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
				     struct kvm_irq_ack_notifier *kian)
{
	mutex_lock(&kvm->irq_lock);
	hlist_del_init_rcu(&kian->link);
	mutex_unlock(&kvm->irq_lock);
	synchronize_srcu(&kvm->irq_srcu);
	kvm_arch_post_irq_ack_notifier_list_update(kvm);
}
#endif
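
/*
 * Per-VM initialization of the eventfd machinery: the irqfd lists and locks
 * (when CONFIG_HAVE_KVM_IRQFD is enabled) and the ioeventfd list.
 */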
void
kvm_eventfd_init(struct kvm *kvm)
{
#ifdef CONFIG_HAVE_KVM_IRQFD
	spin_lock_init(&kvm->irqfds.lock);
	INIT_LIST_HEAD(&kvm->irqfds.items);
	INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
	mutex_init(&kvm->irqfds.resampler_lock);
#endif
	INIT_LIST_HEAD(&kvm->ioeventfds);
}

#ifdef CONFIG_HAVE_KVM_IRQFD

/*
 * Shut down any irqfds that match fd+gsi.
 */
static int
kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
{
	struct kvm_kernel_irqfd *irqfd, *tmp;
	struct eventfd_ctx *eventfd;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
		if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) {
			/*
			 * This clearing of irq_entry.type is needed for when
			 * another thread calls kvm_irq_routing_update before
			 * we flush workqueue below (we synchronize with
			 * kvm_irq_routing_update using irqfds.lock).
			 */
			write_seqcount_begin(&irqfd->irq_entry_sc);
			irqfd->irq_entry.type = 0;
			write_seqcount_end(&irqfd->irq_entry_sc);
			irqfd_deactivate(irqfd);
		}
	}

	spin_unlock_irq(&kvm->irqfds.lock);
	eventfd_ctx_put(eventfd);

	/*
	 * Block until we know all outstanding shutdown jobs have completed
	 * so that we guarantee there will not be any more interrupts on this
	 * gsi once this deassign function returns.
	 */
	flush_workqueue(irqfd_cleanup_wq);

	return 0;
}

int
kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
{
	if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE))
		return -EINVAL;

	if (args->flags & KVM_IRQFD_FLAG_DEASSIGN)
		return kvm_irqfd_deassign(kvm, args);

	return kvm_irqfd_assign(kvm, args);
}

/*
 * This function is called as the kvm VM fd is being released. Shut down all
 * irqfds that still remain open.
 */
void
kvm_irqfd_release(struct kvm *kvm)
{
	struct kvm_kernel_irqfd *irqfd, *tmp;

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list)
		irqfd_deactivate(irqfd);

	spin_unlock_irq(&kvm->irqfds.lock);

	/*
	 * Block until we know all outstanding shutdown jobs have completed
	 * since we do not take a kvm* reference.
	 */
	flush_workqueue(irqfd_cleanup_wq);
}

/*
 * Take note of a change in irq routing.
 * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards.
 */
void kvm_irq_routing_update(struct kvm *kvm)
{
	struct kvm_kernel_irqfd *irqfd;

	spin_lock_irq(&kvm->irqfds.lock);

	list_for_each_entry(irqfd, &kvm->irqfds.items, list) {
		irqfd_update(kvm, irqfd);

#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS
		if (irqfd->producer) {
			int ret = kvm_arch_update_irqfd_routing(
					irqfd->kvm, irqfd->producer->irq,
					irqfd->gsi, 1);
			WARN_ON(ret);
		}
#endif
	}

	spin_unlock_irq(&kvm->irqfds.lock);
}

/*
 * create a host-wide workqueue for issuing deferred shutdown requests
 * aggregated from all vm* instances. We need our own isolated
 * queue to ease flushing work items when a VM exits.
 */
int kvm_irqfd_init(void)
{
	irqfd_cleanup_wq = alloc_workqueue("kvm-irqfd-cleanup", 0, 0);
	if (!irqfd_cleanup_wq)
		return -ENOMEM;

	return 0;
}

void kvm_irqfd_exit(void)
{
	destroy_workqueue(irqfd_cleanup_wq);
}
#endif

/*
 * --------------------------------------------------------------------
 * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
 *
 * userspace can register a PIO/MMIO address with an eventfd for receiving
 * notification when the memory has been touched.
 * --------------------------------------------------------------------
 */
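
/*
 * Illustrative userspace sketch of registering an ioeventfd (vm_fd, efd,
 * notify_gpa and queue_index are assumed to be set up by the VMM):
 *
 *	struct kvm_ioeventfd io = {
 *		.addr      = notify_gpa,	// guest-physical MMIO address
 *		.len       = 4,
 *		.fd        = efd,		// from eventfd(0, EFD_CLOEXEC)
 *		.flags     = KVM_IOEVENTFD_FLAG_DATAMATCH,
 *		.datamatch = queue_index,
 *	};
 *	ioctl(vm_fd, KVM_IOEVENTFD, &io);
 */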
struct _ioeventfd {
	struct list_head list;
	u64 addr;
	int length;
	struct eventfd_ctx *eventfd;
	u64 datamatch;
	struct kvm_io_device dev;
	u8 bus_idx;
	bool wildcard;
};

static inline struct _ioeventfd *
to_ioeventfd(struct kvm_io_device *dev)
{
	return container_of(dev, struct _ioeventfd, dev);
}

static void
ioeventfd_release(struct _ioeventfd *p)
{
	eventfd_ctx_put(p->eventfd);
	list_del(&p->list);
	kfree(p);
}

static bool
ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
{
	u64 _val;

	if (addr != p->addr)
		/* address must be precise for a hit */
		return false;

	if (!p->length)
		/* length = 0 means only look at the address, so always a hit */
		return true;

	if (len != p->length)
		/* address-range must be precise for a hit */
		return false;

	if (p->wildcard)
		/* all else equal, wildcard is always a hit */
		return true;

	/* otherwise, we have to actually compare the data */

	BUG_ON(!IS_ALIGNED((unsigned long)val, len));

	switch (len) {
	case 1:
		_val = *(u8 *)val;
		break;
	case 2:
		_val = *(u16 *)val;
		break;
	case 4:
		_val = *(u32 *)val;
		break;
	case 8:
		_val = *(u64 *)val;
		break;
	default:
		return false;
	}

	return _val == p->datamatch;
}

/* MMIO/PIO writes trigger an event if the addr/val match */
static int
ioeventfd_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this, gpa_t addr,
		int len, const void *val)
{
	struct _ioeventfd *p = to_ioeventfd(this);

	if (!ioeventfd_in_range(p, addr, len, val))
		return -EOPNOTSUPP;

	eventfd_signal(p->eventfd, 1);
	return 0;
}

/*
 * This function is called as KVM is completely shutting down. We do not
 * need to worry about locking; just nuke anything we have as quickly as
 * possible.
 */
static void
ioeventfd_destructor(struct kvm_io_device *this)
{
	struct _ioeventfd *p = to_ioeventfd(this);

	ioeventfd_release(p);
}

static const struct kvm_io_device_ops ioeventfd_ops = {
	.write      = ioeventfd_write,
	.destructor = ioeventfd_destructor,
};

/* assumes kvm->slots_lock held */
static bool
ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p)
{
	struct _ioeventfd *_p;

	list_for_each_entry(_p, &kvm->ioeventfds, list)
		if (_p->bus_idx == p->bus_idx &&
		    _p->addr == p->addr &&
		    (!_p->length || !p->length ||
		     (_p->length == p->length &&
		      (_p->wildcard || p->wildcard ||
		       _p->datamatch == p->datamatch))))
			return true;

	return false;
}

static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags)
{
	if (flags & KVM_IOEVENTFD_FLAG_PIO)
		return KVM_PIO_BUS;
	if (flags & KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY)
		return KVM_VIRTIO_CCW_NOTIFY_BUS;
	return KVM_MMIO_BUS;
}
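
/*
 * Register the ioeventfd as a kvm_io_device on the chosen bus; guest writes
 * that match the address (and, for non-wildcard entries, the data) signal
 * the eventfd via ioeventfd_write().
 */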
static int kvm_assign_ioeventfd_idx(struct kvm *kvm,
				    enum kvm_bus bus_idx,
				    struct kvm_ioeventfd *args)
{
	struct eventfd_ctx *eventfd;
	struct _ioeventfd *p;
	int ret;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	p = kzalloc(sizeof(*p), GFP_KERNEL);
	if (!p) {
		ret = -ENOMEM;
		goto fail;
	}

	INIT_LIST_HEAD(&p->list);
	p->addr = args->addr;
	p->bus_idx = bus_idx;
	p->length = args->len;
	p->eventfd = eventfd;

	/* The datamatch feature is optional, otherwise this is a wildcard */
	if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
		p->datamatch = args->datamatch;
	else
		p->wildcard = true;

	mutex_lock(&kvm->slots_lock);

	/* Verify that there isn't a match already */
	if (ioeventfd_check_collision(kvm, p)) {
		ret = -EEXIST;
		goto unlock_fail;
	}

	kvm_iodevice_init(&p->dev, &ioeventfd_ops);

	ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
				      &p->dev);
	if (ret < 0)
		goto unlock_fail;

	kvm_get_bus(kvm, bus_idx)->ioeventfd_count++;
	list_add_tail(&p->list, &kvm->ioeventfds);

	mutex_unlock(&kvm->slots_lock);

	return 0;

unlock_fail:
	mutex_unlock(&kvm->slots_lock);

fail:
	kfree(p);
	eventfd_ctx_put(eventfd);

	return ret;
}

static int
kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
			   struct kvm_ioeventfd *args)
{
	struct _ioeventfd *p, *tmp;
	struct eventfd_ctx *eventfd;
	struct kvm_io_bus *bus;
	int ret = -ENOENT;

	eventfd = eventfd_ctx_fdget(args->fd);
	if (IS_ERR(eventfd))
		return PTR_ERR(eventfd);

	mutex_lock(&kvm->slots_lock);

	list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
		bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);

		if (p->bus_idx != bus_idx ||
		    p->eventfd != eventfd ||
		    p->addr != args->addr ||
		    p->length != args->len ||
		    p->wildcard != wildcard)
			continue;

		if (!p->wildcard && p->datamatch != args->datamatch)
			continue;

		kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
		bus = kvm_get_bus(kvm, bus_idx);
		if (bus)
			bus->ioeventfd_count--;
		ioeventfd_release(p);
		ret = 0;
		break;
	}

	mutex_unlock(&kvm->slots_lock);

	eventfd_ctx_put(eventfd);

	return ret;
}
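
/*
 * A zero-length MMIO ioeventfd is also registered on KVM_FAST_MMIO_BUS at
 * assign time, so drop that companion entry here as well.
 */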
static int kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	enum kvm_bus bus_idx = ioeventfd_bus_from_flags(args->flags);
	int ret = kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);

	if (!args->len && bus_idx == KVM_MMIO_BUS)
		kvm_deassign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);

	return ret;
}

static int
kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	enum kvm_bus bus_idx;
	int ret;

	bus_idx = ioeventfd_bus_from_flags(args->flags);
	/* must be natural-word sized, or 0 to ignore length */
	switch (args->len) {
	case 0:
	case 1:
	case 2:
	case 4:
	case 8:
		break;
	default:
		return -EINVAL;
	}

	/* check for range overflow */
	if (args->addr + args->len < args->addr)
		return -EINVAL;

	/* check for extra flags that we don't understand */
	if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
		return -EINVAL;

	/* ioeventfd with no length can't be combined with DATAMATCH */
	if (!args->len && (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH))
		return -EINVAL;

	ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args);
	if (ret)
		goto fail;

	/* When length is ignored, MMIO is also put on a separate bus, for
	 * faster lookups.
	 */
	if (!args->len && bus_idx == KVM_MMIO_BUS) {
		ret = kvm_assign_ioeventfd_idx(kvm, KVM_FAST_MMIO_BUS, args);
		if (ret < 0)
			goto fast_fail;
	}

	return 0;

fast_fail:
	kvm_deassign_ioeventfd_idx(kvm, bus_idx, args);
fail:
	return ret;
}

int
kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
{
	if (args->flags & KVM_IOEVENTFD_FLAG_DEASSIGN)
		return kvm_deassign_ioeventfd(kvm, args);

	return kvm_assign_ioeventfd(kvm, args);
}