xenbus_comms.c

/******************************************************************************
 * xenbus_comms.c
 *
 * Low level code to talk to Xen Store: ringbuffer and event channel.
 *
 * Copyright (C) 2005 Rusty Russell, IBM Corporation
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/wait.h>
#include <linux/interrupt.h>
#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/err.h>
#include <xen/xenbus.h>
#include <asm/xen/hypervisor.h>
#include <xen/events.h>
#include <xen/page.h>

#include "xenbus.h"

/* A list of replies. Currently only one will ever be outstanding. */
LIST_HEAD(xs_reply_list);

/* A list of write requests. */
LIST_HEAD(xb_write_list);
DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
DEFINE_MUTEX(xb_write_mutex);

/* Protect xenbus reader thread against save/restore. */
DEFINE_MUTEX(xs_response_mutex);

static int xenbus_irq;
static struct task_struct *xenbus_task;

static DECLARE_WORK(probe_work, xenbus_probe);
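
/*
 * wake_waiting() is the irq handler bound to the Xen Store event channel.
 * The shared page (xen_store_interface, layout in
 * xen/interface/io/xs_wire.h) carries a request ring and a response ring,
 * and the other end kicks the channel whenever it produces or consumes
 * data we may be waiting on. The first event also schedules xenbus_probe()
 * via probe_work, once xenstored is known to be up.
 */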
static irqreturn_t wake_waiting(int irq, void *unused)
{
	if (unlikely(xenstored_ready == 0)) {
		xenstored_ready = 1;
		schedule_work(&probe_work);
	}

	wake_up(&xb_waitq);
	return IRQ_HANDLED;
}
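
/*
 * The ring indices are free-running counters, masked only when used as
 * array offsets, so the unsigned difference prod - cons is the number of
 * occupied bytes even across wrap-around: e.g. cons == 0xfffffff0 and
 * prod == 0x00000010 gives 0x20, i.e. 32 bytes in flight. A difference
 * above XENSTORE_RING_SIZE is impossible in normal operation, so the
 * callers treat it as corruption, reset the ring and fail with -EIO.
 */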
static int check_indexes(XENSTORE_RING_IDX cons, XENSTORE_RING_IDX prod)
{
	return ((prod - cons) <= XENSTORE_RING_SIZE);
}
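
/*
 * The chunk helpers return the largest span that a single memcpy() can
 * cover: *len is clamped both at the physical end of the ring buffer
 * (where the copy has to wrap back to offset 0) and at the free space
 * (output) or available data (input) between the two indices.
 */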
static void *get_output_chunk(XENSTORE_RING_IDX cons,
			      XENSTORE_RING_IDX prod,
			      char *buf, uint32_t *len)
{
	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(prod);
	if ((XENSTORE_RING_SIZE - (prod - cons)) < *len)
		*len = XENSTORE_RING_SIZE - (prod - cons);
	return buf + MASK_XENSTORE_IDX(prod);
}

static const void *get_input_chunk(XENSTORE_RING_IDX cons,
				   XENSTORE_RING_IDX prod,
				   const char *buf, uint32_t *len)
{
	*len = XENSTORE_RING_SIZE - MASK_XENSTORE_IDX(cons);
	if ((prod - cons) < *len)
		*len = prod - cons;
	return buf + MASK_XENSTORE_IDX(cons);
}

static int xb_data_to_write(void)
{
	struct xenstore_domain_interface *intf = xen_store_interface;

	return (intf->req_prod - intf->req_cons) != XENSTORE_RING_SIZE &&
		!list_empty(&xb_write_list);
}

/**
 * xb_write - low level write
 * @data: buffer to send
 * @len: length of buffer
 *
 * Returns number of bytes written or -err.
 */
static int xb_write(const void *data, unsigned int len)
{
	struct xenstore_domain_interface *intf = xen_store_interface;
	XENSTORE_RING_IDX cons, prod;
	unsigned int bytes = 0;

	while (len != 0) {
		void *dst;
		unsigned int avail;

		/* Read indexes, then verify. */
		cons = intf->req_cons;
		prod = intf->req_prod;
		if (!check_indexes(cons, prod)) {
			intf->req_cons = intf->req_prod = 0;
			return -EIO;
		}
		if (!xb_data_to_write())
			return bytes;

		/* Must write data /after/ reading the consumer index. */
		virt_mb();

		dst = get_output_chunk(cons, prod, intf->req, &avail);
		if (avail == 0)
			continue;
		if (avail > len)
			avail = len;

		memcpy(dst, data, avail);
		data += avail;
		len -= avail;
		bytes += avail;

		/* Other side must not see new producer until data is there. */
		virt_wmb();
		intf->req_prod += avail;
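
		/*
		 * Notify lazily: only kick the event channel if the consumer
		 * has already caught up with the producer index sampled at
		 * the top of the loop, i.e. xenstored may have found the
		 * ring empty and gone to sleep. While it is still actively
		 * consuming, a further event would be redundant.
		 */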
		/* Implies mb(): other side will see the updated producer. */
		if (prod <= intf->req_cons)
			notify_remote_via_evtchn(xen_store_evtchn);
	}

	return bytes;
}

static int xb_data_to_read(void)
{
	struct xenstore_domain_interface *intf = xen_store_interface;
	return (intf->rsp_cons != intf->rsp_prod);
}

static int xb_read(void *data, unsigned int len)
{
	struct xenstore_domain_interface *intf = xen_store_interface;
	XENSTORE_RING_IDX cons, prod;
	unsigned int bytes = 0;

	while (len != 0) {
		unsigned int avail;
		const char *src;

		/* Read indexes, then verify. */
		cons = intf->rsp_cons;
		prod = intf->rsp_prod;
		if (cons == prod)
			return bytes;

		if (!check_indexes(cons, prod)) {
			intf->rsp_cons = intf->rsp_prod = 0;
			return -EIO;
		}

		src = get_input_chunk(cons, prod, intf->rsp, &avail);
		if (avail == 0)
			continue;
		if (avail > len)
			avail = len;

		/* Must read data /after/ reading the producer index. */
		virt_rmb();

		memcpy(data, src, avail);
		data += avail;
		len -= avail;
		bytes += avail;

		/* Other side must not see free space until we've copied out */
		virt_mb();
		intf->rsp_cons += avail;
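
		/*
		 * Mirror image of the write side: notify only if the ring
		 * was completely full at the consumer index sampled at the
		 * top of the loop, i.e. xenstored may be blocked waiting
		 * for space.
		 */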
		/* Implies mb(): other side will see the updated consumer. */
		if (intf->rsp_prod - cons >= XENSTORE_RING_SIZE)
			notify_remote_via_evtchn(xen_store_evtchn);
	}

	return bytes;
}
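
/*
 * process_msg() is a resumable state machine: xb_read() can return short,
 * so all parse progress lives in the function-local static 'state' and the
 * function simply returns 0 and waits to be called again by
 * xenbus_thread() once more ring data arrives. A reply is matched back to
 * its request by req_id; watch events are handed to xs_watch_msg().
 */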
static int process_msg(void)
{
	static struct {
		struct xsd_sockmsg msg;
		char *body;
		union {
			void *alloc;
			struct xs_watch_event *watch;
		};
		bool in_msg;
		bool in_hdr;
		unsigned int read;
	} state;
	struct xb_req_data *req;
	int err;
	unsigned int len;

	if (!state.in_msg) {
		state.in_msg = true;
		state.in_hdr = true;
		state.read = 0;

		/*
		 * We must disallow save/restore while reading a message.
		 * A partial read across s/r leaves us out of sync with
		 * xenstored.
		 * xs_response_mutex is locked as long as we are processing one
		 * message. state.in_msg will be true as long as we are holding
		 * the lock here.
		 */
		mutex_lock(&xs_response_mutex);

		if (!xb_data_to_read()) {
			/* We raced with save/restore: pending data 'gone'. */
			mutex_unlock(&xs_response_mutex);
			state.in_msg = false;
			return 0;
		}
	}

	if (state.in_hdr) {
		if (state.read != sizeof(state.msg)) {
			err = xb_read((void *)&state.msg + state.read,
				      sizeof(state.msg) - state.read);
			if (err < 0)
				goto out;
			state.read += err;
			if (state.read != sizeof(state.msg))
				return 0;
			if (state.msg.len > XENSTORE_PAYLOAD_MAX) {
				err = -EINVAL;
				goto out;
			}
		}

		len = state.msg.len + 1;
		if (state.msg.type == XS_WATCH_EVENT)
			len += sizeof(*state.watch);

		state.alloc = kmalloc(len, GFP_NOIO | __GFP_HIGH);
		if (!state.alloc)
			return -ENOMEM;

		if (state.msg.type == XS_WATCH_EVENT)
			state.body = state.watch->body;
		else
			state.body = state.alloc;
		state.in_hdr = false;
		state.read = 0;
	}
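
	/*
	 * Read the payload; like the header this may take several calls.
	 * The buffer was sized msg.len + 1 so it can be NUL-terminated
	 * below, and for XS_WATCH_EVENT the payload lands in the body[]
	 * tail of the struct xs_watch_event allocated above.
	 */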
	err = xb_read(state.body + state.read, state.msg.len - state.read);
	if (err < 0)
		goto out;
	state.read += err;
	if (state.read != state.msg.len)
		return 0;

	state.body[state.msg.len] = '\0';

	if (state.msg.type == XS_WATCH_EVENT) {
		state.watch->len = state.msg.len;
		err = xs_watch_msg(state.watch);
	} else {
		err = -ENOENT;
		mutex_lock(&xb_write_mutex);
		list_for_each_entry(req, &xs_reply_list, list) {
			if (req->msg.req_id == state.msg.req_id) {
				list_del(&req->list);
				err = 0;
				break;
			}
		}
		mutex_unlock(&xb_write_mutex);
		if (err)
			goto out;

		if (req->state == xb_req_state_wait_reply) {
			req->msg.req_id = req->caller_req_id;
			req->msg.type = state.msg.type;
			req->msg.len = state.msg.len;
			req->body = state.body;
			/* write body, then update state */
			virt_wmb();
			req->state = xb_req_state_got_reply;
			req->cb(req);
		} else
			kfree(req);
	}

	mutex_unlock(&xs_response_mutex);

	state.in_msg = false;
	state.alloc = NULL;

	return err;

out:
	mutex_unlock(&xs_response_mutex);
	state.in_msg = false;
	kfree(state.alloc);
	state.alloc = NULL;
	return err;
}
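
/*
 * process_writes() drains xb_write_list one request at a time, again as a
 * resumable state machine: idx -1 denotes the struct xsd_sockmsg header,
 * 0..num_vecs-1 the payload iovecs. A fully written request migrates to
 * xs_reply_list to await its reply; on error the requester is woken (or
 * the request freed, if it was already aborted).
 */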
static int process_writes(void)
{
	static struct {
		struct xb_req_data *req;
		int idx;
		unsigned int written;
	} state;
	void *base;
	unsigned int len;
	int err = 0;

	if (!xb_data_to_write())
		return 0;

	mutex_lock(&xb_write_mutex);

	if (!state.req) {
		state.req = list_first_entry(&xb_write_list,
					     struct xb_req_data, list);
		state.idx = -1;
		state.written = 0;
	}

	if (state.req->state == xb_req_state_aborted)
		goto out_err;

	while (state.idx < state.req->num_vecs) {
		if (state.idx < 0) {
			base = &state.req->msg;
			len = sizeof(state.req->msg);
		} else {
			base = state.req->vec[state.idx].iov_base;
			len = state.req->vec[state.idx].iov_len;
		}
		err = xb_write(base + state.written, len - state.written);
		if (err < 0)
			goto out_err;
		state.written += err;
		if (state.written != len)
			goto out;

		state.idx++;
		state.written = 0;
	}

	list_del(&state.req->list);
	state.req->state = xb_req_state_wait_reply;
	list_add_tail(&state.req->list, &xs_reply_list);
	state.req = NULL;

out:
	mutex_unlock(&xb_write_mutex);

	return 0;

out_err:
	state.req->msg.type = XS_ERROR;
	state.req->err = err;
	list_del(&state.req->list);
	if (state.req->state == xb_req_state_aborted)
		kfree(state.req);
	else {
		/* write err, then update state */
		virt_wmb();
		state.req->state = xb_req_state_got_reply;
		wake_up(&state.req->wq);
	}

	mutex_unlock(&xb_write_mutex);

	state.req = NULL;

	return err;
}

static int xb_thread_work(void)
{
	return xb_data_to_read() || xb_data_to_write();
}
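
/*
 * A single xenbus kthread multiplexes both directions: it sleeps on
 * xb_waitq until wake_waiting() or a queued writer signals work, then
 * makes one pass over reads and writes. -ENOMEM from process_msg() is not
 * logged; the thread schedules away and retries on the next iteration,
 * since the GFP_NOIO allocation may well succeed later.
 */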
static int xenbus_thread(void *unused)
{
	int err;

	while (!kthread_should_stop()) {
		if (wait_event_interruptible(xb_waitq, xb_thread_work()))
			continue;

		err = process_msg();
		if (err == -ENOMEM)
			schedule();
		else if (err)
			pr_warn_ratelimited("error %d while reading message\n",
					    err);

		err = process_writes();
		if (err)
			pr_warn_ratelimited("error %d while writing message\n",
					    err);
	}

	xenbus_task = NULL;
	return 0;
}

/**
 * xb_init_comms - Set up interrupt handler off store event channel.
 */
int xb_init_comms(void)
{
	struct xenstore_domain_interface *intf = xen_store_interface;

	if (intf->req_prod != intf->req_cons)
		pr_err("request ring is not quiescent (%08x:%08x)!\n",
		       intf->req_cons, intf->req_prod);

	if (intf->rsp_prod != intf->rsp_cons) {
		pr_warn("response ring is not quiescent (%08x:%08x): fixing up\n",
			intf->rsp_cons, intf->rsp_prod);
		/* breaks kdump */
		if (!reset_devices)
			intf->rsp_cons = intf->rsp_prod;
	}

	if (xenbus_irq) {
		/* Already have an irq; assume we're resuming */
		rebind_evtchn_irq(xen_store_evtchn, xenbus_irq);
	} else {
		int err;

		err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting,
						0, "xenbus", &xb_waitq);
		if (err < 0) {
			pr_err("request irq failed %i\n", err);
			return err;
		}

		xenbus_irq = err;

		if (!xenbus_task) {
			xenbus_task = kthread_run(xenbus_thread, NULL,
						  "xenbus");
			if (IS_ERR(xenbus_task))
				return PTR_ERR(xenbus_task);
		}
	}

	return 0;
}
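
/*
 * Only the irq binding is torn down here; the xenbus thread keeps running
 * and is reconnected by the next xb_init_comms() (on resume the existing
 * irq is simply rebound to the event channel).
 */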
void xb_deinit_comms(void)
{
	unbind_from_irqhandler(xenbus_irq, &xb_waitq);
	xenbus_irq = 0;
}