vhost.c 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666
  1. /*
  2. * OpenConnect (SSL + DTLS) VPN client
  3. *
  4. * Copyright © 2021 David Woodhouse.
  5. *
  6. * Author: David Woodhouse <dwmw2@infradead.org>
  7. *
  8. * This program is free software; you can redistribute it and/or
  9. * modify it under the terms of the GNU Lesser General Public License
  10. * version 2.1, as published by the Free Software Foundation.
  11. *
  12. * This program is distributed in the hope that it will be useful, but
  13. * WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. */
  17. #include <config.h>
  18. #include "openconnect-internal.h"
  19. #include <linux/if_tun.h>
  20. #include <linux/vhost.h>
  21. #include <sys/eventfd.h>
  22. #include <sys/ioctl.h>
  23. #include <sys/stat.h>
  24. #include <sys/types.h>
  25. #include <sys/wait.h>
  26. #include <sys/mman.h>
  27. #include <ctype.h>
  28. #include <errno.h>
  29. #include <fcntl.h>
  30. #include <signal.h>
  31. #include <stdio.h>
  32. #include <stdlib.h>
  33. #include <string.h>
  34. #include <unistd.h>
  35. #define debug_vhost 0
  36. #define barrier() __sync_synchronize()
  37. #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
  38. #define vio16(x) ((uint16_t)(x))
  39. #define vio32(x) ((uint32_t)(x))
  40. #define vio64(x) ((uint64_t)(x))
  41. #else
  42. #define vio16(x) ((uint16_t)__builtin_bswap16(x))
  43. #define vio32(x) ((uint32_t)__builtin_bswap32(x))
  44. #define vio64(x) ((uint64_t)__builtin_bswap64(x))
  45. #endif
  46. static int setup_vring(struct openconnect_info *vpninfo, int idx)
  47. {
  48. struct oc_vring *vring = idx ? &vpninfo->tx_vring : &vpninfo->rx_vring;
  49. int ret;
  50. if (getenv("NOVHOST"))
  51. return -EINVAL;
  52. vring->desc = calloc(vpninfo->vhost_ring_size, sizeof(*vring->desc));
  53. vring->avail = calloc(vpninfo->vhost_ring_size + 3, 2);
  54. vring->used = calloc(1 + (vpninfo->vhost_ring_size * 2), 4);
  55. if (!vring->desc || !vring->avail || !vring->used)
  56. return -ENOMEM;
  57. for (int i = 0; i < vpninfo->vhost_ring_size; i++)
  58. vring->avail->ring[i] = i;
  59. struct vhost_vring_state vs = { };
  60. vs.index = idx;
  61. vs.num = vpninfo->vhost_ring_size;
  62. if (ioctl(vpninfo->vhost_fd, VHOST_SET_VRING_NUM, &vs) < 0) {
  63. ret = -errno;
  64. vpn_progress(vpninfo, PRG_ERR, _("Failed to set vring #%d size: %s\n"),
  65. idx, strerror(-ret));
  66. return ret;
  67. }
  68. vs.num = 0;
  69. if (ioctl(vpninfo->vhost_fd, VHOST_SET_VRING_BASE, &vs) < 0) {
  70. ret = -errno;
  71. vpn_progress(vpninfo, PRG_ERR, _("Failed to set vring #%d base: %s\n"),
  72. idx, strerror(-ret));
  73. return ret;
  74. }
  75. struct vhost_vring_addr va = { };
  76. va.index = idx;
  77. va.desc_user_addr = (unsigned long)vring->desc;
  78. va.avail_user_addr = (unsigned long)vring->avail;
  79. va.used_user_addr = (unsigned long)vring->used;
  80. if (ioctl(vpninfo->vhost_fd, VHOST_SET_VRING_ADDR, &va) < 0) {
  81. ret = -errno;
  82. vpn_progress(vpninfo, PRG_ERR, _("Failed to set vring #%d base: %s\n"),
  83. idx, strerror(-ret));
  84. return ret;
  85. }
  86. struct vhost_vring_file vf = { };
  87. vf.index = idx;
  88. vf.fd = vpninfo->tun_fd;
  89. if (ioctl(vpninfo->vhost_fd, VHOST_NET_SET_BACKEND, &vf) < 0) {
  90. ret = -errno;
  91. vpn_progress(vpninfo, PRG_ERR, _("Failed to set vring #%d RX backend: %s\n"),
  92. idx, strerror(-ret));
  93. return ret;
  94. }
  95. vf.fd = vpninfo->vhost_call_fd;
  96. if (ioctl(vpninfo->vhost_fd, VHOST_SET_VRING_CALL, &vf) < 0) {
  97. ret = -errno;
  98. vpn_progress(vpninfo, PRG_ERR, _("Failed to set vring #%d call eventfd: %s\n"),
  99. idx, strerror(-ret));
  100. return ret;
  101. }
  102. vf.fd = vpninfo->vhost_kick_fd;
  103. if (ioctl(vpninfo->vhost_fd, VHOST_SET_VRING_KICK, &vf) < 0) {
  104. ret = -errno;
  105. vpn_progress(vpninfo, PRG_ERR, _("Failed to set vring #%d kick eventfd: %s\n"),
  106. idx, strerror(-ret));
  107. return ret;
  108. }
  109. return 0;
  110. }
  111. /*
  112. * This is awful. The kernel doesn't let us just ask for a 1:1 mapping of
  113. * our virtual address space; we have to *know* the minimum and maximum
  114. * addresses. We can't test it directly with VHOST_SET_MEM_TABLE because
  115. * that actually succeeds, and the failure only occurs later when we try
  116. * to use a buffer at an address that *is* valid, but our memory table
  117. * *could* point to addresses that aren't. Ewww.
  118. *
  119. * So... attempt to work out what TASK_SIZE is for the kernel we happen
  120. * to be running on right now...
  121. */
  122. static int testaddr(unsigned long addr)
  123. {
  124. void *res = mmap((void *)addr, getpagesize(), PROT_NONE,
  125. MAP_FIXED|MAP_ANONYMOUS, -1, 0);
  126. if (res == MAP_FAILED) {
  127. if (errno == EEXIST || errno == EINVAL)
  128. return 1;
  129. /* We get ENOMEM for a bad virtual address */
  130. return 0;
  131. }
  132. /* It shouldn't actually succeed without either MAP_SHARED or
  133. * MAP_PRIVATE in the flags, but just in case... */
  134. munmap((void *)addr, getpagesize());
  135. return 1;
  136. }
/*
 * Work out the size of the mappable userspace virtual address range by
 * probing with testaddr(), and record it as a single identity-mapped
 * region in vmem->regions[0] starting at one page (page zero is skipped).
 *
 * Strategy: try the very top page first; otherwise halve downwards until
 * a working address is found, then binary-search the boundary between the
 * highest known-good and lowest known-bad page. At each candidate, the
 * one or two pages immediately below are also tried, since the limit is
 * often a page or two below the power-of-two boundary.
 *
 * Returns 0 on success, -EINVAL if the downward search reaches zero.
 */
static int find_vmem_range(struct openconnect_info *vpninfo,
			   struct vhost_memory *vmem)
{
	const unsigned long page_size = getpagesize();
	unsigned long top;
	unsigned long bottom;

	/* Start with the highest page-aligned address. */
	top = -page_size;
	if (testaddr(top)) {
		vmem->regions[0].memory_size = top;
		goto out;
	}

	/* 'top' is the lowest address known *not* to work */
	bottom = top;
	while (1) {
		bottom >>= 1;
		bottom &= ~(page_size - 1);
		if (!bottom) {
			/* NOTE(review): message lacks a trailing '\n' unlike
			 * the other progress messages in this file. */
			vpn_progress(vpninfo, PRG_ERR,
				     _("Failed to find virtual task size; search reached zero"));
			return -EINVAL;
		}

		if (testaddr(bottom))
			break;
		top = bottom;
	}

	/* It's often a page or two below the boundary */
	top -= page_size;
	if (testaddr(top)) {
		vmem->regions[0].memory_size = top;
		goto out;
	}
	top -= page_size;
	if (testaddr(top)) {
		vmem->regions[0].memory_size = top;
		goto out;
	}

	/* Now, bottom is the highest address known to work,
	   and we must search between it and 'top' which is
	   the lowest address known not to. */
	while (bottom + page_size != top) {
		unsigned long test = bottom + (top - bottom) / 2;
		test &= ~(page_size - 1);

		if (testaddr(test)) {
			bottom = test;
			continue;
		}

		/* 'test' failed: check the page or two just below it
		 * before declaring it the new upper bound. */
		test -= page_size;
		if (testaddr(test)) {
			vmem->regions[0].memory_size = test;
			goto out;
		}

		test -= page_size;
		if (testaddr(test)) {
			vmem->regions[0].memory_size = test;
			goto out;
		}

		top = test;
	}
	vmem->regions[0].memory_size = bottom;
 out:
	/* Identity map starting at one page, so address 0 is never used. */
	vmem->regions[0].guest_phys_addr = page_size;
	vmem->regions[0].userspace_addr = page_size;
	vpn_progress(vpninfo, PRG_DEBUG, _("Detected virtual address range 0x%lx-0x%lx\n"),
		     page_size,
		     (unsigned long)(page_size + vmem->regions[0].memory_size));
	return 0;
}
  204. #define OC_VHOST_NET_FEATURES ((1ULL << VHOST_NET_F_VIRTIO_NET_HDR) | \
  205. (1ULL << VIRTIO_F_VERSION_1) | \
  206. (1ULL << VIRTIO_RING_F_EVENT_IDX))
  207. int setup_vhost(struct openconnect_info *vpninfo, int tun_fd)
  208. {
  209. int ret;
  210. /* If tuned for latency not bandwidth, that isn't vhost-net */
  211. if (vpninfo->max_qlen < 16) {
  212. vpn_progress(vpninfo, PRG_DEBUG,
  213. _("Not using vhost-net due to low queue length %d\n"),
  214. vpninfo->max_qlen);
  215. return -EINVAL;
  216. }
  217. vpninfo->vhost_ring_size = 1 << (32 - __builtin_clz(vpninfo->max_qlen - 1));
  218. if (vpninfo->vhost_ring_size < 32)
  219. vpninfo->vhost_ring_size = 32;
  220. if (vpninfo->vhost_ring_size > 32768)
  221. vpninfo->vhost_ring_size = 32768;
  222. vpninfo->vhost_fd = open("/dev/vhost-net", O_RDWR);
  223. if (vpninfo->vhost_fd == -1) {
  224. ret = -errno;
  225. vpn_progress(vpninfo, PRG_ERR, _("Failed to open /dev/vhost-net: %s\n"),
  226. strerror(-ret));
  227. goto err;
  228. }
  229. if (ioctl(vpninfo->vhost_fd, VHOST_SET_OWNER, NULL) < 0) {
  230. ret = -errno;
  231. vpn_progress(vpninfo, PRG_DEBUG, _("Failed to set vhost ownership: %s\n"),
  232. strerror(-ret));
  233. goto err;
  234. }
  235. uint64_t features;
  236. if (ioctl(vpninfo->vhost_fd, VHOST_GET_FEATURES, &features) < 0) {
  237. ret = -errno;
  238. vpn_progress(vpninfo, PRG_DEBUG, _("Failed to get vhost features: %s\n"),
  239. strerror(-ret));
  240. goto err;
  241. }
  242. if ((features & OC_VHOST_NET_FEATURES) != OC_VHOST_NET_FEATURES) {
  243. vpn_progress(vpninfo, PRG_DEBUG, _("vhost-net lacks required features: %llx\n"),
  244. (unsigned long long)features);
  245. return -EOPNOTSUPP;
  246. }
  247. features = OC_VHOST_NET_FEATURES;
  248. if (ioctl(vpninfo->vhost_fd, VHOST_SET_FEATURES, &features) < 0) {
  249. ret = -errno;
  250. vpn_progress(vpninfo, PRG_ERR, _("Failed to set vhost features: %s\n"),
  251. strerror(-ret));
  252. goto err;
  253. }
  254. vpninfo->vhost_kick_fd = eventfd(0, EFD_CLOEXEC|EFD_NONBLOCK);
  255. if (vpninfo->vhost_kick_fd == -1) {
  256. ret = -errno;
  257. vpn_progress(vpninfo, PRG_ERR, _("Failed to open vhost kick eventfd: %s\n"),
  258. strerror(-ret));
  259. goto err;
  260. }
  261. vpninfo->vhost_call_fd = eventfd(0, EFD_CLOEXEC|EFD_NONBLOCK);
  262. if (vpninfo->vhost_call_fd == -1) {
  263. ret = -errno;
  264. vpn_progress(vpninfo, PRG_ERR, _("Failed to open vhost call eventfd: %s\n"),
  265. strerror(-ret));
  266. goto err;
  267. }
  268. struct vhost_memory *vmem = alloca(sizeof(*vmem) + sizeof(vmem->regions[0]));
  269. memset(vmem, 0, sizeof(*vmem) + sizeof(vmem->regions[0]));
  270. vmem->nregions = 1;
  271. ret = find_vmem_range(vpninfo, vmem);
  272. if (ret)
  273. goto err;
  274. if (ioctl(vpninfo->vhost_fd, VHOST_SET_MEM_TABLE, vmem) < 0) {
  275. ret = -errno;
  276. vpn_progress(vpninfo, PRG_DEBUG, _("Failed to set vhost memory map: %s\n"),
  277. strerror(-ret));
  278. goto err;
  279. }
  280. ret = setup_vring(vpninfo, 0);
  281. if (ret)
  282. goto err;
  283. ret = setup_vring(vpninfo, 1);
  284. if (ret)
  285. goto err;
  286. /* This isn't just for bufferbloat; there are various issues with the XDP
  287. * code path:
  288. * https://lore.kernel.org/netdev/2433592d2b26deec33336dd3e83acfd273b0cf30.camel@infradead.org/T/
  289. */
  290. int sndbuf = vpninfo->ip_info.mtu;
  291. if (!sndbuf)
  292. sndbuf = 1500;
  293. sndbuf *= 2 * vpninfo->max_qlen;
  294. if (ioctl(vpninfo->tun_fd, TUNSETSNDBUF, &sndbuf) < 0) {
  295. ret = -errno;
  296. vpn_progress(vpninfo, PRG_INFO, _("Failed to set tun sndbuf: %s\n"),
  297. strerror(-ret));
  298. goto err;
  299. }
  300. vpn_progress(vpninfo, PRG_INFO, _("Using vhost-net for tun acceleration, ring size %d\n"),
  301. vpninfo->vhost_ring_size);
  302. monitor_fd_new(vpninfo, vhost_call);
  303. monitor_read_fd(vpninfo, vhost_call);
  304. return 0;
  305. err:
  306. shutdown_vhost(vpninfo);
  307. return ret;
  308. }
  309. static void free_vring(struct openconnect_info *vpninfo,
  310. struct oc_vring *vring)
  311. {
  312. if (vring->desc) {
  313. for (int i = 0; i < vpninfo->vhost_ring_size; i++) {
  314. if (vring->desc[i].addr)
  315. free_pkt(vpninfo, pkt_from_hdr(vio64(vring->desc[i].addr), virtio.h));
  316. }
  317. free(vring->desc);
  318. vring->desc = NULL;
  319. }
  320. free(vring->avail);
  321. vring->avail = NULL;
  322. free(vring->used);
  323. vring->used = NULL;
  324. }
  325. void shutdown_vhost(struct openconnect_info *vpninfo)
  326. {
  327. if (vpninfo->vhost_fd != -1)
  328. close(vpninfo->vhost_fd);
  329. if (vpninfo->vhost_kick_fd != -1)
  330. close(vpninfo->vhost_kick_fd);
  331. if (vpninfo->vhost_call_fd != -1)
  332. close(vpninfo->vhost_call_fd);
  333. vpninfo->vhost_fd = vpninfo->vhost_kick_fd = vpninfo->vhost_call_fd = -1;
  334. free_vring(vpninfo, &vpninfo->rx_vring);
  335. free_vring(vpninfo, &vpninfo->tx_vring);
  336. }
  337. /* used_event is the uint16_t element after the end of the
  338. * avail ring:
  339. *
  340. * struct virtq_avail {
  341. * le16 flags;
  342. * le16 idx;
  343. * le16 ring[ Queue Size ];
  344. * le16 used_event;
  345. * };
  346. */
  347. #define USED_EVENT(v, r) ((r)->avail->ring[(v)->vhost_ring_size])
  348. /* avail_event is the uint16_t element after the end of the
  349. * used ring, which is slightly less trivial to reference
  350. * than the used_event:
  351. *
  352. * struct virtq_used_elem {
  353. * le32 id;
  354. * le32 len;
  355. * };
  356. *
  357. * struct virtq_used {
  358. * le16 flags;
  359. * le16 idx;
  360. * struct virtq_used_elem ring[ Queue Size ];
  361. * le16 avail_event;
  362. * };
  363. *
  364. * So if we thought of it as an array of 16-bit values, 'flags' would
  365. * be at element [0], 'idx' at [1], the ring would start at [2], the
  366. * *second* element of the ring would be at [ 2 + 4 ] since each element
  367. * is as big as four 16-bit values, and thus avail_event would be at
  368. * [2 + 4 * RING_SIZE ]
  369. */
  370. #define AVAIL_EVENT(v, r) ((&(r)->used->flags)[2 + ((v)->vhost_ring_size * 4)])
  371. static void dump_vring(struct openconnect_info *vpninfo, struct oc_vring *ring)
  372. {
  373. vpn_progress(vpninfo, PRG_ERR,
  374. "next_avail 0x%x, used idx 0x%x seen_used 0x%x\n",
  375. vio16(ring->avail->idx), vio16(ring->used->idx),
  376. ring->seen_used);
  377. vpn_progress(vpninfo, PRG_ERR, "# ADDR AVAIL USED\n");
  378. /* Not an off-by-one; it's dumping avail_event and used_event too. */
  379. for (int i = 0; i < vpninfo->vhost_ring_size + 1; i++)
  380. vpn_progress(vpninfo, PRG_ERR,
  381. "%d %p %x %x\n", i,
  382. (void *)(unsigned long)vio64(ring->desc[i].addr),
  383. vio16(ring->avail->ring[i]),
  384. vio32(ring->used->ring[i].id));
  385. }
  386. /* With thanks to Eugenio Pérez Martin <eperezma@redhat.com> for writing
  387. * https://www.redhat.com/en/blog/virtqueues-and-virtio-ring-how-data-travels
  388. * which saved a lot of time and caffeine in getting this to work. */
/*
 * Process one vring (tx=1: packets from the VPN out to the tun device;
 * tx=0: packets arriving from the tun device, to be sent over the VPN).
 *
 * Phase 1 reclaims 'used' descriptors the kernel has completed; phase 2
 * refills the 'avail' ring with fresh empty buffers (RX) or packets from
 * the VPN incoming queue (TX). Sets *kick when the kernel's avail_event
 * indicates it wants a kick for the entries we just queued.
 *
 * Returns 1 if progress was made, 0 if not, or -EIO (with quit_reason
 * set) on a fatal ring inconsistency.
 */
static inline int process_ring(struct openconnect_info *vpninfo, int tx, uint64_t *kick)
{
	struct oc_vring *ring = tx ? &vpninfo->tx_vring : &vpninfo->rx_vring;
	const unsigned int ring_mask = vpninfo->vhost_ring_size - 1;
	int did_work = 0;

	/* First handle 'used' packets handed back to us from the ring.
	 * For TX packets (incoming from VPN into the tun device) we just
	 * free them now. For RX packets from the tun device we fill in
	 * the length and queue them for sending over the VPN. */
	uint16_t used_idx = vio16(ring->used->idx);
	while (used_idx != ring->seen_used) {
		uint32_t desc = vio32(ring->used->ring[ring->seen_used & ring_mask].id);
		uint32_t len = vio32(ring->used->ring[ring->seen_used & ring_mask].len);

		if (desc > ring_mask) {
		inval:
			vpn_progress(vpninfo, PRG_ERR,
				     _("Error: vhost gave back invalid descriptor %d, len %d\n"),
				     desc, len);
			dump_vring(vpninfo, ring);
			vpninfo->quit_reason = "vhost error";
			return -EIO;
		}

		uint64_t addr = vio64(ring->desc[desc].addr);
		if (!addr) {
			vpn_progress(vpninfo, PRG_ERR,
				     _("vhost gave back empty descriptor %d\n"),
				     desc);
			dump_vring(vpninfo, ring);
			vpninfo->quit_reason = "vhost error";
			return -EIO;
		}

		/* Recover the struct pkt from the virtio-header address we
		 * stored in the descriptor when queuing it. */
		struct pkt *this = pkt_from_hdr(addr, virtio.h);

		if (tx) {
			vpn_progress(vpninfo, PRG_TRACE,
				     _("Free TX packet %p [%d] [used %d]\n"),
				     this, ring->seen_used, used_idx);
			/* 'rx' from the VPN's point of view: these packets
			 * were received over the VPN and written to tun. */
			vpninfo->stats.rx_pkts++;
			vpninfo->stats.rx_bytes += this->len;
			free_pkt(vpninfo, this);
		} else {
			/* Kernel must have written at least the virtio header. */
			if (len < sizeof(this->virtio.h))
				goto inval;
			this->len = len - sizeof(this->virtio.h);
			vpn_progress(vpninfo, PRG_TRACE,
				     _("RX packet %p(%d) [%d] [used %d]\n"),
				     this, this->len, ring->seen_used, used_idx);
			if (debug_vhost)
				dump_buf_hex(vpninfo, PRG_TRACE, '<',
					     (void *) &this->virtio.h,
					     this->len + sizeof(this->virtio.h));

			/* If the incoming queue fill up, pretend we can't see any more
			 * by contracting our idea of 'used_idx' back to *this* one. */
			if (queue_packet(&vpninfo->outgoing_queue, this) >= vpninfo->max_qlen)
				used_idx = ring->seen_used + 1;

			did_work = 1;
		}

		/* Zero the descriptor and line it up in the next slot in the avail ring. */
		ring->desc[desc].addr = 0;
		/* NOTE(review): the assignment truncates to the 16-bit avail
		 * ring element, so vio32() stores 0 on big-endian hosts —
		 * this looks like it should be vio16(desc). Harmless on
		 * little-endian. Confirm against the virtio spec. */
		ring->avail->ring[ring->seen_used++ & ring_mask] = vio32(desc);
	}

	/* Now handle 'avail' and prime the RX ring full of empty buffers, or
	 * the TX ring with anything we have on the VPN incoming queue. */
	uint16_t next_avail = vio16(ring->avail->idx);
	uint32_t desc = ring->avail->ring[next_avail & ring_mask];
	/* A zero descriptor address marks a free slot (see above). */
	while (!ring->desc[desc].addr) {
		struct pkt *this;
		if (tx) {
			this = dequeue_packet(&vpninfo->incoming_queue);
			if (!this)
				break;

			/* If only a few packets on the queue, just send them
			 * directly. The latency is much better. We benefit from
			 * vhost-net TX when we're overloaded and want to use all
			 * our CPU on the RX and crypto; there's not a lot of point
			 * otherwise. */
			if (!*kick && vpninfo->incoming_queue.count < vpninfo->max_qlen / 2 &&
			    next_avail == AVAIL_EVENT(vpninfo, ring)) {
				if (!os_write_tun(vpninfo, this)) {
					vpninfo->stats.rx_pkts++;
					vpninfo->stats.rx_bytes += this->len;
					free_pkt(vpninfo, this);
					continue;
				}
				/* Failed! Pretend it never happened; queue for vhost */
			}
			memset(&this->virtio.h, 0, sizeof(this->virtio.h));
		} else {
			int len = vpninfo->ip_info.mtu;
			this = alloc_pkt(vpninfo, len + vpninfo->pkt_trailer);
			if (!this)
				break;
			this->len = len;
		}

		/* RX buffers are device-writable. */
		if (!tx)
			ring->desc[desc].flags = vio16(VRING_DESC_F_WRITE);
		ring->desc[desc].addr = vio64((unsigned long)this + pkt_offset(virtio.h));
		ring->desc[desc].len = vio32(this->len + sizeof(this->virtio.h));
		/* Descriptor must be fully visible before publishing avail->idx. */
		barrier();

		if (debug_vhost) {
			if (tx) {
				vpn_progress(vpninfo, PRG_TRACE,
					     _("Queue TX packet %p at desc %d avail %d\n"),
					     this, desc, next_avail);
				if (debug_vhost)
					dump_buf_hex(vpninfo, PRG_TRACE, '>',
						     (void *)&this->virtio.h,
						     this->len + sizeof(this->virtio.h));
			} else
				vpn_progress(vpninfo, PRG_TRACE,
					     _("Queue RX packet %p at desc %d avail %d\n"),
					     this, desc, next_avail);
		}

		/* Publish the new avail index, then (after a barrier) check
		 * whether the kernel asked to be kicked for this entry. */
		ring->avail->idx = vio16(++next_avail);
		barrier();
		uint16_t avail_event = AVAIL_EVENT(vpninfo, ring);
		barrier();
		if (avail_event == vio16(next_avail-1))
			*kick = 1;

		desc = ring->avail->ring[next_avail & ring_mask];
	}

	return did_work;
}
/*
 * Arm the wakeup (used_event) for one vring before we sleep in the
 * mainloop, asking the kernel to signal the call eventfd when the used
 * index advances past what we've already seen.
 *
 * Returns 1 if the ring moved on while we were arming it (caller must
 * loop again rather than sleep), 0 otherwise. The write/barrier/re-check
 * ordering here is what closes the race against the kernel.
 */
static int set_ring_wake(struct openconnect_info *vpninfo, int tx)
{
	/* No wakeup for tun RX if the queue is already full. */
	if (!tx && vpninfo->outgoing_queue.count >= vpninfo->max_qlen)
		return 0;

	struct oc_vring *ring = tx ? &vpninfo->tx_vring : &vpninfo->rx_vring;
	uint16_t wake_idx = vio16(ring->seen_used);

	/* Ask it to wake us if the used idx moves on. */
	USED_EVENT(vpninfo, ring) = wake_idx;
	barrier();

	/* If it already did, loop again immediately */
	if (ring->used->idx != wake_idx) {
		vpn_progress(vpninfo, PRG_TRACE,
			     _("Immediate wake because vhost ring moved on from 0x%x to 0x%x\n"),
			     ring->used->idx, wake_idx);
		return 1;
	}

	return 0;
}
/*
 * Mainloop hook for the vhost-net data path. Processes both vrings,
 * kicks the kernel if it asked for notification, drains the call
 * eventfd once we're idle, and re-arms the ring wakeups.
 *
 * 'timeout' is currently unused here. Returns non-zero if the caller
 * should take another turn around the mainloop without sleeping.
 */
int vhost_tun_mainloop(struct openconnect_info *vpninfo, int *timeout, int readable, int did_work)
{
	uint64_t kick = 0;

	/* Skip the RX ring if the queue towards the VPN is already full. */
	if (vpninfo->outgoing_queue.count < vpninfo->max_qlen) {
		did_work += process_ring(vpninfo, 0, &kick);
		if (vpninfo->quit_reason)
			return 0;
	}

	did_work += process_ring(vpninfo, 1, &kick);
	if (vpninfo->quit_reason)
		return 0;

	/* process_ring() set 'kick' if the kernel's avail_event asked to
	 * be notified about entries we queued. */
	if (kick) {
		barrier();
		if (write(vpninfo->vhost_kick_fd, &kick, sizeof(kick)) != sizeof(kick)) {
			/* Can never happen */
			vpn_progress(vpninfo, PRG_ERR,
				     _("Failed to kick vhost-net eventfd\n"));
		}
		vpn_progress(vpninfo, PRG_TRACE,
			     _("Kick vhost ring\n"));
		did_work = 1;
	}

	/* We only read from the eventfd when we're done with *actual*
	 * work, which is when !did_work. Except in the cases where
	 * we race with setting the ring wakeup and have to go round
	 * again. */
	if (!did_work && readable) {
		uint64_t evt;
		if (read(vpninfo->vhost_call_fd, &evt, sizeof(evt)) != sizeof(evt)) {
			/* Do nothing */
		}
	}

	/* If we aren't going to have one more turn around the mainloop,
	 * set the wake event indices. And if we find the rings have
	 * moved on while we're doing that, take one more turn around
	 * the mainloop... */
	return did_work || set_ring_wake(vpninfo, 1) || set_ring_wake(vpninfo, 0);
}