futex.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535
  1. /*
  2. * Copyright (c) 2023 Agustina Arzille.
  3. *
  4. * This program is free software: you can redistribute it and/or modify
  5. * it under the terms of the GNU General Public License as published by
  6. * the Free Software Foundation, either version 3 of the License, or
  7. * (at your option) any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. *
  17. */
  18. #include <kern/clock.h>
  19. #include <kern/sleepq.h>
  20. #include <kern/turnstile.h>
  21. #include <kern/unwind.h>
  22. #include <kern/user.h>
  23. #include <vm/map.h>
  24. #include <vm/object.h>
  25. #define FUTEX_DATA_SHARED FUTEX_SHARED
  26. #define FUTEX_DATA_WAIT 0x02
  27. #define FUTEX_DATA_WAKE 0x04
  28. // Operations used in 'futex_map_addr' below.
  29. #define FUTEX_OP_CMP 0
  30. #define FUTEX_OP_SET 1
  31. #define FUTEX_OP_ROBUST_CLEAR 2
  32. #define FUTEX_OP_LOCK_PI 3
  33. struct futex_data;
  34. /*
  35. * Futex operations.
  36. *
  37. * Since the operations themselves are very similar across futexes'
  38. * "flavors", this structure aims to encapsulate the subtleties among them.
  39. */
  40. struct futex_ops
  41. {
  42. void* (*acquire) (const union sync_key *);
  43. void (*release) (void *);
  44. union
  45. {
  46. int (*wait) (struct futex_data *, int, uint32_t, uint64_t);
  47. int (*wake) (void *, uint32_t);
  48. };
  49. };
  50. struct futex_data
  51. {
  52. int *addr;
  53. struct vm_map *map;
  54. union sync_key key;
  55. uint32_t mode;
  56. void *wait_obj;
  57. const struct futex_ops *ops;
  58. };
  59. static int
  60. futex_check_addr (int *addr)
  61. {
  62. if (!user_check_range (addr, sizeof (int)))
  63. return (EFAULT);
  64. else if (((uintptr_t)addr % sizeof (int)) != 0)
  65. return (EINVAL);
  66. return (0);
  67. }
  68. static int
  69. futex_key_init (union sync_key *key, struct vm_map *map,
  70. int *addr, uint32_t flags)
  71. {
  72. if (likely (!(flags & FUTEX_SHARED)))
  73. /*
  74. * For task-local futexes the key is made up of the virtual address
  75. * itself and the calling thread's VM map.
  76. */
  77. sync_key_local_init (key, addr, map);
  78. else
  79. {
  80. /*
  81. * For task-shared futexes, the key is made of the <VM object, offset>
  82. * pair, obtained by performing a VM lookup on the virtual address.
  83. */
  84. struct vm_map_entry entry;
  85. int error = vm_map_lookup (map, (uintptr_t)addr, &entry);
  86. if (error)
  87. return (error);
  88. sync_key_shared_init (key, entry.object, entry.offset);
  89. }
  90. return (0);
  91. }
  92. static void*
  93. futex_sleepq_lend (const union sync_key *key)
  94. {
  95. return (sleepq_lend_key (key));
  96. }
  97. static void*
  98. futex_pi_lend (const union sync_key *key)
  99. {
  100. return (turnstile_lend_key (key));
  101. }
  102. static void
  103. futex_sleepq_return (void *obj)
  104. {
  105. sleepq_return ((struct sleepq *)obj);
  106. }
  107. static void
  108. futex_pi_return (void *obj)
  109. {
  110. turnstile_return ((struct turnstile *)obj);
  111. }
  112. static void*
  113. futex_sleepq_acquire (const union sync_key *key)
  114. {
  115. return (sleepq_acquire_key (key));
  116. }
  117. static void*
  118. futex_pi_acquire (const union sync_key *key)
  119. {
  120. return (turnstile_acquire_key (key));
  121. }
  122. static void
  123. futex_sleepq_release (void *obj)
  124. {
  125. sleepq_release ((struct sleepq *)obj);
  126. }
  127. static void
  128. futex_pi_release (void *obj)
  129. {
  130. turnstile_release ((struct turnstile *)obj);
  131. thread_propagate_priority ();
  132. }
  133. static int
  134. futex_sleepq_wait (struct futex_data *data, int value __unused,
  135. uint32_t flags, uint64_t ticks)
  136. {
  137. uint64_t *timep = NULL;
  138. if (flags & FUTEX_TIMED)
  139. {
  140. timep = &ticks;
  141. if (!(flags & FUTEX_ABSTIME))
  142. ticks += clock_get_time () + 1;
  143. }
  144. return (sleepq_wait_movable ((struct sleepq **)&data->wait_obj,
  145. "futex", timep));
  146. }
  147. static int futex_map_addr (struct futex_data *, int, int);
  148. static int
  149. futex_pi_wait (struct futex_data *data, int value,
  150. uint32_t flags, uint64_t ticks)
  151. {
  152. struct thread *thr = thread_by_kuid ((uint32_t)value & FUTEX_TID_MASK);
  153. if (! thr)
  154. return (ESRCH);
  155. else if (thr == thread_self ())
  156. {
  157. thread_unref (thr);
  158. return (EALREADY);
  159. }
  160. int error;
  161. if (!(flags & FUTEX_TIMED))
  162. error = turnstile_wait (data->wait_obj, "futex-pi", thr);
  163. else
  164. {
  165. if (!(flags & FUTEX_ABSTIME))
  166. ticks += clock_get_time () + 1;
  167. error = turnstile_timedwait (data->wait_obj, "futex-pi", thr, ticks);
  168. }
  169. thread_unref (thr);
  170. if (error)
  171. return (error);
  172. /*
  173. * PI futexes have ownership semantics. In order to apply them, we
  174. * need to change the futex word's value after a succesful wait.
  175. */
  176. error = futex_map_addr (data, thread_id (thread_self ()), FUTEX_OP_LOCK_PI);
  177. if (! error)
  178. turnstile_own (data->wait_obj);
  179. return (error);
  180. }
  181. static int
  182. futex_sleepq_wake (void *sleepq, uint32_t flags)
  183. {
  184. if (flags & FUTEX_BROADCAST)
  185. sleepq_broadcast ((struct sleepq *)sleepq);
  186. else
  187. sleepq_signal ((struct sleepq *)sleepq);
  188. return (0);
  189. }
  190. static int
  191. futex_pi_wake (void *obj, uint32_t flags)
  192. {
  193. struct turnstile *turnstile = (struct turnstile *)obj;
  194. if (!turnstile_owned_by (turnstile, thread_self ()))
  195. return (EPERM);
  196. turnstile_disown (turnstile);
  197. if (flags & FUTEX_BROADCAST)
  198. turnstile_broadcast (turnstile);
  199. else
  200. turnstile_signal (turnstile);
  201. return (0);
  202. }
  203. // Set of operations used when waiting on a futex.
  204. static const struct futex_ops futex_wait_ops[] =
  205. {
  206. {
  207. .acquire = futex_sleepq_lend,
  208. .release = futex_sleepq_return,
  209. .wait = futex_sleepq_wait
  210. },
  211. {
  212. .acquire = futex_pi_lend,
  213. .release = futex_pi_return,
  214. .wait = futex_pi_wait
  215. }
  216. };
  217. // Operations used when waking futex waiters.
  218. static const struct futex_ops futex_wake_ops[] =
  219. {
  220. {
  221. .acquire = futex_sleepq_acquire,
  222. .release = futex_sleepq_release,
  223. .wake = futex_sleepq_wake
  224. },
  225. {
  226. .acquire = futex_pi_acquire,
  227. .release = futex_pi_release,
  228. .wake = futex_pi_wake
  229. }
  230. };
  231. static const struct futex_ops*
  232. futex_select_ops (uint32_t flags, uint32_t mode)
  233. {
  234. uint32_t idx = (flags & FUTEX_PI) / FUTEX_PI;
  235. assert (idx <= 1);
  236. return ((mode & FUTEX_DATA_WAIT) ?
  237. &futex_wait_ops[idx] : &futex_wake_ops[idx]);
  238. }
  239. static int
  240. futex_data_init (struct futex_data *data, int *addr,
  241. uint32_t flags, uint32_t mode)
  242. {
  243. data->wait_obj = NULL;
  244. data->mode = 0;
  245. int error = futex_check_addr (addr);
  246. if (error)
  247. return (error);
  248. data->mode = mode;
  249. data->map = vm_map_self ();
  250. data->addr = addr;
  251. error = futex_key_init (&data->key, data->map, addr, flags);
  252. if (error)
  253. return (error);
  254. else if (flags & FUTEX_SHARED)
  255. data->mode |= FUTEX_DATA_SHARED;
  256. data->ops = futex_select_ops (flags, mode);
  257. return (0);
  258. }
  259. static void
  260. futex_data_release (struct futex_data *data)
  261. {
  262. if (!data->wait_obj)
  263. return;
  264. data->ops->release (data->wait_obj);
  265. data->wait_obj = NULL;
  266. }
  267. static void
  268. futex_data_fini (void *arg)
  269. {
  270. struct futex_data *data = arg;
  271. if (data->mode & FUTEX_DATA_SHARED)
  272. vm_object_unref (data->key.shared.object);
  273. futex_data_release (data);
  274. }
  275. static int
  276. futex_robust_clear (int *addr, int value)
  277. {
  278. while (1)
  279. {
  280. int tmp = atomic_load_rlx (addr);
  281. if ((tmp & FUTEX_TID_MASK) != (uint32_t)value)
  282. return (0);
  283. else if (atomic_cas_bool_rel (addr, tmp, tmp | FUTEX_OWNER_DIED))
  284. /*
  285. * Use a negative value to indicate that a wakeup is needed. This is
  286. * done so that it doesn't clash with any errno value.
  287. */
  288. return ((tmp & FUTEX_WAITERS) ? -1 : 0);
  289. cpu_pause ();
  290. }
  291. }
  292. static int
  293. futex_map_addr (struct futex_data *data, int value, int op)
  294. {
  295. int prot = op == FUTEX_OP_CMP ? VM_PROT_READ : VM_PROT_RDWR;
  296. struct unw_fixup fixup;
  297. int error = unw_fixup_save (&fixup);
  298. if (error)
  299. {
  300. /*
  301. * There was a page fault when accessing the address. Test if this
  302. * was simply because it needs to be paged in, and we couldn't do it
  303. * earlier due to us holding a spinlock, or because there was another
  304. * issue (like a protection error).
  305. */
  306. thread_pagefault_enable ();
  307. if (error != EAGAIN)
  308. return (error);
  309. // Drop the lock on the wait queue and page in the address.
  310. futex_data_release (data);
  311. cpu_flags_t flags;
  312. cpu_intr_save (&flags);
  313. error = vm_map_fault (data->map, (uintptr_t)data->addr, prot);
  314. cpu_intr_restore (flags);
  315. if (error)
  316. return (error);
  317. data->wait_obj = data->ops->acquire (&data->key);
  318. error = 0;
  319. }
  320. thread_pagefault_disable ();
  321. switch (op)
  322. {
  323. case FUTEX_OP_CMP:
  324. error = value == *data->addr ? 0 : EAGAIN;
  325. break;
  326. case FUTEX_OP_SET:
  327. *data->addr = value;
  328. break;
  329. case FUTEX_OP_ROBUST_CLEAR:
  330. error = futex_robust_clear (data->addr, value);
  331. break;
  332. case FUTEX_OP_LOCK_PI:
  333. error = atomic_cas_bool_acq (data->addr, 0, value | FUTEX_WAITERS) ?
  334. 0 : EAGAIN;
  335. break;
  336. }
  337. thread_pagefault_enable ();
  338. return (error);
  339. }
  340. int
  341. futex_wait (int *addr, int value, uint32_t flags, uint64_t ticks)
  342. {
  343. CLEANUP (futex_data_fini) struct futex_data data;
  344. int error = futex_data_init (&data, addr, flags, FUTEX_DATA_WAIT);
  345. if (error)
  346. return (error);
  347. data.wait_obj = data.ops->acquire (&data.key);
  348. error = futex_map_addr (&data, value, FUTEX_OP_CMP);
  349. if (! error)
  350. error = data.ops->wait (&data, value, flags, ticks);
  351. return (error);
  352. }
  353. int
  354. futex_wake (int *addr, uint32_t flags, int value)
  355. {
  356. CLEANUP (futex_data_fini) struct futex_data data;
  357. int error = futex_data_init (&data, addr, flags, FUTEX_DATA_WAKE);
  358. if (error)
  359. return (error);
  360. data.wait_obj = data.ops->acquire (&data.key);
  361. if (flags & FUTEX_MUTATE)
  362. {
  363. error = futex_map_addr (&data, value, FUTEX_OP_SET);
  364. if (error)
  365. return (error);
  366. }
  367. if (data.wait_obj)
  368. error = data.ops->wake (data.wait_obj, flags);
  369. return (0);
  370. }
  371. int
  372. futex_requeue (int *dst_addr, int *src_addr, int wake_one, uint32_t flags)
  373. {
  374. if (flags & FUTEX_PI)
  375. return (EINVAL);
  376. int error = futex_check_addr (dst_addr);
  377. if (error)
  378. return (error);
  379. error = futex_check_addr (src_addr);
  380. if (error)
  381. return (error);
  382. union sync_key dkey, skey;
  383. struct vm_map *map = vm_map_self ();
  384. error = futex_key_init (&dkey, map, dst_addr, flags);
  385. if (error)
  386. return (error);
  387. error = futex_key_init (&skey, map, src_addr, flags);
  388. if (error)
  389. {
  390. if (flags & FUTEX_SHARED)
  391. vm_object_unref (dkey.shared.object);
  392. return (error);
  393. }
  394. sleepq_move (&dkey, &skey, wake_one, (flags & FUTEX_BROADCAST));
  395. if (flags & FUTEX_SHARED)
  396. {
  397. vm_object_unref (dkey.shared.object);
  398. vm_object_unref (skey.shared.object);
  399. }
  400. return (0);
  401. }
  402. static int
  403. futex_robust_list_handle (struct futex_robust_list *list,
  404. int *addr, int tid)
  405. {
  406. CLEANUP (futex_data_fini) struct futex_data data;
  407. int error = futex_data_init (&data, addr, list->flags, FUTEX_DATA_WAKE);
  408. if (error)
  409. return (error);
  410. data.wait_obj = data.ops->acquire (&data.key);
  411. error = futex_map_addr (&data, tid, FUTEX_OP_ROBUST_CLEAR);
  412. if (error < 0)
  413. { // There are waiters on this robust futex. Wake them all.
  414. if (data.wait_obj)
  415. data.ops->wake (data.wait_obj, list->flags | FUTEX_BROADCAST);
  416. error = 0;
  417. }
  418. return (error);
  419. }
  420. void
  421. futex_td_exit (struct futex_td *td)
  422. {
  423. int tid = thread_id (thread_self ());
  424. struct futex_td rtd;
  425. if (!td || user_copy_from (&rtd, td, sizeof (rtd)) != 0)
  426. return;
  427. if (rtd.pending)
  428. futex_robust_list_handle (rtd.pending, (int *)rtd.pending, tid);
  429. uint32_t nmax = 1024; // Handle this many robust futexes.
  430. while (rtd.list)
  431. {
  432. struct futex_robust_list tmp;
  433. if (user_copy_from (&tmp, rtd.list, sizeof (tmp)) != 0 ||
  434. futex_robust_list_handle (&tmp, (int *)rtd.list, tid) != 0 ||
  435. --nmax == 0)
  436. break;
  437. rtd.list = (void *)(uintptr_t)tmp.next;
  438. }
  439. }