capability.c

/*
 * Copyright (c) 2023 Agustina Arzille.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 */

#include <kern/capability.h>
#include <kern/cspace.h>
#include <kern/intr.h>
#include <kern/kmem.h>
#include <kern/rcu.h>
#include <kern/thread.h>
#include <kern/user.h>

#include <machine/pmap.h>
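
/*
 * An alert queued on a flow. The leading anonymous union lets the same
 * storage hold either the sender identification of a user alert (task id,
 * thread id and tag) or the list linkage (hnode) used to keep kernel
 * alerts in a flow's alloc_alerts list. The extra byte at the end of the
 * payload array holds the alert type (see cap_alert_type below).
 */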
struct cap_alert
{
  union
    {
      struct
        {   // Valid for user alerts and when not pending.
          int task_id;
          int thread_id;
          uintptr_t tag;
        };

      struct hlist_node hnode;
    };

  struct pqueue_node pnode;

  union
    {
      char payload[CAP_ALERT_SIZE + 1];
      struct cap_kern_alert k_alert;
    };
};
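
/*
 * The capability and VME counters must sit at the same offsets relative to
 * the byte counters in both the send and receive halves of
 * struct ipc_msg_data: cap_transfer_iters below updates them through the
 * CAP_CAPS_OFF and CAP_VMES_OFF byte offsets.
 */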
#define CAP_F(name)   __builtin_offsetof (struct ipc_msg_data, name)

static_assert (CAP_F (caps_recv) - CAP_F (bytes_recv) ==
                 CAP_F (caps_sent) - CAP_F (bytes_sent) &&
               CAP_F (vmes_recv) - CAP_F (bytes_recv) ==
                 CAP_F (vmes_sent) - CAP_F (bytes_sent),
               "invalid layout for struct ipc_msg_data");

#undef CAP_F

#define CAP_VMES_OFF   \
  (__builtin_offsetof (struct ipc_msg_data, vmes_recv) -   \
   __builtin_offsetof (struct ipc_msg_data, bytes_recv))

#define CAP_CAPS_OFF   \
  (__builtin_offsetof (struct ipc_msg_data, caps_recv) -   \
   __builtin_offsetof (struct ipc_msg_data, bytes_recv))

struct cap_alert_async
{
  struct cap_alert base;
  struct list xlink;
  struct cap_flow *flow;
};

#define cap_alert_type(alert)   ((alert)->payload[CAP_ALERT_SIZE])

struct cap_port_entry
{
  struct slist_node snode;
  struct task *task;
  size_t size;
  uintptr_t ctx[3];   // SP and function arguments.
  struct ipc_msg_data mdata;
  struct cap_iters in_it;
  struct cap_iters *out_it;
};

struct cap_receiver
{
  struct list lnode;
  struct thread *thread;
  void *buf;
  struct ipc_msg_data mdata;
  bool spurious;
};

struct cap_sender
{
  struct list lnode;
  struct thread *thread;
};

static struct kmem_cache cap_flow_cache;
static struct kmem_cache cap_misc_cache;
static struct kmem_cache cap_port_cache;

static struct list cap_intr_handlers[CPU_INTR_TABLE_SIZE];
static struct adaptive_lock cap_intr_lock;

// Priorities for kernel-generated alerts.
#define CAP_ALERT_TASK_PRIO      ((THREAD_SCHED_RT_PRIO_MAX + 2) << 1)
#define CAP_ALERT_THREAD_PRIO    (CAP_ALERT_TASK_PRIO << 1)
#define CAP_ALERT_INTR_PRIO      (CAP_ALERT_THREAD_PRIO << 1)
#define CAP_ALERT_CHANNEL_PRIO   (1u)
#define CAP_FROM_SREF(ptr, type)   structof (ptr, type, base.sref)

static void
cap_base_init (struct cap_base *base, unsigned int type, sref_noref_fn_t noref)
{
  assert (type < CAP_TYPE_MAX);
  base->type = type;
  sref_counter_init (&base->sref, 1, NULL, noref);
}

static void
cap_task_fini (struct sref_counter *sref)
{
  _Auto tp = CAP_FROM_SREF (sref, struct cap_task);
  task_unref (tp->task);
  kmem_cache_free (&cap_misc_cache, tp);
}

int
cap_task_create (struct cap_task **outp, struct task *task)
{
  struct cap_task *ret = kmem_cache_alloc (&cap_misc_cache);
  if (! ret)
    return (ENOMEM);

  cap_base_init (&ret->base, CAP_TYPE_TASK, cap_task_fini);
  task_ref (task);
  ret->task = task;
  *outp = ret;
  return (0);
}

static void
cap_thread_fini (struct sref_counter *sref)
{
  _Auto tp = CAP_FROM_SREF (sref, struct cap_thread);
  thread_unref (tp->thread);
  kmem_cache_free (&cap_misc_cache, tp);
}

int
cap_thread_create (struct cap_thread **outp, struct thread *thread)
{
  struct cap_thread *ret = kmem_cache_alloc (&cap_misc_cache);
  if (! ret)
    return (ENOMEM);

  cap_base_init (&ret->base, CAP_TYPE_THREAD, cap_thread_fini);
  thread_ref (thread);
  ret->thread = thread;
  *outp = ret;
  return (0);
}
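
/*
 * When a channel that is still linked to a flow is released, its memory is
 * reused in place as a CAP_ALERT_CHAN_CLOSED alert (both objects come from
 * cap_misc_cache) and queued on the flow, so receivers learn that the
 * channel was closed.
 */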
static void cap_recv_wakeup_fast (struct cap_flow *);

static void
cap_channel_fini (struct sref_counter *sref)
{
  _Auto chp = CAP_FROM_SREF (sref, struct cap_channel);
  _Auto flow = chp->flow;

  if (! flow)
    {
      kmem_cache_free (&cap_misc_cache, chp);
      return;
    }

  uintptr_t tag = chp->tag;
  // Mutate the type.
  struct cap_alert *alert = (void *)chp;

  alert->k_alert.type = cap_alert_type(alert) = CAP_ALERT_CHAN_CLOSED;
  alert->k_alert.tag = tag;
  pqueue_node_init (&alert->pnode, CAP_ALERT_CHANNEL_PRIO);
  hlist_node_init (&alert->hnode);

  spinlock_lock (&flow->lock);
  hlist_insert_head (&flow->alloc_alerts, &alert->hnode);
  pqueue_insert (&flow->pending_alerts, &alert->pnode);
  cap_recv_wakeup_fast (flow);
  spinlock_unlock (&flow->lock);
  cap_base_rel (flow);
}

int
cap_channel_create (struct cap_channel **outp, struct cap_flow *flow,
                    uintptr_t tag)
{
  struct cap_channel *ret = kmem_cache_alloc (&cap_misc_cache);
  if (! ret)
    return (ENOMEM);

  cap_base_init (&ret->base, CAP_TYPE_CHANNEL, cap_channel_fini);

  if (flow)
    cap_base_acq (flow);

  ret->flow = flow;
  ret->tag = tag;
  *outp = ret;
  return (0);
}

static void cap_intr_rem (uint32_t irq, struct list *link);

static void
cap_task_thread_rem (int id, int type, struct list *link)
{
  _Auto kuid = kuid_find (id, type == CAP_ALERT_THREAD_DIED ?
                              KUID_THREAD : KUID_TASK);

#define cap_unlink_alert(obj, type, unref)   \
  do   \
    {   \
      _Auto ptr = structof (obj, type, kuid);   \
      spinlock_lock (&ptr->dead_subs.lock);   \
      list_remove (link);   \
      spinlock_unlock (&ptr->dead_subs.lock);   \
      unref (ptr);   \
    }   \
  while (0)

  if (! kuid)
    return;
  else if (type == CAP_ALERT_THREAD_DIED)
    cap_unlink_alert (kuid, struct thread, thread_unref);
  else
    cap_unlink_alert (kuid, struct task, task_unref);

#undef cap_unlink_alert
}

static void
cap_alert_free (struct cap_alert *alert)
{
  _Auto async = (struct cap_alert_async *)alert;
  _Auto k_alert = &alert->k_alert;
  int type = cap_alert_type (alert);

  if (type == CAP_ALERT_INTR)
    cap_intr_rem (k_alert->intr.irq, &async->xlink);
  else if (type == CAP_ALERT_THREAD_DIED || type == CAP_ALERT_TASK_DIED)
    cap_task_thread_rem (k_alert->any_id, type, &async->xlink);

  kmem_cache_free (&cap_misc_cache, alert);
}

static void
cap_port_entry_fini (struct cap_port_entry *port)
{
  task_unref (port->task);
  kmem_cache_free (&cap_port_cache, port);
}

static void
cap_flow_fini (struct sref_counter *sref)
{
  _Auto flow = CAP_FROM_SREF (sref, struct cap_flow);
  struct cap_alert *alert, *tmp;

  pqueue_for_each_entry_safe (&flow->pending_alerts, alert, tmp, pnode)
    if (cap_alert_type (alert) == CAP_ALERT_USER)
      kmem_free (alert, sizeof (*alert));

  hlist_for_each_entry_safe (&flow->alloc_alerts, alert, tmp, hnode)
    cap_alert_free (alert);

  struct cap_port_entry *port, *pt;
  slist_for_each_entry_safe (&flow->lpads, port, pt, snode)
    cap_port_entry_fini (port);

  kmem_cache_free (&cap_flow_cache, flow);
}

int
cap_flow_create (struct cap_flow **outp, uint32_t flags,
                 uintptr_t tag, uintptr_t entry)
{
  struct cap_flow *ret = kmem_cache_alloc (&cap_flow_cache);
  if (! ret)
    return (ENOMEM);

  cap_base_init (&ret->base, CAP_TYPE_FLOW, cap_flow_fini);
  spinlock_init (&ret->lock);
  list_init (&ret->waiters);
  list_init (&ret->receivers);
  slist_init (&ret->lpads);
  hlist_init (&ret->alloc_alerts);
  pqueue_init (&ret->pending_alerts);
  ret->flags = flags;
  ret->tag = tag;
  ret->entry = entry;

  *outp = ret;
  return (0);
}

/*
 * Attempt to set the tag to a new value. The only valid transition is
 * from zero to any value.
 */
static int
cap_cas_tag (uintptr_t *tagp, uintptr_t value)
{
  while (1)
    {
      uintptr_t tmp = atomic_load_rlx (tagp);
      if (tmp != 0)
        return (EEXIST);
      else if (atomic_cas_bool_rlx (tagp, tmp, value))
        return (0);

      cpu_pause ();
    }
}

int
(cap_set_tag) (struct cap_base *cap, uintptr_t tag)
{
  if (! tag)
    return (EINVAL);
  else if (cap->type == CAP_TYPE_CHANNEL)
    return (cap_cas_tag (&((struct cap_channel *)cap)->tag, tag));
  else if (cap->type == CAP_TYPE_FLOW)
    return (cap_cas_tag (&((struct cap_flow *)cap)->tag, tag));

  return (EINVAL);
}

int
(cap_get_tag) (const struct cap_base *cap, uintptr_t *tagp)
{
  if (cap->type == CAP_TYPE_CHANNEL)
    *tagp = ((const struct cap_channel *)cap)->tag;
  else if (cap->type == CAP_TYPE_FLOW)
    *tagp = ((const struct cap_flow *)cap)->tag;
  else
    return (EINVAL);

  return (0);
}

int
cap_channel_link (struct cap_channel *channel, struct cap_flow *flow)
{
  while (1)
    {
      _Auto prev = atomic_load_rlx (&channel->flow);
      if (prev && flow)
        return (EAGAIN);
      else if (atomic_cas_bool_acq (&channel->flow, prev, flow))
        return (0);

      cpu_pause ();
    }
}

int
cap_flow_hook (struct cap_channel **outp, struct task *task, int capx)
{
  struct cap_base *base = cspace_get (&task->caps, capx);
  if (! base)
    return (EBADF);
  else if (base->type != CAP_TYPE_FLOW)
    {
      cap_base_rel (base);
      return (EINVAL);
    }

  _Auto flow = (struct cap_flow *)base;
  int ret = cap_channel_create (outp, flow, flow->tag);
  cap_base_rel (flow);
  return (ret);
}
/*
 * Transfer all 3 iterators between a local and a remote task.
 * Updates the metadata if successful. Returns the number of
 * raw bytes transmitted on success; a negative errno value on failure.
 */
static ssize_t
cap_transfer_iters (struct task *task, struct cap_iters *r_it,
                    struct cap_iters *l_it, int dir, ssize_t *bytesp)
{
  ssize_t ret = ipc_iov_iter_copy (task, &r_it->iov, &l_it->iov, dir);
  if (ret < 0)
    return (ret);

  *bytesp += ret;
  if (ipc_cap_iter_size (&r_it->cap) && ipc_cap_iter_size (&l_it->cap))
    {
      int nr_caps = ipc_cap_iter_copy (task, &r_it->cap, &l_it->cap, dir);
      if (nr_caps < 0)
        return (nr_caps);

      *(uint32_t *)((char *)bytesp + CAP_CAPS_OFF) += nr_caps;
    }

  if (ipc_vme_iter_size (&r_it->vme) && ipc_vme_iter_size (&l_it->vme))
    {
      int nr_vmes = ipc_vme_iter_copy (task, &r_it->vme, &l_it->vme, dir);
      if (nr_vmes < 0)
        return (nr_vmes);

      *(uint32_t *)((char *)bytesp + CAP_VMES_OFF) += nr_vmes;
    }

  return (ret);
}
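
/*
 * Allocate an alert for a flow. The flow lock is dropped around the
 * allocation so that a blocking allocation may sleep, and reacquired
 * before returning.
 */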
static struct cap_alert*
cap_flow_alloc_alert (struct cap_flow *flow, uint32_t flg)
{
  spinlock_unlock (&flow->lock);
  void *ptr = kmem_cache_alloc2 (&cap_misc_cache, (flg & CAP_ALERT_NONBLOCK) ?
                                 0 : KMEM_ALLOC_SLEEP);
  spinlock_lock (&flow->lock);
  return (ptr);
}

static void
cap_receiver_add (struct cap_flow *flow, struct cap_receiver *recv, void *buf)
{
  recv->thread = thread_self ();
  recv->buf = buf;
  recv->spurious = false;
  memset (&recv->mdata, 0, sizeof (recv->mdata));
  list_insert_tail (&flow->receivers, &recv->lnode);
}

static void
cap_recv_wakeup_fast (struct cap_flow *flow)
{
  if (list_empty (&flow->receivers))
    return;

  _Auto recv = list_pop (&flow->receivers, struct cap_receiver, lnode);
  recv->spurious = true;
  thread_wakeup (recv->thread);
}
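
/*
 * Pop a pending alert, or wait for one. Returns a queued alert with the
 * flow lock still held, or NULL with the lock released and *outp set:
 * EAGAIN for a non-blocking call with nothing pending, or the outcome of a
 * direct hand-off from a sender (0 on success, a positive error otherwise).
 */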
static struct cap_alert*
cap_recv_pop_alert (struct cap_flow *flow, void *buf, uint32_t flags,
                    struct ipc_msg_data *mdata, int *outp)
{
  if (!pqueue_empty (&flow->pending_alerts))
    return (pqueue_pop_entry (&flow->pending_alerts, struct cap_alert, pnode));
  else if (flags & CAP_ALERT_NONBLOCK)
    {
      spinlock_unlock (&flow->lock);
      *outp = EAGAIN;
      return (NULL);
    }

  struct cap_receiver recv;
  cap_receiver_add (flow, &recv, buf);

  do
    thread_sleep (&flow->lock, flow, "flow-alert");
  while (pqueue_empty (&flow->pending_alerts));

  if (recv.spurious)
    return (pqueue_pop_entry (&flow->pending_alerts, struct cap_alert, pnode));

  spinlock_unlock (&flow->lock);
  if (recv.mdata.bytes_recv >= 0 && mdata)
    {
      recv.mdata.bytes_recv = CAP_ALERT_SIZE;
      user_copy_to (mdata, &recv.mdata, sizeof (*mdata));
    }

  *outp = recv.mdata.bytes_recv >= 0 ? 0 : (int)-recv.mdata.bytes_recv;
  return (NULL);
}

int
cap_recv_alert (struct cap_flow *flow, void *buf,
                uint32_t flags, struct ipc_msg_data *mdata)
{
  uint32_t ids[2] = { 0, 0 };
  uintptr_t tag = 0;
  spinlock_lock (&flow->lock);

  int error;
  _Auto entry = cap_recv_pop_alert (flow, buf, flags, mdata, &error);
  if (! entry)
    return (error);

  void *payload = entry->payload;
  int type = cap_alert_type (entry);

  if (type == CAP_ALERT_INTR)
    {   // Copy into a temp buffer so we may reset the counter.
      payload = alloca (sizeof (entry->k_alert));
      *(struct cap_kern_alert *)payload = entry->k_alert;
      entry->k_alert.intr.count = 0;
    }
  else if (type != CAP_ALERT_USER)
    hlist_remove (&entry->hnode);
  else
    {
      ids[0] = entry->task_id;
      ids[1] = entry->thread_id;
      tag = entry->tag;
    }

  pqueue_inc (&flow->pending_alerts, 1);
  spinlock_unlock (&flow->lock);

  if (unlikely (user_copy_to (buf, payload, CAP_ALERT_SIZE) != 0))
    {
      SPINLOCK_GUARD (&flow->lock);
      pqueue_insert (&flow->pending_alerts, &entry->pnode);

      if (type == CAP_ALERT_INTR)
        entry->k_alert.intr.count +=
          ((struct cap_kern_alert *)payload)->intr.count;
      else if (type != CAP_ALERT_USER)
        hlist_insert_head (&flow->alloc_alerts, &entry->hnode);

      cap_recv_wakeup_fast (flow);
      return (EFAULT);
    }
  else if (mdata)
    {
      struct ipc_msg_data tmp;
      memset (&tmp, 0, sizeof (tmp));
      tmp.bytes_recv = CAP_ALERT_SIZE;
      tmp.tag = tag;
      tmp.task_id = ids[0], tmp.thread_id = ids[1];
      user_copy_to (mdata, &tmp, sizeof (tmp));
    }

  return (0);
}

static void
cap_fill_ids (int *thr_idp, int *task_idp, struct thread *thr)
{
  *thr_idp = thread_id (thr);
  *task_idp = task_id (thr->task);
}

int
(cap_send_alert) (struct cap_base *cap, const void *buf,
                  uint32_t flags, uint32_t prio)
{
  struct cap_flow *flow;
  uintptr_t tag;

  if (cap->type == CAP_TYPE_CHANNEL)
    {
      flow = ((struct cap_channel *)cap)->flow;
      if (! flow)
        return (EINVAL);

      tag = ((struct cap_channel *)cap)->tag;
    }
  else if (cap->type == CAP_TYPE_FLOW)
    {
      flow = (struct cap_flow *)cap;
      tag = flow->tag;
    }
  else
    return (EBADF);

  /*
   * Copy into a temporary buffer, since the code below may otherwise
   * generate a page fault while holding a spinlock.
   */
  char abuf[CAP_ALERT_SIZE] = { 0 };
  if (user_copy_from (abuf, buf, CAP_ALERT_SIZE) != 0)
    return (EFAULT);

  struct cap_receiver *recv;

  {
    SPINLOCK_GUARD (&flow->lock);
    if (list_empty (&flow->receivers))
      {
        _Auto alert = cap_flow_alloc_alert (flow, flags);
        if (! alert)
          return (ENOMEM);

        memcpy (alert->payload, abuf, CAP_ALERT_SIZE);
        pqueue_node_init (&alert->pnode, prio);
        pqueue_insert (&flow->pending_alerts, &alert->pnode);
        cap_alert_type(alert) = CAP_ALERT_USER;
        cap_fill_ids (&alert->thread_id, &alert->task_id, thread_self ());
        alert->tag = tag;
        return (0);
      }

    recv = list_pop (&flow->receivers, typeof (*recv), lnode);
  }

  cap_fill_ids (&recv->mdata.thread_id, &recv->mdata.task_id, thread_self ());
  recv->mdata.tag = tag;
  ssize_t rv = ipc_bcopy (recv->thread->task, recv->buf, sizeof (abuf),
                          abuf, sizeof (abuf), IPC_COPY_TO);

  thread_wakeup (recv->thread);
  recv->mdata.bytes_recv = rv;
  return (rv < 0 ? (int)-rv : 0);
}

static void
cap_sender_add (struct cap_flow *flow, struct cap_sender *sender,
                struct thread *thread)
{
  sender->thread = thread;
  list_insert_tail (&flow->waiters, &sender->lnode);
}
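
/*
 * Install *taskp as the calling thread's xtask and load its pmap, handing
 * the previous xtask back through *taskp. Preemption and interrupts are
 * disabled across the switch.
 */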
static void
cap_task_swap (struct task **taskp, struct thread *self)
{
  cpu_flags_t flags;
  thread_preempt_disable_intr_save (&flags);

  struct task *xtask = self->xtask;
  self->xtask = *taskp;
  *taskp = xtask;

  pmap_load (self->xtask->map->pmap);
  thread_preempt_enable_intr_restore (flags);
}

static void
cap_ipc_msg_data_init (struct ipc_msg_data *data, uintptr_t tag)
{
  data->size = sizeof (*data);
  data->tag = tag;
  data->bytes_recv = data->bytes_sent = 0;
  data->flags = 0;
  data->vmes_sent = data->caps_sent = 0;
  data->vmes_recv = data->caps_recv = 0;
}

static void
cap_iters_copy (struct cap_iters *dst, const struct cap_iters *src)
{
#define cap_copy_simple(d, s, type)   \
  d->type.begin = s->type.begin;   \
  d->type.cur = s->type.cur;   \
  d->type.end = s->type.end

  memcpy (dst->iov.cache, src->iov.cache,
          (src->iov.cache_idx - IPC_IOV_ITER_CACHE_SIZE) *
          sizeof (struct iovec));

  dst->iov.cache_idx = src->iov.cache_idx;
  dst->iov.head = src->iov.head;

  cap_copy_simple (dst, src, iov);
  cap_copy_simple (dst, src, cap);
  cap_copy_simple (dst, src, vme);
#undef cap_copy_simple
}

static void
cap_flow_push_port (struct cap_flow *flow, struct cap_port_entry *port)
{
  SPINLOCK_GUARD (&flow->lock);
  slist_insert_head (&flow->lpads, &port->snode);

  if (list_empty (&flow->waiters))
    return;

  _Auto sender = list_first_entry (&flow->waiters, struct cap_sender, lnode);
  thread_wakeup (sender->thread);
}

static struct cap_port_entry*
cap_pop_port (struct cap_flow *flow, struct thread *self)
{
  SPINLOCK_GUARD (&flow->lock);
  if (slist_empty (&flow->lpads))
    {
      struct cap_sender sender;
      cap_sender_add (flow, &sender, self);

      do
        thread_sleep (&flow->lock, flow, "flow-sender");
      while (slist_empty (&flow->lpads));

      list_remove (&sender.lnode);
    }

  _Auto port = slist_first_entry (&flow->lpads, struct cap_port_entry, snode);
  slist_remove (&flow->lpads, NULL);
  return (port);
}
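
/*
 * Synchronous send on a flow: grab a port from the flow's lpads list
 * (landing pads), copy the message into it, switch to the port's task and
 * jump to the flow's entry point via cpu_port_swap. Once the receiver
 * replies, copy the metadata back to the sender and return the port to
 * the flow.
 */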
static ssize_t
cap_sender_impl (struct cap_flow *flow, uintptr_t tag, struct cap_iters *in,
                 struct cap_iters *out, struct ipc_msg_data *data)
{
  struct thread *self = thread_self ();
  _Auto port = cap_pop_port (flow, self);

  cap_ipc_msg_data_init (&port->mdata, tag);
  ssize_t nb = cap_transfer_iters (port->task, &port->in_it, in,
                                   IPC_COPY_TO, &port->mdata.bytes_recv);

  if (nb < 0)
    port->mdata.bytes_recv = nb;

  cap_iters_copy (&port->in_it, in);
  port->out_it = out;

  struct cap_port_entry *cur_port = self->cur_port;
  self->cur_port = port;
  cap_fill_ids (&port->mdata.thread_id, &port->mdata.task_id, self);

  // Switch task (also sets the pmap).
  cap_task_swap (&port->task, self);
  user_copy_to ((void *)port->ctx[2], &port->mdata, sizeof (port->mdata));

  // Jump to new PC and SP.
  ssize_t ret = cpu_port_swap (port->ctx, cur_port, (void *)flow->entry);

  // We're back.
  if (data && user_copy_to (data, &port->mdata, sizeof (*data)) != 0)
    ret = -EFAULT;

  cap_flow_push_port (flow, port);
  self->cur_port = cur_port;
  return (ret);
}

ssize_t
cap_send_iters (struct cap_base *cap, struct cap_iters *in,
                struct cap_iters *out, struct ipc_msg_data *data)
{
  struct cap_flow *flow;
  uintptr_t tag;
  struct ipc_msg_data mdata;

  if (! cap)
    return (-EBADF);

  switch (cap->type)
    {
      case CAP_TYPE_FLOW:
        flow = (struct cap_flow *)cap;
        tag = flow->tag;
        break;

      case CAP_TYPE_CHANNEL:
        flow = ((struct cap_channel *)cap)->flow;
        if (! flow)
          return (-EINVAL);

        tag = ((struct cap_channel *)cap)->tag;
        break;

      case CAP_TYPE_THREAD:
        return (thread_handle_msg (((struct cap_thread *)cap)->thread,
                                   in, out, &mdata));

      case CAP_TYPE_TASK:
        return (task_handle_msg (((struct cap_task *)cap)->task,
                                 in, out, &mdata));

      case CAP_TYPE_KERNEL:
        // TODO: Implement.
      default:
        return (-EINVAL);
    }

  return (cap_sender_impl (flow, tag, in, out, data));
}
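
/*
 * cap_pull_iters and cap_push_iters stream additional data for the message
 * currently being handled; they are only valid while the calling thread is
 * running on a port, i.e. inside a flow's entry routine.
 */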
ssize_t
cap_pull_iters (struct cap_iters *it, struct ipc_msg_data *mdata)
{
  struct cap_port_entry *port = thread_self()->cur_port;
  if (! port)
    return (-EINVAL);

  struct ipc_msg_data tmp;
  cap_ipc_msg_data_init (&tmp, port->mdata.tag);

  ssize_t ret = cap_transfer_iters (port->task, &port->in_it, it,
                                    IPC_COPY_FROM, &tmp.bytes_recv);
  if (ret > 0)
    port->mdata.bytes_recv += ret;

  port->mdata.vmes_recv += tmp.vmes_recv;
  port->mdata.caps_recv += tmp.caps_recv;

  if (mdata)
    user_copy_to (mdata, &tmp, sizeof (tmp));

  return (ret);
}

ssize_t
cap_push_iters (struct cap_iters *it, struct ipc_msg_data *mdata)
{
  struct cap_port_entry *port = thread_self()->cur_port;
  if (! port)
    return (-EINVAL);

  struct ipc_msg_data tmp;
  cap_ipc_msg_data_init (&tmp, port->mdata.tag);

  ssize_t ret = cap_transfer_iters (port->task, port->out_it, it,
                                    IPC_COPY_TO, &tmp.bytes_sent);
  if (ret > 0)
    port->mdata.bytes_sent += ret;

  port->mdata.vmes_sent += tmp.vmes_sent;
  port->mdata.caps_sent += tmp.caps_sent;

  if (mdata)
    user_copy_to (mdata, &tmp, sizeof (tmp));

  return (ret);
}

static void
cap_mdata_swap (struct ipc_msg_data *mdata)
{
  SWAP (&mdata->bytes_sent, &mdata->bytes_recv);
  SWAP (&mdata->caps_sent, &mdata->caps_recv);
  SWAP (&mdata->vmes_sent, &mdata->vmes_recv);
}
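
/*
 * Send the reply for the message currently being handled. The metadata's
 * send and receive counters are swapped before control goes back to the
 * sender. This function does not return to its caller: it switches back
 * to the sender's task and exits the port through cpu_port_return.
 */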
ssize_t
cap_reply_iters (struct cap_iters *it, int rv)
{
  struct thread *self = thread_self ();
  struct cap_port_entry *port = self->cur_port;
  ssize_t ret;

  if (! port)
    return (-EINVAL);
  else if (rv >= 0)
    {
      struct ipc_msg_data tmp;
      tmp.bytes_sent = (tmp.caps_sent = tmp.vmes_sent = 0);

      ret = cap_transfer_iters (port->task, port->out_it, it,
                                IPC_COPY_TO, &tmp.bytes_sent);
      if (ret > 0)
        {
          port->mdata.bytes_sent += ret;
          ret = port->mdata.bytes_sent;
        }

      port->mdata.caps_sent += tmp.caps_sent;
      port->mdata.vmes_sent += tmp.vmes_sent;
      cap_mdata_swap (&port->mdata);

      if (!ipc_iov_iter_empty (&it->iov) ||
          ipc_vme_iter_size (&it->vme) ||
          ipc_cap_iter_size (&it->cap))
        port->mdata.flags |= IPC_MSG_TRUNC;
    }
  else
    ret = rv;

  cap_task_swap (&port->task, self);
  cpu_port_return (port->ctx[0], ret);
  __builtin_unreachable ();
}

int
cap_flow_add_port (struct cap_flow *flow, void *stack, size_t size,
                   struct ipc_msg *msg, struct ipc_msg_data *mdata,
                   struct cap_thread_info *info __unused)
{
  struct cap_port_entry *entry = kmem_cache_alloc (&cap_port_cache);
  if (! entry)
    return (ENOMEM);

  entry->size = size;
  entry->ctx[0] = (uintptr_t)stack;
  entry->ctx[1] = (uintptr_t)msg;
  entry->ctx[2] = (uintptr_t)mdata;
  memset (&entry->mdata, 0, sizeof (entry->mdata));
  cap_iters_init_msg (&entry->in_it, msg);
  task_ref (entry->task = task_self ());

  cap_flow_push_port (flow, entry);
  return (0);
}

int
cap_flow_rem_port (struct cap_flow *flow, uintptr_t stack)
{
  spinlock_lock (&flow->lock);
  struct cap_port_entry *entry;
  struct slist_node *prev = NULL;

  slist_for_each_entry (&flow->lpads, entry, snode)
    {
      if (entry->task == task_self () &&
          (stack == ~(uintptr_t)0 || stack == entry->ctx[0]))
        break;

      prev = &entry->snode;
    }

  if (! entry)
    {
      spinlock_unlock (&flow->lock);
      return (ESRCH);
    }

  slist_remove (&flow->lpads, prev);
  spinlock_unlock (&flow->lock);

  // Unmap the stack if the user didn't specify one.
  int error = stack != ~(uintptr_t)0 ? 0 :
              vm_map_remove (vm_map_self (), entry->ctx[0], entry->size);

  if (! error)
    kmem_free (entry, sizeof (*entry));
  else
    cap_flow_push_port (flow, entry);

  return (error);
}
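
/*
 * Interrupt handler shared by all capability-based interrupt registrations:
 * walk the RCU list of async alerts attached to this IRQ and, on the first
 * increment of an alert's counter, queue it on its flow and wake a receiver.
 */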
static int
cap_handle_intr (void *arg)
{
  struct list *list = arg;
  assert (list >= &cap_intr_handlers[0] &&
          list <= &cap_intr_handlers[ARRAY_SIZE (cap_intr_handlers) - 1]);

  RCU_GUARD ();
  list_rcu_for_each (list, tmp)
    {
      _Auto alert = list_entry (tmp, struct cap_alert_async, xlink);
      SPINLOCK_GUARD (&alert->flow->lock);

      if (++alert->base.k_alert.intr.count == 1)
        {
          pqueue_insert (&alert->flow->pending_alerts, &alert->base.pnode);
          cap_recv_wakeup_fast (alert->flow);
        }
    }

  return (EAGAIN);
}

static int
cap_intr_add (uint32_t intr, struct list *node)
{
  assert (intr >= CPU_EXC_INTR_FIRST &&
          intr - CPU_EXC_INTR_FIRST < ARRAY_SIZE (cap_intr_handlers));

  ADAPTIVE_LOCK_GUARD (&cap_intr_lock);
  struct list *list = &cap_intr_handlers[intr - CPU_EXC_INTR_FIRST];

  if (list_empty (list))
    {
      CPU_INTR_GUARD ();

      int error = intr_register (intr, cap_handle_intr, list);
      if (error)
        return (error);

      list_rcu_insert_head (list, node);
      return (0);
    }

  list_rcu_insert_head (list, node);
  return (0);
}

static void
cap_intr_rem (uint32_t intr, struct list *node)
{
  adaptive_lock_acquire (&cap_intr_lock);
  list_rcu_remove (node);

  if (list_empty (&cap_intr_handlers[intr - CPU_EXC_INTR_FIRST]))
    intr_unregister (intr, cap_handle_intr);

  adaptive_lock_release (&cap_intr_lock);
  rcu_wait ();
}

static struct cap_alert_async*
cap_alert_async_find (struct cap_flow *flow, int type, int id)
{
  struct cap_alert *tmp;
  hlist_for_each_entry (&flow->alloc_alerts, tmp, hnode)
    if (cap_alert_type (tmp) == type && tmp->k_alert.any_id == id)
      return ((void *)tmp);

  return (NULL);
}

int
cap_intr_register (struct cap_flow *flow, uint32_t irq)
{
  if (irq < CPU_EXC_INTR_FIRST || irq > CPU_EXC_INTR_LAST)
    return (EINVAL);

  struct cap_alert_async *ap = kmem_cache_alloc (&cap_misc_cache);
  if (! ap)
    return (ENOMEM);

  pqueue_node_init (&ap->base.pnode, CAP_ALERT_INTR_PRIO);
  cap_alert_type(&ap->base) = CAP_ALERT_INTR;
  hlist_node_init (&ap->base.hnode);
  list_node_init (&ap->xlink);
  ap->flow = flow;
  ap->base.k_alert = (struct cap_kern_alert)
    {
      .type = CAP_ALERT_INTR,
      .intr = { .irq = irq, .count = 0 }
    };

  int error = cap_intr_add (irq, &ap->xlink);
  if (error)
    {
      kmem_cache_free (&cap_misc_cache, ap);
      return (error);
    }

  spinlock_lock (&flow->lock);
  if (unlikely (cap_alert_async_find (flow, CAP_ALERT_INTR, irq)))
    {
      spinlock_unlock (&flow->lock);
      cap_intr_rem (irq, &ap->xlink);
      kmem_cache_free (&cap_misc_cache, ap);
      return (EALREADY);
    }

  hlist_insert_head (&flow->alloc_alerts, &ap->base.hnode);
  spinlock_unlock (&flow->lock);
  return (0);
}

static int
cap_unregister_impl (struct cap_flow *flow, int type,
                     uint32_t id, struct cap_alert_async **outp)
{
  SPINLOCK_GUARD (&flow->lock);
  _Auto entry = cap_alert_async_find (flow, type, id);

  if (! entry)
    return (ESRCH);

  hlist_remove (&entry->base.hnode);
  if (!pqueue_node_unlinked (&entry->base.pnode))
    pqueue_remove (&flow->pending_alerts, &entry->base.pnode);

  *outp = entry;
  return (0);
}

int
cap_intr_unregister (struct cap_flow *flow, uint32_t irq)
{
  CPU_INTR_GUARD ();
  struct cap_alert_async *entry;
  int error = cap_unregister_impl (flow, CAP_ALERT_INTR, irq, &entry);

  if (! error)
    cap_intr_rem (irq, &entry->xlink);

  return (error);
}
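
/*
 * Subscribe a flow to the death of a task or thread. The async alert is
 * linked on both the flow and the object's bulletin; if the kuid's
 * reference count is already down to one, the alert is queued immediately.
 */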
static int
cap_register_task_thread (struct cap_flow *flow, struct kuid_head *kuid,
                          uint32_t prio, int type, struct bulletin *outp)
{
  struct cap_alert_async *ap = kmem_cache_alloc (&cap_misc_cache);
  if (! ap)
    return (ENOMEM);

  pqueue_node_init (&ap->base.pnode, prio);
  cap_alert_type(&ap->base) = type;
  hlist_node_init (&ap->base.hnode);
  list_node_init (&ap->xlink);
  ap->flow = flow;
  ap->base.k_alert = (struct cap_kern_alert)
    {
      .type = type,
      .any_id = kuid->id
    };

  spinlock_lock (&flow->lock);
  if (unlikely (cap_alert_async_find (flow, type, kuid->id)))
    {
      spinlock_unlock (&flow->lock);
      kmem_cache_free (&cap_misc_cache, ap);
      return (EALREADY);
    }

  hlist_insert_head (&flow->alloc_alerts, &ap->base.hnode);
  spinlock_lock (&outp->lock);
  list_insert_tail (&outp->subs, &ap->xlink);

  if (atomic_load_rlx (&kuid->nr_refs) == 1)
    {
      pqueue_insert (&flow->pending_alerts, &ap->base.pnode);
      cap_recv_wakeup_fast (flow);
    }

  spinlock_unlock (&outp->lock);
  spinlock_unlock (&flow->lock);
  return (0);
}

static int
cap_task_thread_unregister (struct cap_flow *flow, int type,
                            int tid, struct bulletin *outp)
{
  struct cap_alert_async *entry;
  int error = cap_unregister_impl (flow, type, tid, &entry);

  if (! error)
    {
      SPINLOCK_GUARD (&outp->lock);
      list_remove (&entry->xlink);
    }

  return (error);
}

int
cap_thread_register (struct cap_flow *flow, struct thread *thr)
{
  if (! thr)
    return (EINVAL);

  return (cap_register_task_thread (flow, &thr->kuid, CAP_ALERT_THREAD_PRIO,
                                    CAP_ALERT_THREAD_DIED, &thr->dead_subs));
}

int
cap_task_register (struct cap_flow *flow, struct task *task)
{
  if (! task)
    return (EINVAL);

  return (cap_register_task_thread (flow, &task->kuid, CAP_ALERT_TASK_PRIO,
                                    CAP_ALERT_TASK_DIED, &task->dead_subs));
}

int
cap_thread_unregister (struct cap_flow *flow, struct thread *thr)
{
  if (! thr)
    return (EINVAL);

  return (cap_task_thread_unregister (flow, CAP_ALERT_THREAD_DIED,
                                      thread_id (thr), &thr->dead_subs));
}

int
cap_task_unregister (struct cap_flow *flow, struct task *task)
{
  if (! task)
    return (EINVAL);

  return (cap_task_thread_unregister (flow, CAP_ALERT_TASK_DIED,
                                      task_id (task), &task->dead_subs));
}

void
cap_notify_dead (struct bulletin *bulletin)
{
  struct list dead_subs;

  spinlock_lock (&bulletin->lock);
  list_set_head (&dead_subs, &bulletin->subs);
  list_init (&bulletin->subs);
  spinlock_unlock (&bulletin->lock);

  struct cap_alert_async *ap;
  list_for_each_entry (&dead_subs, ap, xlink)
    {
      _Auto flow = ap->flow;
      SPINLOCK_GUARD (&flow->lock);

      if (!pqueue_node_unlinked (&ap->base.pnode))
        continue;

      pqueue_insert (&flow->pending_alerts, &ap->base.pnode);
      cap_recv_wakeup_fast (flow);
    }
}

int
(cap_intern) (struct cap_base *cap, int flags)
{
  return (cap ? cspace_add_free (cspace_self (), cap, flags) : -EINVAL);
}

static size_t
cap_get_max (const size_t *args, size_t n)
{
  size_t ret = *args;
  for (size_t i = 1; i < n; ++i)
    if (args[i] > ret)
      ret = args[i];

  return (ret);
}

#define CAP_MAX(...)   \
  ({   \
     const size_t args_[] = { __VA_ARGS__ };   \
     cap_get_max (args_, ARRAY_SIZE (args_));   \
   })
static int __init
cap_setup (void)
{
  // Every capability type but flows is allocated from the same cache.
#define SZ(type)   sizeof (struct cap_##type)
#define AL(type)   alignof (struct cap_##type)
  size_t size = CAP_MAX (SZ (task), SZ (thread), SZ (channel),
                         SZ (kernel), SZ (alert_async));
  size_t alignment = CAP_MAX (AL (task), AL (thread), AL (channel),
                              AL (kernel), AL (alert_async));

  kmem_cache_init (&cap_misc_cache, "cap_misc", size, alignment, NULL, 0);
  kmem_cache_init (&cap_flow_cache, "cap_flow",
                   sizeof (struct cap_flow), 0, NULL, 0);
  kmem_cache_init (&cap_port_cache, "cap_port",
                   sizeof (struct cap_port_entry), 0, NULL, 0);

  adaptive_lock_init (&cap_intr_lock);
  for (size_t i = 0; i < ARRAY_SIZE (cap_intr_handlers); ++i)
    list_init (&cap_intr_handlers[i]);

  return (0);
}

INIT_OP_DEFINE (cap_setup,
                INIT_OP_DEP (intr_setup, true),
                INIT_OP_DEP (kmem_setup, true));