capability.c

/*
 * Copyright (c) 2023 Agustina Arzille.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 */

#include <kern/capability.h>
#include <kern/cspace.h>
#include <kern/intr.h>
#include <kern/kmem.h>
#include <kern/rcu.h>
#include <kern/thread.h>
#include <kern/user.h>

#include <machine/pmap.h>

struct cap_alert
{
  union
    {
      struct
        {   // Valid for user alerts and when not pending.
          int task_id;
          int thread_id;
          uintptr_t tag;
        };

      struct hlist_node hnode;
    };

  struct pqueue_node pnode;

  union
    {
      char payload[CAP_ALERT_SIZE + 1];
      struct cap_kern_alert k_alert;
    };
};
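/*
 * The static assertion below guarantees that the capability and VME
 * counters sit at the same offset relative to the received and sent
 * byte counters in struct ipc_msg_data. cap_transfer_iters() depends
 * on this: it receives a pointer to one of the byte counters and
 * updates the matching caps/vmes counters through the CAP_CAPS_OFF
 * and CAP_VMES_OFF byte offsets.
 */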
#define CAP_F(name)   __builtin_offsetof (struct ipc_msg_data, name)

static_assert (CAP_F (caps_recv) - CAP_F (bytes_recv) ==
               CAP_F (caps_sent) - CAP_F (bytes_sent) &&
               CAP_F (vmes_recv) - CAP_F (bytes_recv) ==
               CAP_F (vmes_sent) - CAP_F (bytes_sent),
               "invalid layout for struct ipc_msg_data");

#define CAP_VMES_OFF   (CAP_F (vmes_recv) - CAP_F (bytes_recv))
#define CAP_CAPS_OFF   (CAP_F (caps_recv) - CAP_F (bytes_recv))

struct cap_alert_async
{
  struct cap_alert base;
  struct list xlink;
  struct cap_flow *flow;
};

#define cap_alert_type(alert)   ((alert)->payload[CAP_ALERT_SIZE])

struct cap_port
{
  struct slist_node snode;
  struct task *task;
  size_t size;
  uintptr_t ctx[3];   // SP and function arguments.
  struct ipc_msg_data mdata;
  struct cap_iters in_it;
  struct cap_iters *out_it;
};

struct cap_receiver
{
  struct list lnode;
  struct thread *thread;
  void *buf;
  struct ipc_msg_data mdata;
  bool spurious;
};

struct cap_sender
{
  struct list lnode;
  struct thread *thread;
};

static struct kmem_cache cap_flow_cache;
static struct kmem_cache cap_misc_cache;
static struct kmem_cache cap_port_cache;

static struct list cap_intr_handlers[CPU_INTR_TABLE_SIZE];
static struct adaptive_lock cap_intr_lock;

// Priorities for kernel-generated alerts.
#define CAP_ALERT_TASK_PRIO      ((THREAD_SCHED_RT_PRIO_MAX + 2) << 1)
#define CAP_ALERT_THREAD_PRIO    (CAP_ALERT_TASK_PRIO << 1)
#define CAP_ALERT_INTR_PRIO      (CAP_ALERT_THREAD_PRIO << 1)
#define CAP_ALERT_CHANNEL_PRIO   (1u)

#define CAP_FROM_SREF(ptr, type)   structof (ptr, type, base.sref)

static void
cap_base_init (struct cap_base *base, unsigned int type, sref_noref_fn_t noref)
{
  assert (type < CAP_TYPE_MAX);
  base->type = type;
  sref_counter_init (&base->sref, 1, NULL, noref);
}

static void
cap_task_fini (struct sref_counter *sref)
{
  _Auto tp = CAP_FROM_SREF (sref, struct cap_task);
  task_unref (tp->task);
  kmem_cache_free (&cap_misc_cache, tp);
}

int
cap_task_create (struct cap_task **outp, struct task *task)
{
  struct cap_task *ret = kmem_cache_alloc (&cap_misc_cache);
  if (! ret)
    return (ENOMEM);

  cap_base_init (&ret->base, CAP_TYPE_TASK, cap_task_fini);
  task_ref (task);
  ret->task = task;
  *outp = ret;
  return (0);
}

static void
cap_thread_fini (struct sref_counter *sref)
{
  _Auto tp = CAP_FROM_SREF (sref, struct cap_thread);
  thread_unref (tp->thread);
  kmem_cache_free (&cap_misc_cache, tp);
}

int
cap_thread_create (struct cap_thread **outp, struct thread *thread)
{
  struct cap_thread *ret = kmem_cache_alloc (&cap_misc_cache);
  if (! ret)
    return (ENOMEM);

  cap_base_init (&ret->base, CAP_TYPE_THREAD, cap_thread_fini);
  thread_ref (thread);
  ret->thread = thread;
  *outp = ret;
  return (0);
}

static void cap_recv_wakeup_fast (struct cap_flow *);
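/*
 * Destroying a channel that is still linked to a flow does not release
 * its memory right away. Since channels and alerts are both allocated
 * from cap_misc_cache, the channel object is reused in place as a
 * CAP_ALERT_CHAN_CLOSED alert carrying the channel's tag, queued on the
 * flow, and any sleeping receiver is woken so it can observe the close.
 */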
static void
cap_channel_fini (struct sref_counter *sref)
{
  _Auto chp = CAP_FROM_SREF (sref, struct cap_channel);
  _Auto flow = chp->flow;

  if (! flow)
    {
      kmem_cache_free (&cap_misc_cache, chp);
      return;
    }

  uintptr_t tag = chp->tag;
  // Mutate the type.
  struct cap_alert *alert = (void *)chp;
  alert->k_alert.type = cap_alert_type (alert) = CAP_ALERT_CHAN_CLOSED;
  alert->k_alert.tag = tag;
  pqueue_node_init (&alert->pnode, CAP_ALERT_CHANNEL_PRIO);
  hlist_node_init (&alert->hnode);

  spinlock_lock (&flow->lock);
  hlist_insert_head (&flow->alloc_alerts, &alert->hnode);
  pqueue_insert (&flow->pending_alerts, &alert->pnode);
  cap_recv_wakeup_fast (flow);
  spinlock_unlock (&flow->lock);
  cap_base_rel (flow);
}

int
cap_channel_create (struct cap_channel **outp, struct cap_flow *flow,
                    uintptr_t tag)
{
  struct cap_channel *ret = kmem_cache_alloc (&cap_misc_cache);
  if (! ret)
    return (ENOMEM);

  cap_base_init (&ret->base, CAP_TYPE_CHANNEL, cap_channel_fini);
  if (flow)
    cap_base_acq (flow);

  ret->flow = flow;
  ret->tag = tag;
  *outp = ret;
  return (0);
}
static void cap_intr_rem (uint32_t irq, struct list *link);

static void
cap_task_thread_rem (int id, int type, struct list *link)
{
  _Auto kuid = kuid_find (id, type == CAP_ALERT_THREAD_DIED ?
                          KUID_THREAD : KUID_TASK);

#define cap_unlink_alert(obj, type, unref)   \
  do   \
    {   \
      _Auto ptr = structof (obj, type, kuid);   \
      spinlock_lock (&ptr->dead_subs.lock);   \
      list_remove (link);   \
      spinlock_unlock (&ptr->dead_subs.lock);   \
      unref (ptr);   \
    }   \
  while (0)

  if (! kuid)
    return;
  else if (type == CAP_ALERT_THREAD_DIED)
    cap_unlink_alert (kuid, struct thread, thread_unref);
  else
    cap_unlink_alert (kuid, struct task, task_unref);

#undef cap_unlink_alert
}

static void
cap_alert_free (struct cap_alert *alert)
{
  _Auto async = (struct cap_alert_async *)alert;
  _Auto k_alert = &alert->k_alert;
  int type = cap_alert_type (alert);

  if (type == CAP_ALERT_INTR)
    cap_intr_rem (k_alert->intr.irq, &async->xlink);
  else if (type == CAP_ALERT_THREAD_DIED || type == CAP_ALERT_TASK_DIED)
    cap_task_thread_rem (k_alert->any_id, type, &async->xlink);

  kmem_cache_free (&cap_misc_cache, alert);
}

static void
cap_port_fini (struct cap_port *port)
{
  task_unref (port->task);
  kmem_cache_free (&cap_port_cache, port);
}

static void
cap_flow_fini (struct sref_counter *sref)
{
  _Auto flow = CAP_FROM_SREF (sref, struct cap_flow);
  struct cap_alert *alert, *tmp;

  pqueue_for_each_entry_safe (&flow->pending_alerts, alert, tmp, pnode)
    if (cap_alert_type (alert) == CAP_ALERT_USER)
      kmem_free (alert, sizeof (*alert));

  hlist_for_each_entry_safe (&flow->alloc_alerts, alert, tmp, hnode)
    cap_alert_free (alert);

  struct cap_port *port, *pt;
  slist_for_each_entry_safe (&flow->ports, port, pt, snode)
    cap_port_fini (port);

  kmem_cache_free (&cap_flow_cache, flow);
}

int
cap_flow_create (struct cap_flow **outp, uint32_t flags,
                 uintptr_t tag, uintptr_t entry)
{
  struct cap_flow *ret = kmem_cache_alloc (&cap_flow_cache);
  if (! ret)
    return (ENOMEM);

  cap_base_init (&ret->base, CAP_TYPE_FLOW, cap_flow_fini);
  spinlock_init (&ret->lock);
  list_init (&ret->waiters);
  list_init (&ret->receivers);
  slist_init (&ret->ports);
  hlist_init (&ret->alloc_alerts);
  pqueue_init (&ret->pending_alerts);
  ret->flags = flags;
  ret->tag = tag;
  ret->entry = entry;
  *outp = ret;
  return (0);
}
/*
 * Attempt to set the tag to a new value. The only valid transition is
 * from zero to any value.
 */
static int
cap_cas_tag (uintptr_t *tagp, uintptr_t value)
{
  while (1)
    {
      uintptr_t tmp = atomic_load_rlx (tagp);
      if (tmp != 0)
        return (EEXIST);
      else if (atomic_cas_bool_rlx (tagp, tmp, value))
        return (0);

      cpu_pause ();
    }
}

int
(cap_set_tag) (struct cap_base *cap, uintptr_t tag)
{
  if (! tag)
    return (EINVAL);
  else if (cap->type == CAP_TYPE_CHANNEL)
    return (cap_cas_tag (&((struct cap_channel *)cap)->tag, tag));
  else if (cap->type == CAP_TYPE_FLOW)
    return (cap_cas_tag (&((struct cap_flow *)cap)->tag, tag));

  return (EINVAL);
}

int
(cap_get_tag) (const struct cap_base *cap, uintptr_t *tagp)
{
  if (cap->type == CAP_TYPE_CHANNEL)
    *tagp = ((const struct cap_channel *)cap)->tag;
  else if (cap->type == CAP_TYPE_FLOW)
    *tagp = ((const struct cap_flow *)cap)->tag;
  else
    return (EINVAL);

  return (0);
}

int
cap_channel_link (struct cap_channel *channel, struct cap_flow *flow)
{
  while (1)
    {
      _Auto prev = atomic_load_rlx (&channel->flow);
      if (prev && flow)
        return (EAGAIN);
      else if (atomic_cas_bool_acq (&channel->flow, prev, flow))
        return (0);

      cpu_pause ();
    }
}

int
cap_flow_hook (struct cap_channel **outp, struct task *task, int capx)
{
  struct cap_base *base = cspace_get (&task->caps, capx);
  if (! base)
    return (EBADF);
  else if (base->type != CAP_TYPE_FLOW)
    {
      cap_base_rel (base);
      return (EINVAL);
    }

  _Auto flow = (struct cap_flow *)base;
  int ret = cap_channel_create (outp, flow, flow->tag);
  cap_base_rel (flow);
  return (ret);
}
/*
 * Transfer all 3 iterators between a local and a remote task.
 * Updates the metadata if successful. Returns the number of
 * raw bytes transmitted on success; a negative errno value on failure.
 */
static ssize_t
cap_transfer_iters (struct task *task, struct cap_iters *r_it,
                    struct cap_iters *l_it, int dir, ssize_t *bytesp)
{
  ssize_t ret = ipc_iov_iter_copy (task, &r_it->iov, &l_it->iov, dir);
  if (ret < 0)
    return (ret);

  *bytesp += ret;
  if (ipc_cap_iter_size (&r_it->cap) && ipc_cap_iter_size (&l_it->cap))
    {
      int nr_caps = ipc_cap_iter_copy (task, &r_it->cap, &l_it->cap, dir);
      if (nr_caps < 0)
        return (nr_caps);

      *(uint32_t *)((char *)bytesp + CAP_CAPS_OFF) += nr_caps;
    }

  if (ipc_vme_iter_size (&r_it->vme) && ipc_vme_iter_size (&l_it->vme))
    {
      int nr_vmes = ipc_vme_iter_copy (task, &r_it->vme, &l_it->vme, dir);
      if (nr_vmes < 0)
        return (nr_vmes);

      *(uint32_t *)((char *)bytesp + CAP_VMES_OFF) += nr_vmes;
    }

  return (ret);
}
static struct cap_alert*
cap_flow_alloc_alert (struct cap_flow *flow, uint32_t flg)
{
  spinlock_unlock (&flow->lock);
  void *ptr = kmem_cache_alloc2 (&cap_misc_cache, (flg & CAP_ALERT_NONBLOCK) ?
                                 0 : KMEM_ALLOC_SLEEP);
  spinlock_lock (&flow->lock);
  return (ptr);
}

static void
cap_receiver_add (struct cap_flow *flow, struct cap_receiver *recv, void *buf)
{
  recv->thread = thread_self ();
  recv->buf = buf;
  recv->spurious = false;
  memset (&recv->mdata, 0, sizeof (recv->mdata));
  list_insert_tail (&flow->receivers, &recv->lnode);
}

static void
cap_recv_wakeup_fast (struct cap_flow *flow)
{
  if (list_empty (&flow->receivers))
    return;

  _Auto recv = list_pop (&flow->receivers, struct cap_receiver, lnode);
  recv->spurious = true;
  thread_wakeup (recv->thread);
}

static struct cap_alert*
cap_recv_pop_alert (struct cap_flow *flow, void *buf, uint32_t flags,
                    struct ipc_msg_data *mdata, int *outp)
{
  if (!pqueue_empty (&flow->pending_alerts))
    return (pqueue_pop_entry (&flow->pending_alerts, struct cap_alert, pnode));
  else if (flags & CAP_ALERT_NONBLOCK)
    {
      spinlock_unlock (&flow->lock);
      *outp = EAGAIN;
      return (NULL);
    }

  struct cap_receiver recv;
  cap_receiver_add (flow, &recv, buf);

  do
    thread_sleep (&flow->lock, flow, "flow-alert");
  while (pqueue_empty (&flow->pending_alerts));

  if (recv.spurious)
    return (pqueue_pop_entry (&flow->pending_alerts, struct cap_alert, pnode));

  spinlock_unlock (&flow->lock);
  if (recv.mdata.bytes_recv >= 0 && mdata)
    {
      recv.mdata.bytes_recv = CAP_ALERT_SIZE;
      user_copy_to (mdata, &recv.mdata, sizeof (*mdata));
    }

  *outp = recv.mdata.bytes_recv >= 0 ? 0 : (int)-recv.mdata.bytes_recv;
  return (NULL);
}
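/*
 * Receive a pending alert, sleeping if none is queued unless
 * CAP_ALERT_NONBLOCK is set. The CAP_ALERT_SIZE payload is copied to
 * the user buffer; if that copy faults, the alert is requeued and
 * EFAULT is returned. Interrupt alerts are snapshotted into a
 * temporary buffer so their counter can be reset (and restored on
 * failure). On success, the optional MDATA argument receives the tag
 * and, for user alerts, the sender's task and thread IDs.
 */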
int
cap_recv_alert (struct cap_flow *flow, void *buf,
                uint32_t flags, struct ipc_msg_data *mdata)
{
  uint32_t ids[2] = { 0, 0 };
  uintptr_t tag = 0;
  spinlock_lock (&flow->lock);

  int error;
  _Auto entry = cap_recv_pop_alert (flow, buf, flags, mdata, &error);
  if (! entry)
    return (error);

  void *payload = entry->payload;
  int type = cap_alert_type (entry);

  if (type == CAP_ALERT_INTR)
    {   // Copy into a temp buffer so we may reset the counter.
      payload = alloca (sizeof (entry->k_alert));
      *(struct cap_kern_alert *)payload = entry->k_alert;
      entry->k_alert.intr.count = 0;
    }
  else if (type != CAP_ALERT_USER)
    hlist_remove (&entry->hnode);
  else
    {
      ids[0] = entry->task_id;
      ids[1] = entry->thread_id;
      tag = entry->tag;
    }

  pqueue_inc (&flow->pending_alerts, 1);
  spinlock_unlock (&flow->lock);

  if (unlikely (user_copy_to (buf, payload, CAP_ALERT_SIZE) != 0))
    {
      SPINLOCK_GUARD (&flow->lock);
      pqueue_insert (&flow->pending_alerts, &entry->pnode);

      if (type == CAP_ALERT_INTR)
        entry->k_alert.intr.count +=
          ((struct cap_kern_alert *)payload)->intr.count;
      else if (type != CAP_ALERT_USER)
        hlist_insert_head (&flow->alloc_alerts, &entry->hnode);

      cap_recv_wakeup_fast (flow);
      return (EFAULT);
    }
  else if (mdata)
    {
      struct ipc_msg_data tmp;
      memset (&tmp, 0, sizeof (tmp));
      tmp.bytes_recv = CAP_ALERT_SIZE;
      tmp.tag = tag;
      tmp.task_id = ids[0], tmp.thread_id = ids[1];
      user_copy_to (mdata, &tmp, sizeof (tmp));
    }

  return (0);
}
static void
cap_fill_ids (int *thr_idp, int *task_idp, struct thread *thr)
{
  *thr_idp = thread_id (thr);
  *task_idp = task_id (thr->task);
}
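/*
 * Send an alert through a flow or channel capability. The payload is
 * copied from user space into a stack buffer up front so that no page
 * fault can happen while the flow lock is held. If a receiver is
 * already waiting, the payload goes straight into its buffer and it is
 * woken; otherwise an alert is allocated and queued at priority PRIO.
 */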
int
(cap_send_alert) (struct cap_base *cap, const void *buf,
                  uint32_t flags, uint32_t prio)
{
  struct cap_flow *flow;
  uintptr_t tag;

  if (cap->type == CAP_TYPE_CHANNEL)
    {
      flow = ((struct cap_channel *)cap)->flow;
      if (! flow)
        return (EINVAL);

      tag = ((struct cap_channel *)cap)->tag;
    }
  else if (cap->type == CAP_TYPE_FLOW)
    {
      flow = (struct cap_flow *)cap;
      tag = flow->tag;
    }
  else
    return (EBADF);

  /*
   * Copy into a temporary buffer, since the code below may otherwise
   * generate a page fault while holding a spinlock.
   */
  char abuf[CAP_ALERT_SIZE] = { 0 };
  if (user_copy_from (abuf, buf, CAP_ALERT_SIZE) != 0)
    return (EFAULT);

  struct cap_receiver *recv;

  {
    SPINLOCK_GUARD (&flow->lock);
    if (list_empty (&flow->receivers))
      {
        _Auto alert = cap_flow_alloc_alert (flow, flags);
        if (! alert)
          return (ENOMEM);

        memcpy (alert->payload, abuf, CAP_ALERT_SIZE);
        pqueue_node_init (&alert->pnode, prio);
        pqueue_insert (&flow->pending_alerts, &alert->pnode);
        cap_alert_type (alert) = CAP_ALERT_USER;
        cap_fill_ids (&alert->thread_id, &alert->task_id, thread_self ());
        alert->tag = tag;

        /*
         * Allocating an alert temporarily drops the flow lock. Since a
         * receiver could have been added in the meantime, we need to
         * check again before returning.
         */
        cap_recv_wakeup_fast (flow);
        return (0);
      }

    recv = list_pop (&flow->receivers, typeof (*recv), lnode);
  }

  cap_fill_ids (&recv->mdata.thread_id, &recv->mdata.task_id, thread_self ());
  recv->mdata.tag = tag;
  ssize_t rv = ipc_bcopy (recv->thread->task, recv->buf, sizeof (abuf),
                          abuf, sizeof (abuf), IPC_COPY_TO);
  thread_wakeup (recv->thread);
  recv->mdata.bytes_recv = rv;
  return (rv < 0 ? (int)-rv : 0);
}
static void
cap_sender_add (struct cap_flow *flow, struct cap_sender *sender,
                struct thread *thread)
{
  sender->thread = thread;
  list_insert_tail (&flow->waiters, &sender->lnode);
}
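/*
 * Exchange *TASKP with the calling thread's cross-task pointer and
 * switch to the address space of the task that was passed in, with
 * preemption and interrupts disabled. The operation is its own
 * inverse, so a second call restores the previous task and pmap.
 */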
static void
cap_task_swap (struct task **taskp, struct thread *self)
{
  cpu_flags_t flags;
  thread_preempt_disable_intr_save (&flags);

  struct task *xtask = self->xtask;
  self->xtask = *taskp;
  *taskp = xtask;

  pmap_load (self->xtask->map->pmap);
  thread_preempt_enable_intr_restore (flags);
}

static void
cap_ipc_msg_data_init (struct ipc_msg_data *data, uintptr_t tag)
{
  data->size = sizeof (*data);
  data->tag = tag;
  data->bytes_recv = data->bytes_sent = 0;
  data->flags = 0;
  data->vmes_sent = data->caps_sent = 0;
  data->vmes_recv = data->caps_recv = 0;
}
static void
cap_iters_copy (struct cap_iters *dst, const struct cap_iters *src)
{
#define cap_copy_simple(d, s, type)   \
  d->type.begin = s->type.begin;   \
  d->type.cur = s->type.cur;   \
  d->type.end = s->type.end

  dst->iov.cache_idx = src->iov.cache_idx;
  dst->iov.head = src->iov.head;

  cap_copy_simple (dst, src, iov);
  cap_copy_simple (dst, src, cap);
  cap_copy_simple (dst, src, vme);
#undef cap_copy_simple
}
static void
cap_flow_push_port (struct cap_flow *flow, struct cap_port *port)
{
  SPINLOCK_GUARD (&flow->lock);
  slist_insert_head (&flow->ports, &port->snode);

  if (list_empty (&flow->waiters))
    return;

  _Auto sender = list_first_entry (&flow->waiters, struct cap_sender, lnode);
  thread_wakeup (sender->thread);
}

static struct cap_port*
cap_pop_port (struct cap_flow *flow, struct thread *self)
{
  SPINLOCK_GUARD (&flow->lock);
  if (slist_empty (&flow->ports))
    {
      struct cap_sender sender;
      cap_sender_add (flow, &sender, self);

      do
        thread_sleep (&flow->lock, flow, "flow-sender");
      while (slist_empty (&flow->ports));

      list_remove (&sender.lnode);
    }

  _Auto port = slist_first_entry (&flow->ports, struct cap_port, snode);
  slist_remove (&flow->ports, NULL);
  return (port);
}
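/*
 * Send a full message on a flow: pop an idle port (sleeping until one
 * becomes available), copy the input iterators into the port's task,
 * switch to that task and enter the flow at its entry point through
 * cpu_port_swap(). Execution resumes here once the receiving side
 * replies (see cap_reply_iters), after which the final metadata is
 * copied back to the caller if requested.
 */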
static ssize_t
cap_sender_impl (struct cap_flow *flow, uintptr_t tag, struct cap_iters *in,
                 struct cap_iters *out, struct ipc_msg_data *data)
{
  struct thread *self = thread_self ();
  _Auto port = cap_pop_port (flow, self);

  cap_ipc_msg_data_init (&port->mdata, tag);
  ssize_t nb = cap_transfer_iters (port->task, &port->in_it, in,
                                   IPC_COPY_TO, &port->mdata.bytes_recv);
  if (nb < 0)
    port->mdata.flags |= IPC_MSG_ERROR;

  cap_iters_copy (&port->in_it, in);
  port->out_it = out;

  struct cap_port *cur_port = self->cur_port;
  self->cur_port = port;
  cap_fill_ids (&port->mdata.thread_id, &port->mdata.task_id, self);

  // Switch task (also sets the pmap).
  cap_task_swap (&port->task, self);
  user_copy_to ((void *)port->ctx[2], &port->mdata, sizeof (port->mdata));

  // Jump to new PC and SP.
  ssize_t ret = cpu_port_swap (port->ctx, cur_port, (void *)flow->entry);

  // We're back.
  if (data && user_copy_to (data, &port->mdata, sizeof (*data)) != 0)
    ret = -EFAULT;

  cap_flow_push_port (flow, port);
  self->cur_port = cur_port;
  return (ret);
}
ssize_t
cap_send_iters (struct cap_base *cap, struct cap_iters *in,
                struct cap_iters *out, struct ipc_msg_data *data)
{
  struct cap_flow *flow;
  uintptr_t tag;
  struct ipc_msg_data mdata;

  if (! cap)
    return (-EBADF);

  switch (cap->type)
    {
      case CAP_TYPE_FLOW:
        flow = (struct cap_flow *)cap;
        tag = flow->tag;
        break;

      case CAP_TYPE_CHANNEL:
        flow = ((struct cap_channel *)cap)->flow;
        if (! flow)
          return (-EINVAL);

        tag = ((struct cap_channel *)cap)->tag;
        break;

      case CAP_TYPE_THREAD:
        return (thread_handle_msg (((struct cap_thread *)cap)->thread,
                                   in, out, &mdata));

      case CAP_TYPE_TASK:
        return (task_handle_msg (((struct cap_task *)cap)->task,
                                 in, out, &mdata));

      case CAP_TYPE_KERNEL:
        // TODO: Implement.
      default:
        return (-EINVAL);
    }

  return (cap_sender_impl (flow, tag, in, out, data));
}
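/*
 * cap_pull_iters() and cap_push_iters() are meant for the thread that
 * is currently servicing a port, i.e. between receiving a message and
 * replying to it. They transfer additional data from, or back to, the
 * sender beyond what was copied up front, and accumulate the result
 * into the port's metadata counters.
 */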
ssize_t
cap_pull_iters (struct cap_iters *it, struct ipc_msg_data *mdata)
{
  struct cap_port *port = thread_self ()->cur_port;
  if (! port)
    return (-EINVAL);

  struct ipc_msg_data tmp;
  cap_ipc_msg_data_init (&tmp, port->mdata.tag);

  ssize_t ret = cap_transfer_iters (port->task, &port->in_it, it,
                                    IPC_COPY_FROM, &tmp.bytes_recv);
  port->mdata.bytes_recv += tmp.bytes_recv;
  port->mdata.vmes_recv += tmp.vmes_recv;
  port->mdata.caps_recv += tmp.caps_recv;

  if (mdata)
    user_copy_to (mdata, &tmp, sizeof (tmp));

  return (ret);
}

ssize_t
cap_push_iters (struct cap_iters *it, struct ipc_msg_data *mdata)
{
  struct cap_port *port = thread_self ()->cur_port;
  if (! port)
    return (-EINVAL);

  struct ipc_msg_data tmp;
  cap_ipc_msg_data_init (&tmp, port->mdata.tag);

  ssize_t ret = cap_transfer_iters (port->task, port->out_it, it,
                                    IPC_COPY_TO, &tmp.bytes_sent);
  port->mdata.bytes_sent += tmp.bytes_sent;
  port->mdata.vmes_sent += tmp.vmes_sent;
  port->mdata.caps_sent += tmp.caps_sent;

  if (mdata)
    user_copy_to (mdata, &tmp, sizeof (tmp));

  return (ret);
}
static void
cap_mdata_swap (struct ipc_msg_data *mdata)
{
  SWAP (&mdata->bytes_sent, &mdata->bytes_recv);
  SWAP (&mdata->caps_sent, &mdata->caps_recv);
  SWAP (&mdata->vmes_sent, &mdata->vmes_recv);
}
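/*
 * Reply to the message currently being serviced. For a successful
 * reply (RV >= 0), the reply iterators are copied out to the sender,
 * the sent/received counters are swapped to reflect the sender's point
 * of view, and IPC_MSG_TRUNC is set if part of the reply did not fit.
 * The task and pmap are then switched back and cpu_port_return()
 * resumes the sender, so this function never returns to its caller.
 */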
ssize_t
cap_reply_iters (struct cap_iters *it, int rv)
{
  struct thread *self = thread_self ();
  struct cap_port *port = self->cur_port;
  ssize_t ret;

  if (! port)
    return (-EINVAL);
  else if (rv >= 0)
    {
      ret = cap_transfer_iters (port->task, port->out_it, it,
                                IPC_COPY_TO, &port->mdata.bytes_sent);
      if (ret > 0)
        ret = port->mdata.bytes_sent;

      cap_mdata_swap (&port->mdata);
      if (!ipc_iov_iter_empty (&it->iov) ||
          ipc_vme_iter_size (&it->vme) ||
          ipc_cap_iter_size (&it->cap))
        port->mdata.flags |= IPC_MSG_TRUNC;
    }
  else
    ret = rv;

  cap_task_swap (&port->task, self);
  cpu_port_return (port->ctx[0], ret);
  __builtin_unreachable ();
}
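/*
 * Ports provide the stack and message buffers that a sender runs on
 * when entering a flow. Adding a port registers the calling task's
 * buffers with the flow; removing one with STACK == ~0 picks the first
 * port owned by the calling task and unmaps its stack, while an
 * explicit stack address removes that specific port without unmapping.
 */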
int
cap_flow_add_port (struct cap_flow *flow, void *stack, size_t size,
                   struct ipc_msg *msg, struct ipc_msg_data *mdata,
                   struct cap_thread_info *info __unused)
{
  struct cap_port *entry = kmem_cache_alloc (&cap_port_cache);
  if (! entry)
    return (ENOMEM);

  entry->size = size;
  entry->ctx[0] = (uintptr_t)stack;
  entry->ctx[1] = (uintptr_t)msg;
  entry->ctx[2] = (uintptr_t)mdata;
  memset (&entry->mdata, 0, sizeof (entry->mdata));
  cap_iters_init_msg (&entry->in_it, msg);
  task_ref (entry->task = task_self ());

  cap_flow_push_port (flow, entry);
  return (0);
}

int
cap_flow_rem_port (struct cap_flow *flow, uintptr_t stack)
{
  spinlock_lock (&flow->lock);
  struct cap_port *entry;
  struct slist_node *prev = NULL;

  slist_for_each_entry (&flow->ports, entry, snode)
    {
      if (entry->task == task_self () &&
          (stack == ~(uintptr_t)0 || stack == entry->ctx[0]))
        break;

      prev = &entry->snode;
    }

  if (! entry)
    {
      spinlock_unlock (&flow->lock);
      return (ESRCH);
    }

  slist_remove (&flow->ports, prev);
  spinlock_unlock (&flow->lock);

  // Unmap the stack if the user didn't specify one.
  int error = stack != ~(uintptr_t)0 ? 0 :
              vm_map_remove (vm_map_self (), entry->ctx[0], entry->size);

  if (! error)
    kmem_cache_free (&cap_port_cache, entry);
  else
    cap_flow_push_port (flow, entry);

  return (error);
}
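/*
 * Interrupt delivery. Every registered IRQ owns an RCU-protected list
 * of async alerts. The handler walks that list, increments each
 * alert's pending count and, on the first increment, queues the alert
 * on its flow and wakes a receiver. Registration and removal are
 * serialized by cap_intr_lock, and the low-level handler is only
 * unregistered once the per-IRQ list becomes empty.
 */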
static int
cap_handle_intr (void *arg)
{
  struct list *list = arg;
  assert (list >= &cap_intr_handlers[0] &&
          list <= &cap_intr_handlers[ARRAY_SIZE (cap_intr_handlers) - 1]);

  RCU_GUARD ();
  list_rcu_for_each (list, tmp)
    {
      _Auto alert = list_entry (tmp, struct cap_alert_async, xlink);
      SPINLOCK_GUARD (&alert->flow->lock);

      if (++alert->base.k_alert.intr.count == 1)
        {
          pqueue_insert (&alert->flow->pending_alerts, &alert->base.pnode);
          cap_recv_wakeup_fast (alert->flow);
        }
    }

  return (EAGAIN);
}

static int
cap_intr_add (uint32_t intr, struct list *node)
{
  assert (intr >= CPU_EXC_INTR_FIRST &&
          intr - CPU_EXC_INTR_FIRST < ARRAY_SIZE (cap_intr_handlers));
  struct list *list = &cap_intr_handlers[intr - CPU_EXC_INTR_FIRST];

  ADAPTIVE_LOCK_GUARD (&cap_intr_lock);
  if (list_empty (list))
    {
      CPU_INTR_GUARD ();

      int error = intr_register (intr, cap_handle_intr, list);
      if (error)
        return (error);

      list_rcu_insert_head (list, node);
      return (0);
    }

  list_rcu_insert_head (list, node);
  return (0);
}

static void
cap_intr_rem (uint32_t intr, struct list *node)
{
  adaptive_lock_acquire (&cap_intr_lock);
  list_rcu_remove (node);

  if (list_empty (&cap_intr_handlers[intr - CPU_EXC_INTR_FIRST]))
    intr_unregister (intr, cap_handle_intr);

  adaptive_lock_release (&cap_intr_lock);
  rcu_wait ();
}

static struct cap_alert_async*
cap_alert_async_find (struct cap_flow *flow, int type, int id)
{
  struct cap_alert *tmp;
  hlist_for_each_entry (&flow->alloc_alerts, tmp, hnode)
    if (cap_alert_type (tmp) == type && tmp->k_alert.any_id == id)
      return ((void *)tmp);

  return (NULL);
}
int
cap_intr_register (struct cap_flow *flow, uint32_t irq)
{
  if (irq < CPU_EXC_INTR_FIRST || irq > CPU_EXC_INTR_LAST)
    return (EINVAL);

  struct cap_alert_async *ap = kmem_cache_alloc (&cap_misc_cache);
  if (! ap)
    return (ENOMEM);

  pqueue_node_init (&ap->base.pnode, CAP_ALERT_INTR_PRIO);
  cap_alert_type (&ap->base) = CAP_ALERT_INTR;
  hlist_node_init (&ap->base.hnode);
  list_node_init (&ap->xlink);
  ap->flow = flow;
  ap->base.k_alert = (struct cap_kern_alert)
    {
      .type = CAP_ALERT_INTR,
      .intr = { .irq = irq, .count = 0 }
    };

  int error = cap_intr_add (irq, &ap->xlink);
  if (error)
    {
      kmem_cache_free (&cap_misc_cache, ap);
      return (error);
    }

  spinlock_lock (&flow->lock);
  if (unlikely (cap_alert_async_find (flow, CAP_ALERT_INTR, irq)))
    {
      spinlock_unlock (&flow->lock);
      cap_intr_rem (irq, &ap->xlink);
      kmem_cache_free (&cap_misc_cache, ap);
      return (EALREADY);
    }

  hlist_insert_head (&flow->alloc_alerts, &ap->base.hnode);
  spinlock_unlock (&flow->lock);
  return (0);
}

static int
cap_unregister_impl (struct cap_flow *flow, int type,
                     uint32_t id, struct cap_alert_async **outp)
{
  SPINLOCK_GUARD (&flow->lock);
  _Auto entry = cap_alert_async_find (flow, type, id);

  if (! entry)
    return (ESRCH);

  hlist_remove (&entry->base.hnode);
  if (!pqueue_node_unlinked (&entry->base.pnode))
    pqueue_remove (&flow->pending_alerts, &entry->base.pnode);

  *outp = entry;
  return (0);
}

int
cap_intr_unregister (struct cap_flow *flow, uint32_t irq)
{
  CPU_INTR_GUARD ();
  struct cap_alert_async *entry;
  int error = cap_unregister_impl (flow, CAP_ALERT_INTR, irq, &entry);

  if (! error)
    cap_intr_rem (irq, &entry->xlink);

  return (error);
}
static int
cap_register_task_thread (struct cap_flow *flow, struct kuid_head *kuid,
                          uint32_t prio, int type, struct bulletin *outp)
{
  struct cap_alert_async *ap = kmem_cache_alloc (&cap_misc_cache);
  if (! ap)
    return (ENOMEM);

  pqueue_node_init (&ap->base.pnode, prio);
  cap_alert_type (&ap->base) = type;
  hlist_node_init (&ap->base.hnode);
  list_node_init (&ap->xlink);
  ap->flow = flow;
  ap->base.k_alert = (struct cap_kern_alert)
    {
      .type = type,
      .any_id = kuid->id
    };

  spinlock_lock (&flow->lock);
  if (unlikely (cap_alert_async_find (flow, type, kuid->id)))
    {
      spinlock_unlock (&flow->lock);
      kmem_cache_free (&cap_misc_cache, ap);
      return (EALREADY);
    }

  hlist_insert_head (&flow->alloc_alerts, &ap->base.hnode);
  spinlock_lock (&outp->lock);
  list_insert_tail (&outp->subs, &ap->xlink);

  if (atomic_load_rlx (&kuid->nr_refs) == 1)
    {
      pqueue_insert (&flow->pending_alerts, &ap->base.pnode);
      cap_recv_wakeup_fast (flow);
    }

  spinlock_unlock (&outp->lock);
  spinlock_unlock (&flow->lock);
  return (0);
}

static int
cap_task_thread_unregister (struct cap_flow *flow, int type,
                            int tid, struct bulletin *outp)
{
  struct cap_alert_async *entry;
  int error = cap_unregister_impl (flow, type, tid, &entry);

  if (! error)
    {
      SPINLOCK_GUARD (&outp->lock);
      list_remove (&entry->xlink);
    }

  return (error);
}

int
cap_thread_register (struct cap_flow *flow, struct thread *thr)
{
  if (! thr)
    return (EINVAL);

  return (cap_register_task_thread (flow, &thr->kuid, CAP_ALERT_THREAD_PRIO,
                                    CAP_ALERT_THREAD_DIED, &thr->dead_subs));
}

int
cap_task_register (struct cap_flow *flow, struct task *task)
{
  if (! task)
    return (EINVAL);

  return (cap_register_task_thread (flow, &task->kuid, CAP_ALERT_TASK_PRIO,
                                    CAP_ALERT_TASK_DIED, &task->dead_subs));
}

int
cap_thread_unregister (struct cap_flow *flow, struct thread *thr)
{
  if (! thr)
    return (EINVAL);

  return (cap_task_thread_unregister (flow, CAP_ALERT_THREAD_DIED,
                                      thread_id (thr), &thr->dead_subs));
}

int
cap_task_unregister (struct cap_flow *flow, struct task *task)
{
  if (! task)
    return (EINVAL);

  return (cap_task_thread_unregister (flow, CAP_ALERT_TASK_DIED,
                                      task_id (task), &task->dead_subs));
}
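/*
 * Called when a task or thread dies. The bulletin's subscriber list is
 * detached under its lock, and each subscribed flow gets its death
 * alert queued (unless it is already pending) along with a receiver
 * wakeup.
 */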
void
cap_notify_dead (struct bulletin *bulletin)
{
  struct list dead_subs;

  spinlock_lock (&bulletin->lock);
  list_set_head (&dead_subs, &bulletin->subs);
  list_init (&bulletin->subs);
  spinlock_unlock (&bulletin->lock);

  struct cap_alert_async *ap;
  list_for_each_entry (&dead_subs, ap, xlink)
    {
      _Auto flow = ap->flow;
      SPINLOCK_GUARD (&flow->lock);

      if (!pqueue_node_unlinked (&ap->base.pnode))
        continue;

      pqueue_insert (&flow->pending_alerts, &ap->base.pnode);
      cap_recv_wakeup_fast (flow);
    }
}
int
(cap_intern) (struct cap_base *cap, int flags)
{
  return (cap ? cspace_add_free (cspace_self (), cap, flags) : -EINVAL);
}

static size_t
cap_get_max (const size_t *args, size_t n)
{
  size_t ret = *args;
  for (size_t i = 1; i < n; ++i)
    if (args[i] > ret)
      ret = args[i];

  return (ret);
}

#define CAP_MAX(...)   \
  ({   \
     const size_t args_[] = { __VA_ARGS__ };   \
     cap_get_max (args_, ARRAY_SIZE (args_));   \
   })
static int __init
cap_setup (void)
{
  // Every capability type but flows is allocated from the same cache.
#define SZ(type)   sizeof (struct cap_##type)
#define AL(type)   alignof (struct cap_##type)
  size_t size = CAP_MAX (SZ (task), SZ (thread), SZ (channel),
                         SZ (kernel), SZ (alert_async));
  size_t alignment = CAP_MAX (AL (task), AL (thread), AL (channel),
                              AL (kernel), AL (alert_async));

  kmem_cache_init (&cap_misc_cache, "cap_misc", size, alignment, NULL, 0);
  kmem_cache_init (&cap_flow_cache, "cap_flow",
                   sizeof (struct cap_flow), 0, NULL, 0);
  kmem_cache_init (&cap_port_cache, "cap_port",
                   sizeof (struct cap_port), 0, NULL, 0);

  adaptive_lock_init (&cap_intr_lock);
  for (size_t i = 0; i < ARRAY_SIZE (cap_intr_handlers); ++i)
    list_init (&cap_intr_handlers[i]);

  return (0);
}

INIT_OP_DEFINE (cap_setup,
                INIT_OP_DEP (intr_setup, true),
                INIT_OP_DEP (kmem_setup, true));