capability.c

/*
 * Copyright (c) 2023 Agustina Arzille.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 */

#include <kern/capability.h>
#include <kern/cspace.h>
#include <kern/intr.h>
#include <kern/kmem.h>
#include <kern/kmessage.h>
#include <kern/rcu.h>
#include <kern/shell.h>
#include <kern/stream.h>
#include <kern/thread.h>
#include <machine/pmap.h>
#include <vm/map.h>
#include <vm/page.h>
#include <stdio.h>

struct cap_alert
{
  union
    {
      struct
        {   // Valid for user alerts and when not pending.
          int task_id;
          int thread_id;
          uintptr_t tag;
        };

      struct hlist_node hnode;
    };

  struct pqueue_node pnode;
  int alert_type;

  union
    {
      char payload[CAP_ALERT_SIZE];
      struct cap_kern_alert k_alert;
    };
};

#define CAP_F(name)   OFFSETOF (struct ipc_msg_data, name)

static_assert (CAP_F (caps_recv) - CAP_F (bytes_recv) ==
               CAP_F (caps_sent) - CAP_F (bytes_sent) &&
               CAP_F (vmes_recv) - CAP_F (bytes_recv) ==
               CAP_F (vmes_sent) - CAP_F (bytes_sent),
               "invalid layout for struct ipc_msg_data");

#define CAP_VMES_OFF   (CAP_F (vmes_recv) - CAP_F (bytes_recv))
#define CAP_CAPS_OFF   (CAP_F (caps_recv) - CAP_F (bytes_recv))

// An alert generated by a kernel event (e.g., a task died).
struct cap_alert_async
{
  struct cap_alert base;
  struct list xlink;
  struct cap_flow *flow;
};

/*
 * Landing pads represent the environment in which foreign threads begin
 * their execution once a message has been sent.
 *
 * When a thread initiates message passing, a landing pad is added to
 * an internal list. Messaging operations update the iterators and
 * metadata contained within the "current" landing pad.
 */
struct cap_lpad
{
  struct cap_base *src;
  struct cap_lpad *next;
  struct task *task;
  size_t size;
  uintptr_t ctx[3];   // SP and function arguments.
  struct ipc_msg_data mdata;
  uint16_t nr_cached_iovs;
  uint16_t xflags;
  struct cap_iters in_it;
  struct cap_iters *cur_in;
  struct cap_iters *cur_out;
};
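
/*
 * Usage sketch (illustrative only, not part of the build): a server task
 * donates a landing pad to a flow and later replies from inside it. The
 * buffer names and sizes below are hypothetical.
 *
 *   struct ipc_msg msg = ...;     // iovecs describing the receive area.
 *   struct ipc_msg_data mdata;
 *   void *stack = ...;            // stack the foreign thread will run on.
 *
 *   if (cap_flow_add_lpad (flow, stack, stack_size, &msg, &mdata, NULL) != 0)
 *     ...;                        // ENOMEM.
 *
 *   // Once a sender arrives, execution resumes at flow->entry on 'stack'.
 *   // The handler eventually hands control back with:
 *   //   cap_reply_iters (&out_iters, 0);
 */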

// A thread waiting on 'cap_recv_alert'.
struct cap_receiver
{
  struct list lnode;
  struct thread *thread;
  void *buf;
  struct ipc_msg_data mdata;
  bool spurious;
};

// A thread waiting for a landing pad to be available.
struct cap_sender
{
  struct list lnode;
  struct thread *thread;
};

static struct kmem_cache cap_flow_cache;
static struct kmem_cache cap_misc_cache;
static struct kmem_cache cap_lpad_cache;

static struct list cap_intr_handlers[CPU_INTR_TABLE_SIZE];
static struct adaptive_lock cap_intr_lock;

// Priorities for kernel-generated alerts.
#define CAP_ALERT_TASK_PRIO      ((THREAD_SCHED_RT_PRIO_MAX + 2) << 1)
#define CAP_ALERT_THREAD_PRIO    (CAP_ALERT_TASK_PRIO << 1)
#define CAP_ALERT_INTR_PRIO      (CAP_ALERT_THREAD_PRIO << 1)
#define CAP_ALERT_CHANNEL_PRIO   (1u)

#define CAP_CHANNEL_SHARED   0x01

#define CAP_FROM_SREF(ptr, type)   structof (ptr, type, base.sref)

// Forward declarations.
static void cap_recv_wakeup_fast (struct cap_flow *);
static void cap_intr_rem (uint32_t irq, struct list *link);

static void
cap_base_init (struct cap_base *base, uint32_t type, sref_noref_fn_t noref)
{
  assert (type < CAP_TYPE_MAX);
  base->tflags = ((uintptr_t)type << (sizeof (uintptr_t) * 8 - 8));
  sref_counter_init (&base->sref, 1, NULL, noref);
}

static void
cap_task_fini (struct sref_counter *sref)
{
  _Auto tp = CAP_FROM_SREF (sref, struct cap_task);
  task_unref (tp->task);
  kmem_cache_free (&cap_misc_cache, tp);
}

int
cap_task_create (struct cap_task **outp, struct task *task)
{
  struct cap_task *ret = kmem_cache_alloc (&cap_misc_cache);
  if (! ret)
    return (ENOMEM);

  cap_base_init (&ret->base, CAP_TYPE_TASK, cap_task_fini);
  task_ref (task);
  ret->task = task;
  *outp = ret;
  return (0);
}

static void
cap_thread_fini (struct sref_counter *sref)
{
  _Auto tp = CAP_FROM_SREF (sref, struct cap_thread);
  thread_unref (tp->thread);
  kmem_cache_free (&cap_misc_cache, tp);
}

int
cap_thread_create (struct cap_thread **outp, struct thread *thread)
{
  struct cap_thread *ret = kmem_cache_alloc (&cap_misc_cache);
  if (! ret)
    return (ENOMEM);

  cap_base_init (&ret->base, CAP_TYPE_THREAD, cap_thread_fini);
  thread_ref (thread);
  ret->thread = thread;
  *outp = ret;
  return (0);
}

static struct spinlock_guard
cap_flow_guard_make (struct cap_flow *flow)
{
  bool save_intr = (flow->base.tflags & CAP_FLOW_HANDLE_INTR) != 0;
  return (spinlock_guard_make (&flow->alerts.lock, save_intr));
}

#define cap_flow_guard_lock   spinlock_guard_lock
#define cap_flow_guard_fini   spinlock_guard_fini

static int
cap_alert_type (const struct cap_alert *alert)
{
  return (alert->alert_type);
}

static void
cap_alert_init_nodes (struct cap_alert *alert, uint32_t type, uint32_t prio)
{
  pqueue_node_init (&alert->pnode, prio);
  alert->alert_type = (int)type;
  hlist_node_init (&alert->hnode);
}

#define CAP_FLOW_GUARD(flow)   \
  CLEANUP (cap_flow_guard_fini) _Auto __unused UNIQ (cfg) =   \
    cap_flow_guard_make (flow)

static void
cap_channel_fini (struct sref_counter *sref)
{
  _Auto chp = CAP_FROM_SREF (sref, struct cap_channel);
  _Auto flow = chp->flow;
  uintptr_t tag = chp->tag;

  // Mutate the type.
  struct cap_alert *alert __attribute__ ((may_alias)) = (void *)chp;
  alert->k_alert.type = CAP_ALERT_CHAN_CLOSED;
  alert->k_alert.tag = tag;
  cap_alert_init_nodes (alert, CAP_ALERT_CHAN_CLOSED, CAP_ALERT_CHANNEL_PRIO);

  _Auto guard = cap_flow_guard_make (flow);
  hlist_insert_head (&flow->alerts.alloc, &alert->hnode);
  pqueue_insert (&flow->alerts.pending, &alert->pnode);
  cap_recv_wakeup_fast (flow);
  cap_flow_guard_fini (&guard);
  cap_base_rel (flow);
}

int
cap_channel_create (struct cap_channel **outp, struct cap_flow *flow,
                    uintptr_t tag)
{
  struct cap_channel *ret = kmem_cache_alloc (&cap_misc_cache);
  if (! ret)
    return (ENOMEM);

  cap_base_init (&ret->base, CAP_TYPE_CHANNEL, cap_channel_fini);
  if (flow)
    cap_base_acq (flow);

  ret->flow = flow;
  ret->tag = tag;
  ret->vmobj = NULL;
  *outp = ret;
  return (0);
}

static void
cap_task_thread_rem (int id, int type, struct list *link)
{
  _Auto kuid = kuid_find (id, type == CAP_ALERT_THREAD_DIED ?
                          KUID_THREAD : KUID_TASK);

#define cap_unlink_alert(obj, type, unref)   \
  do   \
    {   \
      _Auto ptr = structof (obj, type, kuid);   \
      spinlock_lock (&ptr->dead_subs.lock);   \
      list_remove (link);   \
      spinlock_unlock (&ptr->dead_subs.lock);   \
      unref (ptr);   \
    }   \
  while (0)

  if (! kuid)
    return;
  else if (type == CAP_ALERT_THREAD_DIED)
    cap_unlink_alert (kuid, struct thread, thread_unref);
  else
    cap_unlink_alert (kuid, struct task, task_unref);

#undef cap_unlink_alert
}

static void
cap_alert_free (struct cap_alert *alert)
{
  _Auto async = (struct cap_alert_async *)alert;
  _Auto k_alert = &alert->k_alert;
  int type = cap_alert_type (alert);

  if (type == CAP_ALERT_INTR)
    cap_intr_rem (k_alert->intr.irq, &async->xlink);
  else if (type == CAP_ALERT_THREAD_DIED || type == CAP_ALERT_TASK_DIED)
    cap_task_thread_rem (k_alert->any_id, type, &async->xlink);

  kmem_cache_free (&cap_misc_cache, alert);
}

static void
cap_flow_fini (struct sref_counter *sref)
{
  _Auto flow = CAP_FROM_SREF (sref, struct cap_flow);
  struct cap_alert *alert, *tmp;

  pqueue_for_each_entry_safe (&flow->alerts.pending, alert, tmp, pnode)
    if (cap_alert_type (alert) == CAP_ALERT_USER)
      kmem_cache_free (&cap_misc_cache, alert);

  hlist_for_each_entry_safe (&flow->alerts.alloc, alert, tmp, hnode)
    cap_alert_free (alert);

  for (_Auto lpad = flow->lpads.free_list; lpad; )
    {
      _Auto next = lpad->next;
      task_unref (lpad->task);
      kmem_cache_free (&cap_lpad_cache, lpad);
      lpad = next;
    }

  kmem_cache_free (&cap_flow_cache, flow);
}

#define CAP_FLOW_VALID_FLAGS   \
  (CAP_FLOW_HANDLE_INTR | CAP_FLOW_EXT_PAGER | CAP_FLOW_PAGER_FLUSHES)

int
cap_flow_create (struct cap_flow **outp, uint32_t flags,
                 uintptr_t tag, uintptr_t entry)
{
  if (flags & ~CAP_FLOW_VALID_FLAGS)
    return (EINVAL);

  struct cap_flow *ret = kmem_cache_alloc (&cap_flow_cache);
  if (! ret)
    return (ENOMEM);

  cap_base_init (&ret->base, CAP_TYPE_FLOW, cap_flow_fini);
  ret->base.tflags |= flags;
  ret->tag = tag;
  ret->entry = entry;
  spinlock_init (&ret->alerts.lock);
  list_init (&ret->alerts.receivers);
  hlist_init (&ret->alerts.alloc);
  pqueue_init (&ret->alerts.pending);
  ret->lpads.free_list = NULL;
  list_init (&ret->lpads.waiters);
  spinlock_init (&ret->lpads.lock);

  *outp = ret;
  return (0);
}
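
/*
 * Usage sketch (illustrative only, not part of the build): create a flow
 * that may handle interrupts and derive a channel carrying a caller-chosen
 * tag. 'entry_fn' and 'my_tag' are hypothetical.
 *
 *   struct cap_flow *flow;
 *   struct cap_channel *chan;
 *
 *   if (cap_flow_create (&flow, CAP_FLOW_HANDLE_INTR,
 *                        my_tag, (uintptr_t)entry_fn) != 0 ||
 *       cap_channel_create (&chan, flow, my_tag) != 0)
 *     ...;   // ENOMEM.
 */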

int
(cap_get_tag) (const struct cap_base *cap, uintptr_t *tagp)
{
  switch (cap_type (cap))
    {
      case CAP_TYPE_CHANNEL:
        *tagp = ((const struct cap_channel *)cap)->tag;
        return (0);

      case CAP_TYPE_FLOW:
        *tagp = ((const struct cap_flow *)cap)->tag;
        return (0);

      default:
        return (EINVAL);
    }
}

int
cap_flow_hook (struct cap_channel **outp, struct task *task, int capx)
{
  struct cap_base *base = cspace_get (&task->caps, capx);
  if (! base)
    return (EBADF);
  else if (cap_type (base) != CAP_TYPE_FLOW)
    {
      cap_base_rel (base);
      return (EINVAL);
    }

  _Auto flow = (struct cap_flow *)base;
  int ret = cap_channel_create (outp, flow, flow->tag);
  cap_base_rel (flow);
  return (ret);
}

static void
cap_ipc_msg_data_init (struct ipc_msg_data *data, uintptr_t tag)
{
  data->size = sizeof (*data);
  data->tag = tag;
  data->bytes_recv = data->bytes_sent = 0;
  data->flags = 0;
  data->vmes_sent = data->caps_sent = 0;
  data->vmes_recv = data->caps_recv = 0;
}

/*
 * Transfer all 3 iterators between a local and a remote task.
 * Updates the metadata if successful. Returns the number of
 * raw bytes transmitted on success; a negative errno value on failure.
 */
static ssize_t
cap_transfer_iters (struct task *task, struct cap_iters *r_it,
                    struct cap_iters *l_it, uint32_t flags, ssize_t *bytesp)
{
  ssize_t ret = ipc_iov_iter_copy (task, &r_it->iov, &l_it->iov, flags);
  if (ret < 0)
    return (ret);

  *bytesp += ret;
  if (ipc_cap_iter_size (&r_it->cap) && ipc_cap_iter_size (&l_it->cap))
    {
      int nr_caps = ipc_cap_iter_copy (task, &r_it->cap, &l_it->cap, flags);
      if (nr_caps < 0)
        return (nr_caps);

      *(uint32_t *)((char *)bytesp + CAP_CAPS_OFF) += nr_caps;
    }

  if (ipc_vme_iter_size (&r_it->vme) && ipc_vme_iter_size (&l_it->vme))
    {
      int nr_vmes = ipc_vme_iter_copy (task, &r_it->vme, &l_it->vme, flags);
      if (nr_vmes < 0)
        return (nr_vmes);

      *(uint32_t *)((char *)bytesp + CAP_VMES_OFF) += nr_vmes;
    }

  return (ret);
}
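
/*
 * Note on the 'bytesp' argument above: callers pass &mdata->bytes_recv or
 * &mdata->bytes_sent. CAP_CAPS_OFF and CAP_VMES_OFF are defined from the
 * receive-direction offsets, and the static_assert near the top of this file
 * guarantees the same offsets hold in the send direction. For example, in the
 * receive direction:
 *
 *   (char *)&mdata->bytes_recv + CAP_CAPS_OFF == (char *)&mdata->caps_recv
 *   (char *)&mdata->bytes_recv + CAP_VMES_OFF == (char *)&mdata->vmes_recv
 *
 * which is what the pointer arithmetic above relies on to bump the matching
 * caps_* and vmes_* counters regardless of direction.
 */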

static struct cap_alert*
cap_flow_alloc_alert (struct spinlock_guard *guard, uint32_t flg)
{
  cap_flow_guard_fini (guard);
  uint32_t alflags = (flg & CAP_ALERT_NONBLOCK) ? 0 : KMEM_ALLOC_SLEEP;
  void *ptr = kmem_cache_alloc2 (&cap_misc_cache, alflags);
  cap_flow_guard_lock (guard);
  return (ptr);
}

static void
cap_receiver_add (struct cap_flow *flow, struct cap_receiver *recv, void *buf)
{
  recv->thread = thread_self ();
  recv->buf = buf;
  recv->spurious = false;
  cap_ipc_msg_data_init (&recv->mdata, 0);
  list_insert_tail (&flow->alerts.receivers, &recv->lnode);
}

static void
cap_recv_wakeup_fast (struct cap_flow *flow)
{
  if (list_empty (&flow->alerts.receivers))
    return;

  _Auto recv = list_pop (&flow->alerts.receivers, struct cap_receiver, lnode);
  recv->spurious = true;
  thread_wakeup (recv->thread);
}

static struct cap_alert*
cap_recv_pop_alert (struct cap_flow *flow, void *buf, uint32_t flags,
                    struct ipc_msg_data *mdata, int *outp,
                    struct spinlock_guard *guard)
{
  if (!pqueue_empty (&flow->alerts.pending))
    return (pqueue_pop_entry (&flow->alerts.pending, struct cap_alert, pnode));
  else if (flags & CAP_ALERT_NONBLOCK)
    {
      cap_flow_guard_fini (guard);
      *outp = EAGAIN;
      return (NULL);
    }

  struct cap_receiver recv;
  cap_receiver_add (flow, &recv, buf);

  do
    thread_sleep (&flow->alerts.lock, flow, "flow-alert");
  while (pqueue_empty (&flow->alerts.pending));

  if (recv.spurious)
    return (pqueue_pop_entry (&flow->alerts.pending, struct cap_alert, pnode));

  cap_flow_guard_fini (guard);
  if (recv.mdata.bytes_recv >= 0 && mdata)
    {
      recv.mdata.bytes_recv = CAP_ALERT_SIZE;
      user_write_struct (mdata, &recv.mdata, sizeof (recv.mdata));
    }

  *outp = recv.mdata.bytes_recv >= 0 ? 0 : (int)-recv.mdata.bytes_recv;
  return (NULL);
}

int
cap_recv_alert (struct cap_flow *flow, void *buf,
                uint32_t flags, struct ipc_msg_data *mdata)
{
  uint32_t ids[2] = { 0, 0 };
  uintptr_t tag = 0;
  _Auto guard = cap_flow_guard_make (flow);
  int error;
  _Auto entry = cap_recv_pop_alert (flow, buf, flags, mdata, &error, &guard);

  if (! entry)
    return (error);

  void *payload = entry->payload;
  struct cap_kern_alert tmp_alert;
  int type = cap_alert_type (entry);

  if (type == CAP_ALERT_INTR)
    {   // Copy into a temp buffer so we may reset the counter.
      tmp_alert = entry->k_alert;
      entry->k_alert.intr.count = 0;
      payload = &tmp_alert;
    }
  else if (type != CAP_ALERT_USER)
    hlist_remove (&entry->hnode);
  else
    {
      ids[0] = entry->task_id;
      ids[1] = entry->thread_id;
      tag = entry->tag;
    }

  pqueue_inc (&flow->alerts.pending, 1);
  cap_flow_guard_fini (&guard);

  if (unlikely (user_copy_to (buf, payload, CAP_ALERT_SIZE) != 0))
    {
      cap_flow_guard_lock (&guard);
      pqueue_insert (&flow->alerts.pending, &entry->pnode);

      if (type == CAP_ALERT_INTR)
        entry->k_alert.intr.count += tmp_alert.intr.count;
      else if (type != CAP_ALERT_USER)
        hlist_insert_head (&flow->alerts.alloc, &entry->hnode);

      cap_recv_wakeup_fast (flow);
      cap_flow_guard_fini (&guard);
      return (EFAULT);
    }
  else if (mdata)
    {
      struct ipc_msg_data tmp;
      cap_ipc_msg_data_init (&tmp, tag);
      tmp.bytes_recv = CAP_ALERT_SIZE;
      tmp.task_id = ids[0], tmp.thread_id = ids[1];
      user_write_struct (mdata, &tmp, sizeof (tmp));
    }

  return (0);
}
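
/*
 * Usage sketch (illustrative only, not part of the build): drain pending
 * alerts without blocking, then block for the next one. 'abuf' and
 * 'handle_alert' are hypothetical; the buffer must span at least
 * CAP_ALERT_SIZE bytes.
 *
 *   char abuf[CAP_ALERT_SIZE];
 *   struct ipc_msg_data mdata;
 *
 *   while (cap_recv_alert (flow, abuf, CAP_ALERT_NONBLOCK, &mdata) == 0)
 *     handle_alert (abuf, &mdata);
 *
 *   // EAGAIN above means the pending queue is empty; now wait for more.
 *   int error = cap_recv_alert (flow, abuf, 0, &mdata);
 */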

static void
cap_fill_ids (int *thr_idp, int *task_idp, struct thread *thr)
{
  *thr_idp = thread_id (thr);
  *task_idp = task_id (thr->task);
}

int
(cap_send_alert) (struct cap_base *cap, const void *buf,
                  uint32_t flags, uint32_t prio)
{
  struct cap_flow *flow;
  uintptr_t tag;

  switch (cap_type (cap))
    {
      case CAP_TYPE_CHANNEL:
        flow = ((struct cap_channel *)cap)->flow;
        tag = ((struct cap_channel *)cap)->tag;
        break;

      case CAP_TYPE_FLOW:
        flow = (struct cap_flow *)cap;
        tag = flow->tag;
        break;

      default:
        return (EBADF);
    }

  /*
   * Copy into a temporary buffer, since the code below may otherwise
   * generate a page fault while holding a spinlock.
   */
  char abuf[CAP_ALERT_SIZE] = { 0 };
  if (user_copy_from (abuf, buf, CAP_ALERT_SIZE) != 0)
    return (EFAULT);

  struct cap_receiver *recv;

  {
    CLEANUP (cap_flow_guard_fini) _Auto guard = cap_flow_guard_make (flow);
    if (list_empty (&flow->alerts.receivers))
      {
        _Auto alert = cap_flow_alloc_alert (&guard, flags);
        if (! alert)
          return (ENOMEM);

        memcpy (alert->payload, abuf, CAP_ALERT_SIZE);
        cap_alert_init_nodes (alert, CAP_ALERT_USER, prio);
        pqueue_insert (&flow->alerts.pending, &alert->pnode);
        cap_fill_ids (&alert->thread_id, &alert->task_id, thread_self ());
        alert->tag = tag;

        /*
         * Allocating an alert temporarily drops the flow lock. Since a
         * receiver could have been added in the meantime, we need to
         * check again before returning.
         */
        cap_recv_wakeup_fast (flow);
        return (0);
      }

    recv = list_pop (&flow->alerts.receivers, typeof (*recv), lnode);
  }

  cap_fill_ids (&recv->mdata.thread_id, &recv->mdata.task_id, thread_self ());
  recv->mdata.tag = tag;
  ssize_t rv = ipc_bcopy (recv->thread->task, recv->buf, sizeof (abuf),
                          abuf, sizeof (abuf), IPC_COPY_TO | IPC_CHECK_REMOTE);
  thread_wakeup (recv->thread);
  recv->mdata.bytes_recv = rv;
  return (rv < 0 ? (int)-rv : 0);
}
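
/*
 * Usage sketch (illustrative only, not part of the build): post a user alert
 * on a capability that refers to a flow or one of its channels. 'payload' and
 * the priority value are hypothetical; the buffer must span CAP_ALERT_SIZE
 * bytes.
 *
 *   char payload[CAP_ALERT_SIZE] = { 0 };
 *   int error = cap_send_alert (cap, payload, CAP_ALERT_NONBLOCK, 1);
 *   // With CAP_ALERT_NONBLOCK, an allocation failure reports ENOMEM
 *   // instead of sleeping in the slab allocator.
 */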

static void
cap_task_swap (struct task **taskp, struct thread *self)
{
  cpu_flags_t flags;
  thread_preempt_disable_intr_save (&flags);

  struct task *xtask = self->xtask;
  self->xtask = *taskp;
  *taskp = xtask;

  pmap_load (self->xtask->map->pmap);
  thread_preempt_enable_intr_restore (flags);
}

static void
cap_flow_push_lpad (struct cap_flow *flow, struct cap_lpad *lpad)
{
  while (1)
    {
      _Auto next = lpad->next = atomic_load_rlx (&flow->lpads.free_list);
      if (!atomic_cas_bool_rel (&flow->lpads.free_list, next, lpad))
        {
          atomic_spin_nop ();
          continue;
        }

      atomic_fence_acq ();
      if (!next || !list_empty (&flow->lpads.waiters))
        {
          SPINLOCK_GUARD (&flow->lpads.lock);
          if (list_empty (&flow->lpads.waiters))
            return;

          _Auto sn = list_first_entry (&flow->lpads.waiters,
                                       struct cap_sender, lnode);
          thread_wakeup (sn->thread);
        }

      return;
    }
}

static struct cap_lpad*
cap_lpad_pop_free (struct cap_lpad **ptr)
{
  RCU_GUARD ();
  while (1)
    {
      _Auto tmp = atomic_load_rlx (ptr);
      if (! tmp)
        return (tmp);
      else if (atomic_cas_bool_acq (ptr, tmp, tmp->next))
        {
          tmp->next = NULL;
          return (tmp);
        }

      atomic_spin_nop ();
    }
}

static struct cap_lpad*
cap_flow_pop_lpad (struct cap_flow *flow, struct thread *self)
{
  _Auto ret = cap_lpad_pop_free (&flow->lpads.free_list);
  if (ret)
    return (ret);

  struct cap_sender sender = { .thread = self };
  SPINLOCK_GUARD (&flow->lpads.lock);
  list_insert_tail (&flow->lpads.waiters, &sender.lnode);
  atomic_fence_rel ();

  while ((ret = cap_lpad_pop_free (&flow->lpads.free_list)) == NULL)
    thread_sleep (&flow->lpads.lock, flow, "flow-send");

  list_remove (&sender.lnode);
  return (ret);
}

#define CAP_MSG_MASK        (IPC_MSG_TRUNC | IPC_MSG_ERROR | IPC_MSG_KERNEL)
#define CAP_MSG_REQ_PAGES   0x1000

static_assert ((CAP_MSG_REQ_PAGES & CAP_MSG_MASK) == 0,
               "CAP_MSG_REQ_PAGES must not intersect message mask");

static ssize_t
cap_sender_impl (struct cap_flow *flow, uintptr_t tag, struct cap_iters *in,
                 struct cap_iters *out, struct ipc_msg_data *data,
                 uint32_t xflags, struct cap_base *src)
{
  struct thread *self = thread_self ();
  _Auto lpad = cap_flow_pop_lpad (flow, self);
  uint32_t dirf = IPC_COPY_TO | IPC_CHECK_REMOTE |
                  ((xflags & IPC_MSG_KERNEL) ? 0 : IPC_CHECK_LOCAL);

  cap_ipc_msg_data_init (&lpad->mdata, tag);
  ssize_t nb = cap_transfer_iters (lpad->task, &lpad->in_it, in,
                                   dirf, &lpad->mdata.bytes_recv);

  lpad->mdata.flags |= (xflags & CAP_MSG_MASK) | (nb < 0 ? IPC_MSG_ERROR : 0);
  lpad->cur_in = in;
  lpad->cur_out = out;
  lpad->xflags = xflags & ~CAP_MSG_MASK;

  struct cap_lpad *cur_lpad = self->cur_lpad;
  self->cur_lpad = lpad;
  cap_fill_ids (&lpad->mdata.thread_id, &lpad->mdata.task_id, self);
  lpad->src = src;

  // Switch task (also sets the pmap).
  cap_task_swap (&lpad->task, self);
  user_write_struct ((void *)lpad->ctx[2], &lpad->mdata, sizeof (lpad->mdata));

  // Jump to new PC and SP.
  uintptr_t prev_stack = *lpad->ctx;
  ssize_t ret = cpu_lpad_swap (lpad->ctx, cur_lpad, (void *)flow->entry);

  // We're back.
  *lpad->ctx = prev_stack;
  if (data && user_write_struct (data, &lpad->mdata, sizeof (*data)) != 0)
    ret = -EFAULT;

  cap_flow_push_lpad (flow, lpad);
  self->cur_lpad = cur_lpad;
  return (ret);
}

ssize_t
cap_send_iters (struct cap_base *cap, struct cap_iters *in,
                struct cap_iters *out, struct ipc_msg_data *data,
                uint32_t xflags)
{
  struct cap_flow *flow;
  uintptr_t tag;
  struct ipc_msg_data mdata;

  if (! cap)
    return (-EBADF);

  switch (cap_type (cap))
    {
      case CAP_TYPE_FLOW:
        flow = (struct cap_flow *)cap;
        tag = flow->tag;
        break;

      case CAP_TYPE_CHANNEL:
        flow = ((struct cap_channel *)cap)->flow;
        tag = ((struct cap_channel *)cap)->tag;
        break;

      case CAP_TYPE_THREAD:
        return (thread_handle_msg (((struct cap_thread *)cap)->thread,
                                   in, out, &mdata));

      case CAP_TYPE_TASK:
        return (task_handle_msg (((struct cap_task *)cap)->task,
                                 in, out, &mdata));

      case CAP_TYPE_KERNEL:
        // TODO: Implement.
      default:
        return (-EINVAL);
    }

  return (cap_sender_impl (flow, tag, in, out, data, xflags, cap));
}

ssize_t
cap_pull_iters (struct cap_iters *it, struct ipc_msg_data *mdata)
{
  struct cap_lpad *lpad = thread_self()->cur_lpad;
  if (! lpad)
    return (-EINVAL);

  struct ipc_msg_data tmp;
  cap_ipc_msg_data_init (&tmp, lpad->mdata.tag);

  ssize_t ret = cap_transfer_iters (lpad->task, lpad->cur_in, it,
                                    IPC_COPY_FROM | IPC_CHECK_BOTH,
                                    &tmp.bytes_recv);

  lpad->mdata.bytes_recv += tmp.bytes_recv;
  lpad->mdata.vmes_recv += tmp.vmes_recv;
  lpad->mdata.caps_recv += tmp.caps_recv;

  if (mdata)
    user_write_struct (mdata, &tmp, sizeof (tmp));

  return (ret);
}

ssize_t
cap_push_iters (struct cap_iters *it, struct ipc_msg_data *mdata)
{
  struct cap_lpad *lpad = thread_self()->cur_lpad;
  if (! lpad)
    return (-EINVAL);

  struct ipc_msg_data tmp;
  cap_ipc_msg_data_init (&tmp, lpad->mdata.tag);

  ssize_t ret = cap_transfer_iters (lpad->task, lpad->cur_out, it,
                                    IPC_COPY_TO | IPC_CHECK_BOTH,
                                    &tmp.bytes_sent);

  lpad->mdata.bytes_sent += tmp.bytes_sent;
  lpad->mdata.vmes_sent += tmp.vmes_sent;
  lpad->mdata.caps_sent += tmp.caps_sent;

  if (mdata)
    user_write_struct (mdata, &tmp, sizeof (tmp));

  return (ret);
}

static void
cap_mdata_swap (struct ipc_msg_data *mdata)
{
  SWAP (&mdata->bytes_sent, &mdata->bytes_recv);
  SWAP (&mdata->caps_sent, &mdata->caps_recv);
  SWAP (&mdata->vmes_sent, &mdata->vmes_recv);
}

static void
cap_lpad_iters_reset (struct cap_lpad *lpad)
{
#define cap_reset_iter(name)   \
  ipc_##name##_iter_init (&lpad->in_it.name, lpad->in_it.name.begin,   \
                          lpad->in_it.name.end)

  cap_reset_iter (iov);
  cap_reset_iter (cap);
  cap_reset_iter (vme);

#undef cap_reset_iter

  lpad->in_it.iov.cur = lpad->nr_cached_iovs;
  lpad->in_it.iov.cache_idx = IPC_IOV_ITER_CACHE_SIZE - lpad->nr_cached_iovs;
}

noreturn static void
cap_lpad_return (struct cap_lpad *lpad, struct thread *self, ssize_t rv)
{
  cap_lpad_iters_reset (lpad);
  cap_task_swap (&lpad->task, self);
  cpu_lpad_return (lpad->ctx[0], rv);
}

ssize_t
cap_reply_iters (struct cap_iters *it, int rv)
{
  struct thread *self = thread_self ();
  struct cap_lpad *lpad = self->cur_lpad;
  ssize_t ret;

  if (!lpad || lpad->xflags)
    return (-EINVAL);
  else if (rv >= 0)
    {
      ret = cap_transfer_iters (lpad->task, lpad->cur_out, it,
                                IPC_COPY_TO | IPC_CHECK_BOTH,
                                &lpad->mdata.bytes_sent);
      if (ret > 0)
        ret = lpad->mdata.bytes_sent;

      cap_mdata_swap (&lpad->mdata);
      if (!ipc_iov_iter_empty (&it->iov) ||
          ipc_vme_iter_size (&it->vme) ||
          ipc_cap_iter_size (&it->cap))
        lpad->mdata.flags |= IPC_MSG_TRUNC;
    }
  else
    ret = rv;

  cap_lpad_return (lpad, self, ret);
}

static void
cap_lpad_fill_cache (struct cap_lpad *lpad, struct ipc_msg *msg)
{
  uint32_t nmax = MIN (msg->iov_cnt, IPC_IOV_ITER_CACHE_SIZE);
  _Auto outv = lpad->in_it.iov.cache + IPC_IOV_ITER_CACHE_SIZE;

  if (likely (user_copy_from (outv - nmax, msg->iovs,
                              nmax * sizeof (*outv)) == 0))
    {
      lpad->in_it.iov.cur += nmax;
      lpad->in_it.iov.cache_idx = IPC_IOV_ITER_CACHE_SIZE - nmax;
      lpad->nr_cached_iovs = nmax;
    }
}

int
cap_flow_add_lpad (struct cap_flow *flow, void *stack, size_t size,
                   struct ipc_msg *msg, struct ipc_msg_data *mdata,
                   struct cap_thread_info *info __unused)
{
  /*
   * TODO: The user check for the stack can't be made here (yet),
   * as the tests run with blocks that reside in kernel space.
   */
  struct cap_lpad *entry = kmem_cache_alloc (&cap_lpad_cache);
  if (! entry)
    return (ENOMEM);

  entry->size = size;
  entry->ctx[0] = (uintptr_t)stack;
  entry->ctx[1] = (uintptr_t)msg;
  entry->ctx[2] = (uintptr_t)mdata;
  memset (&entry->mdata, 0, sizeof (entry->mdata));
  cap_iters_init_msg (&entry->in_it, msg);
  cap_lpad_fill_cache (entry, msg);
  task_ref (entry->task = task_self ());

  cap_flow_push_lpad (flow, entry);
  return (0);
}

int
cap_flow_rem_lpad (struct cap_flow *flow, uintptr_t stack, bool unmap)
{
  _Auto self = task_self ();
  struct cap_lpad *entry;

  {
    RCU_GUARD ();
    for (_Auto pptr = &flow->lpads.free_list ; ; pptr = &entry->next)
      {
        entry = atomic_load_rlx (pptr);
        if (! entry)
          return (ESRCH);
        else if (entry->task == self &&
                 (stack == ~(uintptr_t)0 || stack == *entry->ctx))
          {
            if (!atomic_cas_bool_acq (pptr, entry, entry->next))
              return (ESRCH);

            break;
          }
      }
  }

  int error = stack != ~(uintptr_t)0 || !unmap ? 0 :
              vm_map_remove (vm_map_self (), stack, entry->size);

  if (! error)
    {
      rcu_wait ();
      task_unref (entry->task);
      kmem_cache_free (&cap_lpad_cache, entry);
    }
  else
    cap_flow_push_lpad (flow, entry);

  return (error);
}

static int
cap_handle_intr (void *arg)
{
  struct list *list = arg;
  assert (list >= &cap_intr_handlers[0] &&
          list <= &cap_intr_handlers[ARRAY_SIZE (cap_intr_handlers) - 1]);

  RCU_GUARD ();
  list_rcu_for_each (list, tmp)
    {
      _Auto alert = list_entry (tmp, struct cap_alert_async, xlink);
      SPINLOCK_GUARD (&alert->flow->alerts.lock);
      if (++alert->base.k_alert.intr.count == 1)
        {
          pqueue_insert (&alert->flow->alerts.pending, &alert->base.pnode);
          cap_recv_wakeup_fast (alert->flow);
        }
    }

  return (EAGAIN);
}

static int
cap_intr_add (uint32_t intr, struct list *node)
{
  assert (intr >= CPU_EXC_INTR_FIRST &&
          intr - CPU_EXC_INTR_FIRST < ARRAY_SIZE (cap_intr_handlers));
  struct list *list = &cap_intr_handlers[intr - CPU_EXC_INTR_FIRST];
  ADAPTIVE_LOCK_GUARD (&cap_intr_lock);

  if (list_empty (list))
    {
      CPU_INTR_GUARD ();

      int error = intr_register (intr, cap_handle_intr, list);
      if (error)
        return (error);

      list_rcu_insert_head (list, node);
      return (0);
    }

  list_rcu_insert_head (list, node);
  return (0);
}

static void
cap_intr_rem (uint32_t intr, struct list *node)
{
  ADAPTIVE_LOCK_GUARD (&cap_intr_lock);
  list_rcu_remove (node);
  if (list_empty (&cap_intr_handlers[intr - CPU_EXC_INTR_FIRST]))
    intr_unregister (intr, cap_handle_intr);
}

static struct cap_alert_async*
cap_alert_async_find (struct cap_flow *flow, int type, int id)
{
  struct cap_alert *tmp;
  hlist_for_each_entry (&flow->alerts.alloc, tmp, hnode)
    if (cap_alert_type (tmp) == type && tmp->k_alert.any_id == id)
      return ((void *)tmp);

  return (NULL);
}

int
cap_intr_register (struct cap_flow *flow, uint32_t irq)
{
  if (irq < CPU_EXC_INTR_FIRST || irq > CPU_EXC_INTR_LAST)
    return (EINVAL);
  else if (!(flow->base.tflags & CAP_FLOW_HANDLE_INTR))
    return (EPERM);

  struct cap_alert_async *ap = kmem_cache_alloc (&cap_misc_cache);
  if (! ap)
    return (ENOMEM);

  cap_alert_init_nodes (&ap->base, CAP_ALERT_INTR, CAP_ALERT_INTR_PRIO);
  list_node_init (&ap->xlink);
  ap->flow = flow;
  ap->base.k_alert.type = CAP_ALERT_INTR;
  ap->base.k_alert.intr.irq = irq;
  ap->base.k_alert.intr.count = 0;

  int error = cap_intr_add (irq, &ap->xlink);
  if (error)
    {
      kmem_cache_free (&cap_misc_cache, ap);
      return (error);
    }

  _Auto guard = cap_flow_guard_make (flow);
  if (unlikely (cap_alert_async_find (flow, CAP_ALERT_INTR, irq)))
    {
      cap_flow_guard_fini (&guard);
      cap_intr_rem (irq, &ap->xlink);
      rcu_wait ();
      kmem_cache_free (&cap_misc_cache, ap);
      return (EALREADY);
    }

  hlist_insert_head (&flow->alerts.alloc, &ap->base.hnode);
  cap_flow_guard_fini (&guard);
  return (0);
}

static int
cap_unregister_impl (struct cap_flow *flow, int type,
                     uint32_t id, struct cap_alert_async **outp)
{
  CAP_FLOW_GUARD (flow);
  _Auto entry = cap_alert_async_find (flow, type, id);

  if (! entry)
    return (ESRCH);

  hlist_remove (&entry->base.hnode);
  if (!pqueue_node_unlinked (&entry->base.pnode))
    pqueue_remove (&flow->alerts.pending, &entry->base.pnode);

  *outp = entry;
  return (0);
}

int
cap_intr_unregister (struct cap_flow *flow, uint32_t irq)
{
  cpu_flags_t flags;
  struct cap_alert_async *entry;

  cpu_intr_save (&flags);
  int error = cap_unregister_impl (flow, CAP_ALERT_INTR, irq, &entry);

  if (! error)
    {
      cap_intr_rem (irq, &entry->xlink);
      cpu_intr_restore (flags);
      rcu_wait ();
      kmem_cache_free (&cap_misc_cache, entry);
    }
  else
    cpu_intr_restore (flags);

  return (error);
}

static int
cap_register_task_thread (struct cap_flow *flow, struct kuid_head *kuid,
                          uint32_t prio, int type, struct bulletin *outp)
{
  struct cap_alert_async *ap = kmem_cache_alloc (&cap_misc_cache);
  if (! ap)
    return (ENOMEM);

  cap_alert_init_nodes (&ap->base, type, prio);
  list_node_init (&ap->xlink);
  ap->flow = flow;
  ap->base.k_alert.type = type;
  ap->base.k_alert.any_id = kuid->id;

  _Auto guard = cap_flow_guard_make (flow);
  if (unlikely (cap_alert_async_find (flow, type, kuid->id)))
    {
      cap_flow_guard_fini (&guard);
      kmem_cache_free (&cap_misc_cache, ap);
      return (EALREADY);
    }

  hlist_insert_head (&flow->alerts.alloc, &ap->base.hnode);
  spinlock_lock (&outp->lock);
  list_insert_tail (&outp->subs, &ap->xlink);
  spinlock_unlock (&outp->lock);
  cap_flow_guard_fini (&guard);
  return (0);
}

static int
cap_task_thread_unregister (struct cap_flow *flow, int type,
                            int tid, struct bulletin *outp)
{
  struct cap_alert_async *entry;
  int error = cap_unregister_impl (flow, type, tid, &entry);

  if (error)
    return (error);

  spinlock_lock (&outp->lock);
  list_remove (&entry->xlink);
  spinlock_unlock (&outp->lock);
  kmem_cache_free (&cap_misc_cache, entry);
  return (0);
}

int
cap_thread_register (struct cap_flow *flow, struct thread *thr)
{
  if (! thr)
    return (EINVAL);

  return (cap_register_task_thread (flow, &thr->kuid, CAP_ALERT_THREAD_PRIO,
                                    CAP_ALERT_THREAD_DIED, &thr->dead_subs));
}

int
cap_task_register (struct cap_flow *flow, struct task *task)
{
  if (! task)
    return (EINVAL);

  return (cap_register_task_thread (flow, &task->kuid, CAP_ALERT_TASK_PRIO,
                                    CAP_ALERT_TASK_DIED, &task->dead_subs));
}

int
cap_thread_unregister (struct cap_flow *flow, struct thread *thr)
{
  if (! thr)
    return (EINVAL);

  return (cap_task_thread_unregister (flow, CAP_ALERT_THREAD_DIED,
                                      thread_id (thr), &thr->dead_subs));
}

int
cap_task_unregister (struct cap_flow *flow, struct task *task)
{
  if (! task)
    return (EINVAL);

  return (cap_task_thread_unregister (flow, CAP_ALERT_TASK_DIED,
                                      task_id (task), &task->dead_subs));
}

void
cap_notify_dead (struct bulletin *bulletin)
{
  struct list dead_subs;

  spinlock_lock (&bulletin->lock);
  list_set_head (&dead_subs, &bulletin->subs);
  list_init (&bulletin->subs);
  spinlock_unlock (&bulletin->lock);

  struct cap_alert_async *ap;
  list_for_each_entry (&dead_subs, ap, xlink)
    {
      _Auto flow = ap->flow;
      CAP_FLOW_GUARD (flow);

      if (!pqueue_node_unlinked (&ap->base.pnode))
        continue;

      pqueue_insert (&flow->alerts.pending, &ap->base.pnode);
      cap_recv_wakeup_fast (flow);
    }
}

int
(cap_intern) (struct cap_base *cap, uint32_t flags)
{
  return (cap ? cspace_add_free (cspace_self (), cap, flags) : -EINVAL);
}

ssize_t
cap_request_pages (struct cap_channel *chp, uint64_t off,
                   uint32_t nr_pages, struct vm_page **pages)
{
  struct kmessage msg;
  msg.type = KMSG_TYPE_PAGE_REQ;
  msg.msg_flags = 0;
  msg.page_req.start = off;
  msg.page_req.end = off + nr_pages * PAGE_SIZE;

  struct cap_iters in, out;
  cap_iters_init_buf (&in, &msg, sizeof (msg));
  cap_iters_init_buf (&out, pages, nr_pages * sizeof (**pages));

  return (cap_send_iters (CAP (chp), &in, &out, NULL,
                          IPC_MSG_KERNEL | CAP_MSG_REQ_PAGES));
}

ssize_t
cap_reply_pagereq (const uintptr_t *usrc, uint32_t cnt)
{
  _Auto self = thread_self ();
  struct cap_lpad *lpad = self->cur_lpad;

  if (!lpad || !(lpad->xflags & CAP_MSG_REQ_PAGES))
    return (-EINVAL);

  uint32_t npg = lpad->cur_out->iov.head.iov_len / sizeof (struct vm_page);
  if (npg < cnt)
    cnt = npg;

  assert (cnt <= VM_MAP_MAX_FRAMES);
  uintptr_t src[VM_MAP_MAX_FRAMES];
  if (user_copy_from (src, usrc, cnt * sizeof (*usrc)) != 0)
    return (-EFAULT);

  struct vm_page **pages = lpad->cur_out->iov.head.iov_base;
  int rv = vm_map_reply_pagereq (src, cnt, pages);
  if (rv < 0)
    return (rv);

  cap_lpad_return (lpad, self, rv);
}

static struct vm_object*
cap_channel_load_vmobj (struct cap_channel *chp)
{
  RCU_GUARD ();
  _Auto prev = atomic_load_rlx (&chp->vmobj);
  return (!prev || vm_object_tryref (prev) ? prev : NULL);
}

struct vm_object*
cap_channel_get_vmobj (struct cap_channel *chp)
{
  uint32_t flags = VM_OBJECT_EXTERNAL |
                   ((chp->flow->base.tflags & CAP_FLOW_PAGER_FLUSHES) ?
                    VM_OBJECT_FLUSHES : 0);
  while (1)
    {
      _Auto prev = cap_channel_load_vmobj (chp);
      if (prev)
        return (prev);

      struct vm_object *obj;
      if (vm_object_create (&obj, flags, chp) != 0)
        // We couldn't create the object but maybe someone else could.
        return (cap_channel_load_vmobj (chp));
      else if (atomic_cas_bool_acq (&chp->vmobj, NULL, obj))
        {
          cap_base_acq (chp);
          return (obj);
        }

      vm_object_destroy (obj);
    }
}

void
cap_channel_put_vmobj (struct cap_channel *chp)
{
  rcu_read_enter ();
  _Auto prev = atomic_load_rlx (&chp->vmobj);

  if (prev && vm_object_unref_nofree (prev, 1))
    {
      atomic_store_rel (&chp->vmobj, NULL);
      rcu_read_leave ();
      vm_object_destroy (prev);
    }
  else
    rcu_read_leave ();
}

bool
cap_channel_mark_shared (struct cap_base *cap)
{
  while (1)
    {
      uintptr_t tmp = atomic_load_rlx (&cap->tflags);
      if (tmp & CAP_CHANNEL_SHARED)
        return (false);
      else if (atomic_cas_bool_acq_rel (&cap->tflags, tmp,
                                        tmp | CAP_CHANNEL_SHARED))
        return (true);

      atomic_spin_nop ();
    }
}

static size_t
cap_get_max (const size_t *args, size_t n)
{
  size_t ret = *args;
  for (size_t i = 1; i < n; ++i)
    if (args[i] > ret)
      ret = args[i];

  return (ret);
}

#define CAP_MAX(...)   \
  ({   \
     const size_t args_[] = { __VA_ARGS__ };   \
     cap_get_max (args_, ARRAY_SIZE (args_));   \
   })

static int __init
cap_setup (void)
{
  // Every capability type except flows is allocated from the same cache.
#define SZ(type)   sizeof (struct cap_##type)
#define AL(type)   alignof (struct cap_##type)

  size_t size = CAP_MAX (SZ (task), SZ (thread), SZ (channel),
                         SZ (kernel), SZ (alert_async));
  size_t alignment = CAP_MAX (AL (task), AL (thread), AL (channel),
                              AL (kernel), AL (alert_async));

  kmem_cache_init (&cap_misc_cache, "cap_misc", size, alignment, NULL, 0);
  kmem_cache_init (&cap_lpad_cache, "cap_lpad",
                   sizeof (struct cap_lpad), 0, NULL, 0);
  kmem_cache_init (&cap_flow_cache, "cap_flow",
                   sizeof (struct cap_flow), 0, NULL, 0);

  adaptive_lock_init (&cap_intr_lock);
  for (size_t i = 0; i < ARRAY_SIZE (cap_intr_handlers); ++i)
    list_init (&cap_intr_handlers[i]);

  return (0);
}

INIT_OP_DEFINE (cap_setup,
                INIT_OP_DEP (intr_setup, true),
                INIT_OP_DEP (kmem_setup, true));

#ifdef CONFIG_SHELL

#include <kern/panic.h>

static void
cap_shell_info (struct shell *shell, int argc, char **argv)
{
  _Auto stream = shell->stream;
  if (argc < 2)
    {
      stream_puts (stream, "usage: cap_info task\n");
      return;
    }

  const _Auto task = task_lookup (argv[1]);
  if (! task)
    {
      stream_puts (stream, "cap_info: task not found\n");
      return;
    }

  fmt_xprintf (stream, "capabilities:\nindex\ttype\textra\n");
  ADAPTIVE_LOCK_GUARD (&task->caps.lock);

  struct rdxtree_iter it;
  struct cap_base *cap;
  rdxtree_for_each (&task->caps.tree, &it, cap)
    {
      fmt_xprintf (stream, "%llu\t", it.key);
      switch (cap_type (cap))
        {
          case CAP_TYPE_CHANNEL:
            fmt_xprintf (stream, "channel\t{tag: %lu}\n",
                         ((struct cap_channel *)cap)->tag);
            break;

          case CAP_TYPE_FLOW:
            fmt_xprintf (stream, "flow\t{entry: %lu}\n",
                         ((struct cap_flow *)cap)->entry);
            break;

          case CAP_TYPE_TASK:
            fmt_xprintf (stream, "task\t{task: %s}\n",
                         ((struct cap_task *)cap)->task->name);
            break;

          case CAP_TYPE_THREAD:
            fmt_xprintf (stream, "thread\t{thread: %s}\n",
                         ((struct cap_thread *)cap)->thread->name);
            break;

          case CAP_TYPE_KERNEL:
            fmt_xprintf (stream, "kernel\t{kind: %d}\n",
                         ((struct cap_kernel *)cap)->kind);
            break;

          default:
            panic ("unknown capability type: %u\n", cap_type (cap));
        }
    }

  task_unref (task);
}

static struct shell_cmd cap_shell_cmds[] =
{
  SHELL_CMD_INITIALIZER ("cap_info", cap_shell_info,
                         "cap_info <task_name>",
                         "display capabilities of a task"),
};

static int __init
cap_setup_shell (void)
{
  SHELL_REGISTER_CMDS (cap_shell_cmds, shell_get_main_cmd_set ());
  return (0);
}

INIT_OP_DEFINE (cap_setup_shell,
                INIT_OP_DEP (printf_setup, true),
                INIT_OP_DEP (shell_setup, true),
                INIT_OP_DEP (task_setup, true),
                INIT_OP_DEP (cap_setup, true));

#endif