capability.c

/*
 * Copyright (c) 2023 Agustina Arzille.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 */

#include <kern/capability.h>
#include <kern/cspace.h>
#include <kern/intr.h>
#include <kern/kmem.h>
#include <kern/kmessage.h>
#include <kern/rcu.h>
#include <kern/shell.h>
#include <kern/stream.h>
#include <kern/thread.h>

#include <machine/pmap.h>

#include <vm/map.h>
#include <vm/page.h>

#include <stdio.h>

struct cap_alert
{
  union
    {
      struct
        {   // Valid for user alerts and when not pending.
          int task_id;
          int thread_id;
          uintptr_t tag;
        };

      struct hlist_node hnode;
    };

  struct pqueue_node pnode;
  union
    {
      char payload[CAP_ALERT_SIZE];
      struct cap_kern_alert k_alert;
    };
};

#define CAP_F(name)   OFFSETOF (struct ipc_msg_data, name)

static_assert (CAP_F (caps_recv) - CAP_F (bytes_recv) ==
                 CAP_F (caps_sent) - CAP_F (bytes_sent) &&
               CAP_F (vmes_recv) - CAP_F (bytes_recv) ==
                 CAP_F (vmes_sent) - CAP_F (bytes_sent),
               "invalid layout for struct ipc_msg_data");

#define CAP_VMES_OFF   (CAP_F (vmes_recv) - CAP_F (bytes_recv))
#define CAP_CAPS_OFF   (CAP_F (caps_recv) - CAP_F (bytes_recv))
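
/*
 * The offsets above let cap_transfer_iters() update the capability and
 * VME counters generically: it adds CAP_CAPS_OFF or CAP_VMES_OFF to the
 * address of the bytes_{recv,sent} member it was handed, which is why
 * the static assertion on the struct ipc_msg_data layout must hold.
 */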

// An alert generated by a kernel event (e.g., a task died).
struct cap_alert_async
{
  struct cap_alert base;
  struct list xlink;
  struct cap_flow *flow;
};

/*
 * Landing pads represent the environment in which foreign threads begin
 * executing once a message has been sent to them.
 *
 * When a thread initiates message passing, a landing pad is added to
 * an internal list. Messaging operations update the iterators and
 * metadata contained in the "current" landing pad.
 */
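
/*
 * Rough lifecycle sketch (illustrative only, not part of the build; the
 * variables below are hypothetical and the syscall layer is omitted):
 *
 *   // Receiver side: donate a landing pad to the flow.
 *   cap_flow_add_lpad (flow, stack, stack_size, msg, mdata, NULL);
 *
 *   // Sender side: the calling thread lands on the pad and waits.
 *   cap_send_iters (CAP (chan), &in_it, &out_it, &sender_mdata, 0);
 *
 *   // Pad entry code (runs in the receiver task), once done:
 *   cap_reply_iters (&reply_it, 0);
 */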
struct cap_lpad
{
  struct cap_base *src;
  struct cap_lpad *next;
  struct task *task;
  size_t size;
  uintptr_t ctx[3];   // SP and function arguments.
  struct ipc_msg_data mdata;
  uint16_t nr_cached_iovs;
  uint16_t xflags;
  struct cap_iters in_it;
  struct cap_iters *cur_in;
  struct cap_iters *cur_out;
};

// A thread waiting on 'cap_recv_alert'.
struct cap_receiver
{
  struct list lnode;
  struct thread *thread;
  void *buf;
  struct ipc_msg_data mdata;
  bool spurious;
};

// A thread waiting for a landing pad to be available.
struct cap_sender
{
  struct list lnode;
  struct thread *thread;
};

static struct kmem_cache cap_flow_cache;
static struct kmem_cache cap_misc_cache;
static struct kmem_cache cap_lpad_cache;

static struct list cap_intr_handlers[CPU_INTR_TABLE_SIZE];
static struct adaptive_lock cap_intr_lock;

// Priorities for kernel-generated alerts.
#define CAP_ALERT_TASK_PRIO      ((THREAD_SCHED_RT_PRIO_MAX + 2) << 1)
#define CAP_ALERT_THREAD_PRIO    (CAP_ALERT_TASK_PRIO << 1)
#define CAP_ALERT_INTR_PRIO      (CAP_ALERT_THREAD_PRIO << 1)
#define CAP_ALERT_CHANNEL_PRIO   (1u)
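
/*
 * Each kernel priority above is the previous one doubled, so interrupt
 * alerts are delivered ahead of thread-death alerts, which in turn are
 * delivered ahead of task-death alerts; channel-closure alerts use the
 * lowest priority of all.
 */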

#define CAP_CHANNEL_SHARED   0x01

#define CAP_FROM_SREF(ptr, type)   structof (ptr, type, base.sref)

// Forward declarations.
static void cap_recv_wakeup_fast (struct cap_flow *);
static void cap_intr_rem (uint32_t irq, struct list *link);

static void
cap_base_init (struct cap_base *base, uint32_t type, sref_noref_fn_t noref)
{
  assert (type < CAP_TYPE_MAX);
  base->tflags = ((uintptr_t)type << (sizeof (uintptr_t) * 8 - 8));
  sref_counter_init (&base->sref, 1, NULL, noref);
}

static void
cap_task_fini (struct sref_counter *sref)
{
  _Auto tp = CAP_FROM_SREF (sref, struct cap_task);
  task_unref (tp->task);
  kmem_cache_free (&cap_misc_cache, tp);
}

int
cap_task_create (struct cap_task **outp, struct task *task)
{
  struct cap_task *ret = kmem_cache_alloc (&cap_misc_cache);
  if (! ret)
    return (ENOMEM);

  cap_base_init (&ret->base, CAP_TYPE_TASK, cap_task_fini);
  task_ref (task);
  ret->task = task;
  *outp = ret;
  return (0);
}

static void
cap_thread_fini (struct sref_counter *sref)
{
  _Auto tp = CAP_FROM_SREF (sref, struct cap_thread);
  thread_unref (tp->thread);
  kmem_cache_free (&cap_misc_cache, tp);
}

int
cap_thread_create (struct cap_thread **outp, struct thread *thread)
{
  struct cap_thread *ret = kmem_cache_alloc (&cap_misc_cache);
  if (! ret)
    return (ENOMEM);

  cap_base_init (&ret->base, CAP_TYPE_THREAD, cap_thread_fini);
  thread_ref (thread);
  ret->thread = thread;
  *outp = ret;
  return (0);
}

static struct spinlock_guard
cap_flow_guard_make (struct cap_flow *flow)
{
  bool save_intr = (flow->base.tflags & CAP_FLOW_HANDLE_INTR) != 0;
  return (spinlock_guard_make (&flow->alerts.lock, save_intr));
}

#define cap_flow_guard_lock   spinlock_guard_lock
#define cap_flow_guard_fini   spinlock_guard_fini

static int
cap_alert_type (const struct cap_alert *alert)
{
  return (alert->pnode.extra);
}

static void
cap_alert_init_nodes (struct cap_alert *alert, uint32_t type, uint32_t prio)
{
  pqueue_node_init (&alert->pnode, prio);
  alert->pnode.extra = type;
  hlist_node_init (&alert->hnode);
}

#define CAP_FLOW_GUARD(flow)   \
  CLEANUP (cap_flow_guard_fini) _Auto __unused UNIQ (cfg) =   \
    cap_flow_guard_make (flow)

static void
cap_channel_fini (struct sref_counter *sref)
{
  _Auto chp = CAP_FROM_SREF (sref, struct cap_channel);
  _Auto flow = chp->flow;
  uintptr_t tag = chp->tag;

  // Mutate the type.
  struct cap_alert *alert __attribute__ ((may_alias)) = (void *)chp;
  alert->k_alert.type = CAP_ALERT_CHAN_CLOSED;
  alert->k_alert.tag = tag;
  cap_alert_init_nodes (alert, CAP_ALERT_CHAN_CLOSED, CAP_ALERT_CHANNEL_PRIO);

  _Auto guard = cap_flow_guard_make (flow);
  hlist_insert_head (&flow->alerts.alloc, &alert->hnode);
  pqueue_insert (&flow->alerts.pending, &alert->pnode);
  cap_recv_wakeup_fast (flow);
  cap_flow_guard_fini (&guard);
  cap_base_rel (flow);
}

int
cap_channel_create (struct cap_channel **outp, struct cap_flow *flow,
                    uintptr_t tag)
{
  struct cap_channel *ret = kmem_cache_alloc (&cap_misc_cache);
  if (! ret)
    return (ENOMEM);

  cap_base_init (&ret->base, CAP_TYPE_CHANNEL, cap_channel_fini);
  if (flow)
    cap_base_acq (flow);

  ret->flow = flow;
  ret->tag = tag;
  ret->vmobj = NULL;
  *outp = ret;
  return (0);
}

static void
cap_task_thread_rem (int id, int type, struct list *link)
{
  _Auto kuid = kuid_find (id, type == CAP_ALERT_THREAD_DIED ?
                          KUID_THREAD : KUID_TASK);

#define cap_unlink_alert(obj, type, unref)   \
  do   \
    {   \
      _Auto ptr = structof (obj, type, kuid);   \
      spinlock_lock (&ptr->dead_subs.lock);   \
      list_remove (link);   \
      spinlock_unlock (&ptr->dead_subs.lock);   \
      unref (ptr);   \
    }   \
  while (0)

  if (! kuid)
    return;
  else if (type == CAP_ALERT_THREAD_DIED)
    cap_unlink_alert (kuid, struct thread, thread_unref);
  else
    cap_unlink_alert (kuid, struct task, task_unref);

#undef cap_unlink_alert
}

static void
cap_alert_free (struct cap_alert *alert)
{
  _Auto async = (struct cap_alert_async *)alert;
  _Auto k_alert = &alert->k_alert;
  int type = cap_alert_type (alert);

  if (type == CAP_ALERT_INTR)
    cap_intr_rem (k_alert->intr.irq, &async->xlink);
  else if (type == CAP_ALERT_THREAD_DIED || type == CAP_ALERT_TASK_DIED)
    cap_task_thread_rem (k_alert->any_id, type, &async->xlink);

  kmem_cache_free (&cap_misc_cache, alert);
}

static void
cap_flow_fini (struct sref_counter *sref)
{
  _Auto flow = CAP_FROM_SREF (sref, struct cap_flow);
  struct cap_alert *alert, *tmp;

  pqueue_for_each_entry_safe (&flow->alerts.pending, alert, tmp, pnode)
    if (cap_alert_type (alert) == CAP_ALERT_USER)
      kmem_cache_free (&cap_misc_cache, alert);

  hlist_for_each_entry_safe (&flow->alerts.alloc, alert, tmp, hnode)
    cap_alert_free (alert);

  for (_Auto lpad = flow->lpads.free_list; lpad; )
    {
      _Auto next = lpad->next;
      task_unref (lpad->task);
      kmem_cache_free (&cap_lpad_cache, lpad);
      lpad = next;
    }

  kmem_cache_free (&cap_flow_cache, flow);
}

#define CAP_FLOW_VALID_FLAGS   \
  (CAP_FLOW_HANDLE_INTR | CAP_FLOW_EXT_PAGER | CAP_FLOW_PAGER_FLUSHES)

int
cap_flow_create (struct cap_flow **outp, uint32_t flags,
                 uintptr_t tag, uintptr_t entry)
{
  if (flags & ~CAP_FLOW_VALID_FLAGS)
    return (EINVAL);

  struct cap_flow *ret = kmem_cache_alloc (&cap_flow_cache);
  if (! ret)
    return (ENOMEM);

  cap_base_init (&ret->base, CAP_TYPE_FLOW, cap_flow_fini);
  ret->base.tflags |= flags;
  ret->tag = tag;
  ret->entry = entry;

  spinlock_init (&ret->alerts.lock);
  list_init (&ret->alerts.receivers);
  hlist_init (&ret->alerts.alloc);
  pqueue_init (&ret->alerts.pending);

  ret->lpads.free_list = NULL;
  list_init (&ret->lpads.waiters);
  spinlock_init (&ret->lpads.lock);

  *outp = ret;
  return (0);
}

int
(cap_get_tag) (const struct cap_base *cap, uintptr_t *tagp)
{
  switch (cap_type (cap))
    {
      case CAP_TYPE_CHANNEL:
        *tagp = ((const struct cap_channel *)cap)->tag;
        return (0);

      case CAP_TYPE_FLOW:
        *tagp = ((const struct cap_flow *)cap)->tag;
        return (0);

      default:
        return (EINVAL);
    }
}

int
cap_flow_hook (struct cap_channel **outp, struct task *task, int capx)
{
  struct cap_base *base = cspace_get (&task->caps, capx);
  if (! base)
    return (EBADF);
  else if (cap_type (base) != CAP_TYPE_FLOW)
    {
      cap_base_rel (base);
      return (EINVAL);
    }

  _Auto flow = (struct cap_flow *)base;
  int ret = cap_channel_create (outp, flow, flow->tag);
  cap_base_rel (flow);
  return (ret);
}

static void
cap_ipc_msg_data_init (struct ipc_msg_data *data, uintptr_t tag)
{
  data->size = sizeof (*data);
  data->tag = tag;
  data->bytes_recv = data->bytes_sent = 0;
  data->flags = 0;
  data->vmes_sent = data->caps_sent = 0;
  data->vmes_recv = data->caps_recv = 0;
}

/*
 * Transfer all 3 iterators between a local and a remote task.
 * Updates the metadata if successful. Returns the number of
 * raw bytes transmitted on success; a negative errno value on failure.
 */
static ssize_t
cap_transfer_iters (struct task *task, struct cap_iters *r_it,
                    struct cap_iters *l_it, uint32_t flags, ssize_t *bytesp)
{
  ssize_t ret = ipc_iov_iter_copy (task, &r_it->iov, &l_it->iov, flags);
  if (ret < 0)
    return (ret);

  *bytesp += ret;
  if (ipc_cap_iter_size (&r_it->cap) && ipc_cap_iter_size (&l_it->cap))
    {
      int nr_caps = ipc_cap_iter_copy (task, &r_it->cap, &l_it->cap, flags);
      if (nr_caps < 0)
        return (nr_caps);

      *(uint32_t *)((char *)bytesp + CAP_CAPS_OFF) += nr_caps;
    }

  if (ipc_vme_iter_size (&r_it->vme) && ipc_vme_iter_size (&l_it->vme))
    {
      int nr_vmes = ipc_vme_iter_copy (task, &r_it->vme, &l_it->vme, flags);
      if (nr_vmes < 0)
        return (nr_vmes);

      *(uint32_t *)((char *)bytesp + CAP_VMES_OFF) += nr_vmes;
    }

  return (ret);
}

static struct cap_alert*
cap_flow_alloc_alert (struct spinlock_guard *guard, uint32_t flg)
{
  cap_flow_guard_fini (guard);
  uint32_t alflags = (flg & CAP_ALERT_NONBLOCK) ? 0 : KMEM_ALLOC_SLEEP;
  void *ptr = kmem_cache_alloc2 (&cap_misc_cache, alflags);
  cap_flow_guard_lock (guard);
  return (ptr);
}

static void
cap_receiver_add (struct cap_flow *flow, struct cap_receiver *recv, void *buf)
{
  recv->thread = thread_self ();
  recv->buf = buf;
  recv->spurious = false;
  cap_ipc_msg_data_init (&recv->mdata, 0);
  list_insert_tail (&flow->alerts.receivers, &recv->lnode);
}

static void
cap_recv_wakeup_fast (struct cap_flow *flow)
{
  if (list_empty (&flow->alerts.receivers))
    return;

  _Auto recv = list_pop (&flow->alerts.receivers, struct cap_receiver, lnode);
  recv->spurious = true;
  thread_wakeup (recv->thread);
}
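
/*
 * A receiver popped by cap_recv_wakeup_fast() is marked "spurious": no
 * alert was copied directly into its buffer, so on wakeup it must go
 * back to the pending queue and pick up the alert itself.
 */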
static struct cap_alert*
cap_recv_pop_alert (struct cap_flow *flow, void *buf, uint32_t flags,
                    struct ipc_msg_data *mdata, int *outp,
                    struct spinlock_guard *guard)
{
  if (!pqueue_empty (&flow->alerts.pending))
    return (pqueue_pop_entry (&flow->alerts.pending, struct cap_alert, pnode));
  else if (flags & CAP_ALERT_NONBLOCK)
    {
      cap_flow_guard_fini (guard);
      *outp = EAGAIN;
      return (NULL);
    }

  struct cap_receiver recv;
  cap_receiver_add (flow, &recv, buf);

  do
    thread_sleep (&flow->alerts.lock, flow, "flow-alert");
  while (pqueue_empty (&flow->alerts.pending));

  if (recv.spurious)
    return (pqueue_pop_entry (&flow->alerts.pending, struct cap_alert, pnode));

  cap_flow_guard_fini (guard);
  if (recv.mdata.bytes_recv >= 0 && mdata)
    {
      recv.mdata.bytes_recv = CAP_ALERT_SIZE;
      user_write_struct (mdata, &recv.mdata, sizeof (recv.mdata));
    }

  *outp = recv.mdata.bytes_recv >= 0 ? 0 : (int)-recv.mdata.bytes_recv;
  return (NULL);
}

int
cap_recv_alert (struct cap_flow *flow, void *buf,
                uint32_t flags, struct ipc_msg_data *mdata)
{
  uint32_t ids[2] = { 0, 0 };
  uintptr_t tag = 0;
  _Auto guard = cap_flow_guard_make (flow);
  int error;
  _Auto entry = cap_recv_pop_alert (flow, buf, flags, mdata, &error, &guard);

  if (! entry)
    return (error);

  void *payload = entry->payload;
  struct cap_kern_alert tmp_alert;
  int type = cap_alert_type (entry);

  if (type == CAP_ALERT_INTR)
    {   // Copy into a temp buffer so we may reset the counter.
      tmp_alert = entry->k_alert;
      entry->k_alert.intr.count = 0;
      payload = &tmp_alert;
    }
  else if (type != CAP_ALERT_USER)
    hlist_remove (&entry->hnode);
  else
    {
      ids[0] = entry->task_id;
      ids[1] = entry->thread_id;
      tag = entry->tag;
    }

  pqueue_inc (&flow->alerts.pending, 1);
  cap_flow_guard_fini (&guard);

  if (unlikely (user_copy_to (buf, payload, CAP_ALERT_SIZE) != 0))
    {
      cap_flow_guard_lock (&guard);
      pqueue_insert (&flow->alerts.pending, &entry->pnode);

      if (type == CAP_ALERT_INTR)
        entry->k_alert.intr.count += tmp_alert.intr.count;
      else if (type != CAP_ALERT_USER)
        hlist_insert_head (&flow->alerts.alloc, &entry->hnode);

      cap_recv_wakeup_fast (flow);
      cap_flow_guard_fini (&guard);
      return (EFAULT);
    }
  else if (mdata)
    {
      struct ipc_msg_data tmp;
      cap_ipc_msg_data_init (&tmp, tag);
      tmp.bytes_recv = CAP_ALERT_SIZE;
      tmp.task_id = ids[0], tmp.thread_id = ids[1];
      user_write_struct (mdata, &tmp, sizeof (tmp));
    }

  return (0);
}
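
/*
 * Illustrative sketch (not part of the build): a server draining its
 * flow's alert queue might loop as follows; 'handle_alert' is a
 * hypothetical user handler.
 *
 *   char buf[CAP_ALERT_SIZE];
 *   struct ipc_msg_data mdata;
 *
 *   while (cap_recv_alert (flow, buf, 0, &mdata) == 0)
 *     handle_alert (buf, &mdata);
 */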
static void
cap_fill_ids (int *thr_idp, int *task_idp, struct thread *thr)
{
  *thr_idp = thread_id (thr);
  *task_idp = task_id (thr->task);
}

int
(cap_send_alert) (struct cap_base *cap, const void *buf,
                  uint32_t flags, uint32_t prio)
{
  struct cap_flow *flow;
  uintptr_t tag;

  switch (cap_type (cap))
    {
      case CAP_TYPE_CHANNEL:
        flow = ((struct cap_channel *)cap)->flow;
        tag = ((struct cap_channel *)cap)->tag;
        break;

      case CAP_TYPE_FLOW:
        flow = (struct cap_flow *)cap;
        tag = flow->tag;
        break;

      default:
        return (EBADF);
    }

  /*
   * Copy into a temporary buffer, since the code below may otherwise
   * generate a page fault while holding a spinlock.
   */
  char abuf[CAP_ALERT_SIZE] = { 0 };
  if (user_copy_from (abuf, buf, CAP_ALERT_SIZE) != 0)
    return (EFAULT);

  struct cap_receiver *recv;

  {
    CLEANUP (cap_flow_guard_fini) _Auto guard = cap_flow_guard_make (flow);
    if (list_empty (&flow->alerts.receivers))
      {
        _Auto alert = cap_flow_alloc_alert (&guard, flags);
        if (! alert)
          return (ENOMEM);

        memcpy (alert->payload, abuf, CAP_ALERT_SIZE);
        cap_alert_init_nodes (alert, CAP_ALERT_USER, prio);
        pqueue_insert (&flow->alerts.pending, &alert->pnode);
        cap_fill_ids (&alert->thread_id, &alert->task_id, thread_self ());
        alert->tag = tag;

        /*
         * Allocating an alert temporarily drops the flow lock. Since a
         * receiver could have been added in the meantime, we need to
         * check again before returning.
         */
        cap_recv_wakeup_fast (flow);
        return (0);
      }

    recv = list_pop (&flow->alerts.receivers, typeof (*recv), lnode);
  }

  cap_fill_ids (&recv->mdata.thread_id, &recv->mdata.task_id, thread_self ());
  recv->mdata.tag = tag;
  ssize_t rv = ipc_bcopy (recv->thread->task, recv->buf, sizeof (abuf),
                          abuf, sizeof (abuf), IPC_COPY_TO | IPC_CHECK_REMOTE);
  thread_wakeup (recv->thread);
  recv->mdata.bytes_recv = rv;
  return (rv < 0 ? (int)-rv : 0);
}

static void
cap_task_swap (struct task **taskp, struct thread *self)
{
  cpu_flags_t flags;
  thread_preempt_disable_intr_save (&flags);

  struct task *xtask = self->xtask;
  self->xtask = *taskp;
  *taskp = xtask;

  pmap_load (self->xtask->map->pmap);
  thread_preempt_enable_intr_restore (flags);
}
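
/*
 * The landing pad free list is a lock-free (Treiber-style) stack. The
 * push below only takes the spinlock when it may need to wake a sender
 * that found the list empty and queued itself on lpads.waiters.
 */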
static void
cap_flow_push_lpad (struct cap_flow *flow, struct cap_lpad *lpad)
{
  while (1)
    {
      _Auto next = lpad->next = atomic_load_rlx (&flow->lpads.free_list);
      if (!atomic_cas_bool_rel (&flow->lpads.free_list, next, lpad))
        {
          atomic_spin_nop ();
          continue;
        }

      atomic_fence_acq ();
      if (!next || !list_empty (&flow->lpads.waiters))
        {
          SPINLOCK_GUARD (&flow->lpads.lock);
          if (list_empty (&flow->lpads.waiters))
            return;

          _Auto sn = list_first_entry (&flow->lpads.waiters,
                                       struct cap_sender, lnode);
          thread_wakeup (sn->thread);
        }

      return;
    }
}

static struct cap_lpad*
cap_lpad_pop_free (struct cap_lpad **ptr)
{
  RCU_GUARD ();
  while (1)
    {
      _Auto tmp = atomic_load_rlx (ptr);
      if (! tmp)
        return (tmp);
      else if (atomic_cas_bool_acq (ptr, tmp, tmp->next))
        {
          tmp->next = NULL;
          return (tmp);
        }

      atomic_spin_nop ();
    }
}

static struct cap_lpad*
cap_flow_pop_lpad (struct cap_flow *flow, struct thread *self)
{
  _Auto ret = cap_lpad_pop_free (&flow->lpads.free_list);
  if (ret)
    return (ret);

  struct cap_sender sender = { .thread = self };
  SPINLOCK_GUARD (&flow->lpads.lock);
  list_insert_tail (&flow->lpads.waiters, &sender.lnode);
  atomic_fence_rel ();

  while ((ret = cap_lpad_pop_free (&flow->lpads.free_list)) == NULL)
    thread_sleep (&flow->lpads.lock, flow, "flow-send");

  list_remove (&sender.lnode);
  return (ret);
}

#define CAP_MSG_MASK        (IPC_MSG_TRUNC | IPC_MSG_ERROR | IPC_MSG_KERNEL)
#define CAP_MSG_REQ_PAGES   0x1000

static_assert ((CAP_MSG_REQ_PAGES & CAP_MSG_MASK) == 0,
               "CAP_MSG_REQ_PAGES must not intersect message mask");

static ssize_t
cap_sender_impl (struct cap_flow *flow, uintptr_t tag, struct cap_iters *in,
                 struct cap_iters *out, struct ipc_msg_data *data,
                 uint32_t xflags, struct cap_base *src)
{
  struct thread *self = thread_self ();
  _Auto lpad = cap_flow_pop_lpad (flow, self);
  uint32_t dirf = IPC_COPY_TO | IPC_CHECK_REMOTE |
                  ((xflags & IPC_MSG_KERNEL) ? 0 : IPC_CHECK_LOCAL);

  cap_ipc_msg_data_init (&lpad->mdata, tag);
  ssize_t nb = cap_transfer_iters (lpad->task, &lpad->in_it, in,
                                   dirf, &lpad->mdata.bytes_recv);

  lpad->mdata.flags |= (xflags & CAP_MSG_MASK) | (nb < 0 ? IPC_MSG_ERROR : 0);
  lpad->cur_in = in;
  lpad->cur_out = out;
  lpad->xflags = xflags & ~CAP_MSG_MASK;

  struct cap_lpad *cur_lpad = self->cur_lpad;
  self->cur_lpad = lpad;
  cap_fill_ids (&lpad->mdata.thread_id, &lpad->mdata.task_id, self);
  lpad->src = src;

  // Switch task (also sets the pmap).
  cap_task_swap (&lpad->task, self);
  user_write_struct ((void *)lpad->ctx[2], &lpad->mdata, sizeof (lpad->mdata));

  // Jump to new PC and SP.
  uintptr_t prev_stack = *lpad->ctx;
  ssize_t ret = cpu_lpad_swap (lpad->ctx, cur_lpad, (void *)flow->entry);

  // We're back.
  *lpad->ctx = prev_stack;
  if (data && user_write_struct (data, &lpad->mdata, sizeof (*data)) != 0)
    ret = -EFAULT;

  cap_flow_push_lpad (flow, lpad);
  self->cur_lpad = cur_lpad;
  return (ret);
}

ssize_t
cap_send_iters (struct cap_base *cap, struct cap_iters *in,
                struct cap_iters *out, struct ipc_msg_data *data,
                uint32_t xflags)
{
  struct cap_flow *flow;
  uintptr_t tag;
  struct ipc_msg_data mdata;

  if (! cap)
    return (-EBADF);

  switch (cap_type (cap))
    {
      case CAP_TYPE_FLOW:
        flow = (struct cap_flow *)cap;
        tag = flow->tag;
        break;

      case CAP_TYPE_CHANNEL:
        flow = ((struct cap_channel *)cap)->flow;
        tag = ((struct cap_channel *)cap)->tag;
        break;

      case CAP_TYPE_THREAD:
        return (thread_handle_msg (((struct cap_thread *)cap)->thread,
                                   in, out, &mdata));

      case CAP_TYPE_TASK:
        return (task_handle_msg (((struct cap_task *)cap)->task,
                                 in, out, &mdata));

      case CAP_TYPE_KERNEL:
        // TODO: Implement.
      default:
        return (-EINVAL);
    }

  return (cap_sender_impl (flow, tag, in, out, data, xflags, cap));
}

ssize_t
cap_pull_iters (struct cap_iters *it, struct ipc_msg_data *mdata)
{
  struct cap_lpad *lpad = thread_self()->cur_lpad;
  if (! lpad)
    return (-EINVAL);

  struct ipc_msg_data tmp;
  cap_ipc_msg_data_init (&tmp, lpad->mdata.tag);

  ssize_t ret = cap_transfer_iters (lpad->task, lpad->cur_in, it,
                                    IPC_COPY_FROM | IPC_CHECK_BOTH,
                                    &tmp.bytes_recv);

  lpad->mdata.bytes_recv += tmp.bytes_recv;
  lpad->mdata.vmes_recv += tmp.vmes_recv;
  lpad->mdata.caps_recv += tmp.caps_recv;

  if (mdata)
    user_write_struct (mdata, &tmp, sizeof (tmp));

  return (ret);
}

ssize_t
cap_push_iters (struct cap_iters *it, struct ipc_msg_data *mdata)
{
  struct cap_lpad *lpad = thread_self()->cur_lpad;
  if (! lpad)
    return (-EINVAL);

  struct ipc_msg_data tmp;
  cap_ipc_msg_data_init (&tmp, lpad->mdata.tag);

  ssize_t ret = cap_transfer_iters (lpad->task, lpad->cur_out, it,
                                    IPC_COPY_TO | IPC_CHECK_BOTH,
                                    &tmp.bytes_sent);

  lpad->mdata.bytes_sent += tmp.bytes_sent;
  lpad->mdata.vmes_sent += tmp.vmes_sent;
  lpad->mdata.caps_sent += tmp.caps_sent;

  if (mdata)
    user_write_struct (mdata, &tmp, sizeof (tmp));

  return (ret);
}
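
/*
 * Swap the sent/received counters of a landing pad's metadata: what the
 * pad sent while replying is what the original sender will have received
 * once cap_sender_impl() returns, and vice versa.
 */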
static void
cap_mdata_swap (struct ipc_msg_data *mdata)
{
  SWAP (&mdata->bytes_sent, &mdata->bytes_recv);
  SWAP (&mdata->caps_sent, &mdata->caps_recv);
  SWAP (&mdata->vmes_sent, &mdata->vmes_recv);
}

static void
cap_lpad_iters_reset (struct cap_lpad *lpad)
{
#define cap_reset_iter(name)   \
  ipc_##name##_iter_init (&lpad->in_it.name, lpad->in_it.name.begin,   \
                          lpad->in_it.name.end)

  cap_reset_iter (iov);
  cap_reset_iter (cap);
  cap_reset_iter (vme);

#undef cap_reset_iter

  lpad->in_it.iov.cur = lpad->nr_cached_iovs;
  lpad->in_it.iov.cache_idx = IPC_IOV_ITER_CACHE_SIZE - lpad->nr_cached_iovs;
}

noreturn static void
cap_lpad_return (struct cap_lpad *lpad, struct thread *self, ssize_t rv)
{
  cap_lpad_iters_reset (lpad);
  cap_task_swap (&lpad->task, self);
  cpu_lpad_return (lpad->ctx[0], rv);
}

ssize_t
cap_reply_iters (struct cap_iters *it, int rv)
{
  struct thread *self = thread_self ();
  struct cap_lpad *lpad = self->cur_lpad;
  ssize_t ret;

  if (!lpad || lpad->xflags)
    return (-EINVAL);
  else if (rv >= 0)
    {
      ret = cap_transfer_iters (lpad->task, lpad->cur_out, it,
                                IPC_COPY_TO | IPC_CHECK_BOTH,
                                &lpad->mdata.bytes_sent);
      if (ret > 0)
        ret = lpad->mdata.bytes_sent;

      cap_mdata_swap (&lpad->mdata);
      if (!ipc_iov_iter_empty (&it->iov) ||
          ipc_vme_iter_size (&it->vme) ||
          ipc_cap_iter_size (&it->cap))
        lpad->mdata.flags |= IPC_MSG_TRUNC;
    }
  else
    ret = rv;

  cap_lpad_return (lpad, self, ret);
}

static void
cap_lpad_fill_cache (struct cap_lpad *lpad, struct ipc_msg *msg)
{
  uint32_t nmax = MIN (msg->iov_cnt, IPC_IOV_ITER_CACHE_SIZE);
  _Auto outv = lpad->in_it.iov.cache + IPC_IOV_ITER_CACHE_SIZE;

  if (likely (user_copy_from (outv - nmax, msg->iovs,
                              nmax * sizeof (*outv)) == 0))
    {
      lpad->in_it.iov.cur += nmax;
      lpad->in_it.iov.cache_idx = IPC_IOV_ITER_CACHE_SIZE - nmax;
      lpad->nr_cached_iovs = nmax;
    }
}

int
cap_flow_add_lpad (struct cap_flow *flow, void *stack, size_t size,
                   struct ipc_msg *msg, struct ipc_msg_data *mdata,
                   struct cap_thread_info *info __unused)
{
  /*
   * TODO: The user check for the stack can't be made here (yet),
   * as the tests run with blocks that reside in kernel space.
   */
  struct cap_lpad *entry = kmem_cache_alloc (&cap_lpad_cache);
  if (! entry)
    return (ENOMEM);

  entry->size = size;
  entry->ctx[0] = (uintptr_t)stack;
  entry->ctx[1] = (uintptr_t)msg;
  entry->ctx[2] = (uintptr_t)mdata;
  memset (&entry->mdata, 0, sizeof (entry->mdata));
  cap_iters_init_msg (&entry->in_it, msg);
  cap_lpad_fill_cache (entry, msg);
  task_ref (entry->task = task_self ());
  cap_flow_push_lpad (flow, entry);
  return (0);
}

int
cap_flow_rem_lpad (struct cap_flow *flow, uintptr_t stack, bool unmap)
{
  _Auto self = task_self ();
  struct cap_lpad *entry;

  {
    RCU_GUARD ();
    for (_Auto pptr = &flow->lpads.free_list; ; pptr = &entry->next)
      {
        entry = atomic_load_rlx (pptr);
        if (! entry)
          return (ESRCH);
        else if (entry->task == self &&
                 (stack == ~(uintptr_t)0 || stack == *entry->ctx))
          {
            if (!atomic_cas_bool_acq (pptr, entry, entry->next))
              return (ESRCH);

            break;
          }
      }
  }

  int error = stack != ~(uintptr_t)0 || !unmap ? 0 :
              vm_map_remove (vm_map_self (), stack, entry->size);

  if (! error)
    {
      rcu_wait ();
      task_unref (entry->task);
      kmem_cache_free (&cap_lpad_cache, entry);
    }
  else
    cap_flow_push_lpad (flow, entry);

  return (error);
}
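
/*
 * Shared interrupt handler for all capability-based registrations on an
 * IRQ: it walks the registered alerts under RCU, bumps each pending
 * count, and queues the alert on its flow the first time the count
 * becomes nonzero.
 */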
static int
cap_handle_intr (void *arg)
{
  struct list *list = arg;
  assert (list >= &cap_intr_handlers[0] &&
          list <= &cap_intr_handlers[ARRAY_SIZE (cap_intr_handlers) - 1]);

  RCU_GUARD ();
  list_rcu_for_each (list, tmp)
    {
      _Auto alert = list_entry (tmp, struct cap_alert_async, xlink);
      SPINLOCK_GUARD (&alert->flow->alerts.lock);

      if (++alert->base.k_alert.intr.count == 1)
        {
          pqueue_insert (&alert->flow->alerts.pending, &alert->base.pnode);
          cap_recv_wakeup_fast (alert->flow);
        }
    }

  return (EAGAIN);
}

static int
cap_intr_add (uint32_t intr, struct list *node)
{
  assert (intr >= CPU_EXC_INTR_FIRST &&
          intr - CPU_EXC_INTR_FIRST < ARRAY_SIZE (cap_intr_handlers));

  struct list *list = &cap_intr_handlers[intr - CPU_EXC_INTR_FIRST];
  ADAPTIVE_LOCK_GUARD (&cap_intr_lock);

  if (list_empty (list))
    {
      CPU_INTR_GUARD ();

      int error = intr_register (intr, cap_handle_intr, list);
      if (error)
        return (error);

      list_rcu_insert_head (list, node);
      return (0);
    }

  list_rcu_insert_head (list, node);
  return (0);
}

static void
cap_intr_rem (uint32_t intr, struct list *node)
{
  ADAPTIVE_LOCK_GUARD (&cap_intr_lock);
  list_rcu_remove (node);

  if (list_empty (&cap_intr_handlers[intr - CPU_EXC_INTR_FIRST]))
    intr_unregister (intr, cap_handle_intr);
}

static struct cap_alert_async*
cap_alert_async_find (struct cap_flow *flow, int type, int id)
{
  struct cap_alert *tmp;
  hlist_for_each_entry (&flow->alerts.alloc, tmp, hnode)
    if (cap_alert_type (tmp) == type && tmp->k_alert.any_id == id)
      return ((void *)tmp);

  return (NULL);
}

int
cap_intr_register (struct cap_flow *flow, uint32_t irq)
{
  if (irq < CPU_EXC_INTR_FIRST || irq > CPU_EXC_INTR_LAST)
    return (EINVAL);
  else if (!(flow->base.tflags & CAP_FLOW_HANDLE_INTR))
    return (EPERM);

  struct cap_alert_async *ap = kmem_cache_alloc (&cap_misc_cache);
  if (! ap)
    return (ENOMEM);

  cap_alert_init_nodes (&ap->base, CAP_ALERT_INTR, CAP_ALERT_INTR_PRIO);
  list_node_init (&ap->xlink);
  ap->flow = flow;
  ap->base.k_alert.type = CAP_ALERT_INTR;
  ap->base.k_alert.intr.irq = irq;
  ap->base.k_alert.intr.count = 0;

  int error = cap_intr_add (irq, &ap->xlink);
  if (error)
    {
      kmem_cache_free (&cap_misc_cache, ap);
      return (error);
    }

  _Auto guard = cap_flow_guard_make (flow);
  if (unlikely (cap_alert_async_find (flow, CAP_ALERT_INTR, irq)))
    {
      cap_flow_guard_fini (&guard);
      cap_intr_rem (irq, &ap->xlink);
      rcu_wait ();
      kmem_cache_free (&cap_misc_cache, ap);
      return (EALREADY);
    }

  hlist_insert_head (&flow->alerts.alloc, &ap->base.hnode);
  cap_flow_guard_fini (&guard);
  return (0);
}

static int
cap_unregister_impl (struct cap_flow *flow, int type,
                     uint32_t id, struct cap_alert_async **outp)
{
  CAP_FLOW_GUARD (flow);
  _Auto entry = cap_alert_async_find (flow, type, id);

  if (! entry)
    return (ESRCH);

  hlist_remove (&entry->base.hnode);
  if (!pqueue_node_unlinked (&entry->base.pnode))
    pqueue_remove (&flow->alerts.pending, &entry->base.pnode);

  *outp = entry;
  return (0);
}

int
cap_intr_unregister (struct cap_flow *flow, uint32_t irq)
{
  cpu_flags_t flags;
  struct cap_alert_async *entry;

  cpu_intr_save (&flags);
  int error = cap_unregister_impl (flow, CAP_ALERT_INTR, irq, &entry);

  if (! error)
    {
      cap_intr_rem (irq, &entry->xlink);
      cpu_intr_restore (flags);
      rcu_wait ();
      kmem_cache_free (&cap_misc_cache, entry);
    }
  else
    cpu_intr_restore (flags);

  return (error);
}

static int
cap_register_task_thread (struct cap_flow *flow, struct kuid_head *kuid,
                          uint32_t prio, int type, struct bulletin *outp)
{
  struct cap_alert_async *ap = kmem_cache_alloc (&cap_misc_cache);
  if (! ap)
    return (ENOMEM);

  cap_alert_init_nodes (&ap->base, type, prio);
  list_node_init (&ap->xlink);
  ap->flow = flow;
  ap->base.k_alert.type = type;
  ap->base.k_alert.any_id = kuid->id;

  _Auto guard = cap_flow_guard_make (flow);
  if (unlikely (cap_alert_async_find (flow, type, kuid->id)))
    {
      cap_flow_guard_fini (&guard);
      kmem_cache_free (&cap_misc_cache, ap);
      return (EALREADY);
    }

  hlist_insert_head (&flow->alerts.alloc, &ap->base.hnode);
  spinlock_lock (&outp->lock);
  list_insert_tail (&outp->subs, &ap->xlink);
  spinlock_unlock (&outp->lock);
  cap_flow_guard_fini (&guard);
  return (0);
}

static int
cap_task_thread_unregister (struct cap_flow *flow, int type,
                            int tid, struct bulletin *outp)
{
  struct cap_alert_async *entry;
  int error = cap_unregister_impl (flow, type, tid, &entry);

  if (error)
    return (error);

  spinlock_lock (&outp->lock);
  list_remove (&entry->xlink);
  spinlock_unlock (&outp->lock);
  kmem_cache_free (&cap_misc_cache, entry);
  return (0);
}

int
cap_thread_register (struct cap_flow *flow, struct thread *thr)
{
  if (! thr)
    return (EINVAL);

  return (cap_register_task_thread (flow, &thr->kuid, CAP_ALERT_THREAD_PRIO,
                                    CAP_ALERT_THREAD_DIED, &thr->dead_subs));
}

int
cap_task_register (struct cap_flow *flow, struct task *task)
{
  if (! task)
    return (EINVAL);

  return (cap_register_task_thread (flow, &task->kuid, CAP_ALERT_TASK_PRIO,
                                    CAP_ALERT_TASK_DIED, &task->dead_subs));
}

int
cap_thread_unregister (struct cap_flow *flow, struct thread *thr)
{
  if (! thr)
    return (EINVAL);

  return (cap_task_thread_unregister (flow, CAP_ALERT_THREAD_DIED,
                                      thread_id (thr), &thr->dead_subs));
}

int
cap_task_unregister (struct cap_flow *flow, struct task *task)
{
  if (! task)
    return (EINVAL);

  return (cap_task_thread_unregister (flow, CAP_ALERT_TASK_DIED,
                                      task_id (task), &task->dead_subs));
}
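
/*
 * Called when the task or thread owning 'bulletin' dies: every flow that
 * subscribed through cap_task_register() or cap_thread_register() gets
 * its death alert queued and a waiting receiver woken up.
 */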
void
cap_notify_dead (struct bulletin *bulletin)
{
  struct list dead_subs;

  spinlock_lock (&bulletin->lock);
  list_set_head (&dead_subs, &bulletin->subs);
  list_init (&bulletin->subs);
  spinlock_unlock (&bulletin->lock);

  struct cap_alert_async *ap;
  list_for_each_entry (&dead_subs, ap, xlink)
    {
      _Auto flow = ap->flow;
      CAP_FLOW_GUARD (flow);

      if (!pqueue_node_unlinked (&ap->base.pnode))
        continue;

      pqueue_insert (&flow->alerts.pending, &ap->base.pnode);
      cap_recv_wakeup_fast (flow);
    }
}

int
(cap_intern) (struct cap_base *cap, uint32_t flags)
{
  return (cap ? cspace_add_free (cspace_self (), cap, flags) : -EINVAL);
}

ssize_t
cap_request_pages (struct cap_channel *chp, uint64_t off,
                   uint32_t nr_pages, struct vm_page **pages)
{
  struct kmessage msg;
  msg.type = KMSG_TYPE_PAGE_REQ;
  msg.msg_flags = 0;
  msg.page_req.start = off;
  msg.page_req.end = off + nr_pages * PAGE_SIZE;

  struct cap_iters in, out;
  cap_iters_init_buf (&in, &msg, sizeof (msg));
  cap_iters_init_buf (&out, pages, nr_pages * sizeof (**pages));

  return (cap_send_iters (CAP (chp), &in, &out, NULL,
                          IPC_MSG_KERNEL | CAP_MSG_REQ_PAGES));
}

ssize_t
cap_reply_pagereq (const uintptr_t *usrc, uint32_t cnt)
{
  _Auto self = thread_self ();
  struct cap_lpad *lpad = self->cur_lpad;

  if (!lpad || !(lpad->xflags & CAP_MSG_REQ_PAGES))
    return (-EINVAL);

  uint32_t npg = lpad->cur_out->iov.head.iov_len / sizeof (struct vm_page);
  if (npg < cnt)
    cnt = npg;

  assert (cnt <= VM_MAP_MAX_FRAMES);
  uintptr_t src[VM_MAP_MAX_FRAMES];
  if (user_copy_from (src, usrc, cnt * sizeof (*usrc)) != 0)
    return (-EFAULT);

  struct vm_page **pages = lpad->cur_out->iov.head.iov_base;
  int rv = vm_map_reply_pagereq (src, cnt, pages);
  if (rv < 0)
    return (rv);

  cap_lpad_return (lpad, self, rv);
}

static struct vm_object*
cap_channel_load_vmobj (struct cap_channel *chp)
{
  RCU_GUARD ();
  _Auto prev = atomic_load_rlx (&chp->vmobj);
  return (!prev || vm_object_tryref (prev) ? prev : NULL);
}
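
/*
 * Return the channel's pager VM object, creating it on first use. If two
 * threads race to create it, the CAS below ensures only one object is
 * published; the loser destroys its copy and retries the lookup.
 */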
struct vm_object*
cap_channel_get_vmobj (struct cap_channel *chp)
{
  uint32_t flags = VM_OBJECT_EXTERNAL |
                   ((chp->flow->base.tflags & CAP_FLOW_PAGER_FLUSHES) ?
                    VM_OBJECT_FLUSHES : 0);
  while (1)
    {
      _Auto prev = cap_channel_load_vmobj (chp);
      if (prev)
        return (prev);

      struct vm_object *obj;
      if (vm_object_create (&obj, flags, chp) != 0)
        // We couldn't create the object but maybe someone else could.
        return (cap_channel_load_vmobj (chp));
      else if (atomic_cas_bool_acq (&chp->vmobj, NULL, obj))
        {
          cap_base_acq (chp);
          return (obj);
        }

      vm_object_destroy (obj);
    }
}

void
cap_channel_put_vmobj (struct cap_channel *chp)
{
  rcu_read_enter ();
  _Auto prev = atomic_load_rlx (&chp->vmobj);

  if (prev && vm_object_unref_nofree (prev, 1))
    {
      atomic_store_rel (&chp->vmobj, NULL);
      rcu_read_leave ();
      vm_object_destroy (prev);
    }
  else
    rcu_read_leave ();
}

bool
cap_channel_mark_shared (struct cap_base *cap)
{
  while (1)
    {
      uintptr_t tmp = atomic_load_rlx (&cap->tflags);
      if (tmp & CAP_CHANNEL_SHARED)
        return (false);
      else if (atomic_cas_bool_acq_rel (&cap->tflags, tmp,
                                        tmp | CAP_CHANNEL_SHARED))
        return (true);

      atomic_spin_nop ();
    }
}

static size_t
cap_get_max (const size_t *args, size_t n)
{
  size_t ret = *args;
  for (size_t i = 1; i < n; ++i)
    if (args[i] > ret)
      ret = args[i];

  return (ret);
}

#define CAP_MAX(...)   \
  ({   \
     const size_t args_[] = { __VA_ARGS__ };   \
     cap_get_max (args_, ARRAY_SIZE (args_));   \
   })

static int __init
cap_setup (void)
{
  // Every capability type except flows is allocated from the same cache.
#define SZ(type)   sizeof (struct cap_##type)
#define AL(type)   alignof (struct cap_##type)

  size_t size = CAP_MAX (SZ (task), SZ (thread), SZ (channel),
                         SZ (kernel), SZ (alert_async));
  size_t alignment = CAP_MAX (AL (task), AL (thread), AL (channel),
                              AL (kernel), AL (alert_async));

  kmem_cache_init (&cap_misc_cache, "cap_misc", size, alignment, NULL, 0);
  kmem_cache_init (&cap_lpad_cache, "cap_lpad",
                   sizeof (struct cap_lpad), 0, NULL, 0);
  kmem_cache_init (&cap_flow_cache, "cap_flow",
                   sizeof (struct cap_flow), 0, NULL, 0);

  adaptive_lock_init (&cap_intr_lock);
  for (size_t i = 0; i < ARRAY_SIZE (cap_intr_handlers); ++i)
    list_init (&cap_intr_handlers[i]);

  return (0);
}

INIT_OP_DEFINE (cap_setup,
                INIT_OP_DEP (intr_setup, true),
                INIT_OP_DEP (kmem_setup, true));

#ifdef CONFIG_SHELL

#include <kern/panic.h>

static void
cap_shell_info (struct shell *shell, int argc, char **argv)
{
  _Auto stream = shell->stream;
  if (argc < 2)
    {
      stream_puts (stream, "usage: cap_info task\n");
      return;
    }

  const _Auto task = task_lookup (argv[1]);
  if (! task)
    {
      stream_puts (stream, "cap_info: task not found\n");
      return;
    }

  fmt_xprintf (stream, "capabilities:\nindex\ttype\textra\n");
  ADAPTIVE_LOCK_GUARD (&task->caps.lock);

  struct rdxtree_iter it;
  struct cap_base *cap;
  rdxtree_for_each (&task->caps.tree, &it, cap)
    {
      fmt_xprintf (stream, "%llu\t", it.key);
      switch (cap_type (cap))
        {
          case CAP_TYPE_CHANNEL:
            fmt_xprintf (stream, "channel\t{tag: %lu}\n",
                         ((struct cap_channel *)cap)->tag);
            break;

          case CAP_TYPE_FLOW:
            fmt_xprintf (stream, "flow\t{entry: %lu}\n",
                         ((struct cap_flow *)cap)->entry);
            break;

          case CAP_TYPE_TASK:
            fmt_xprintf (stream, "task\t{task: %s}\n",
                         ((struct cap_task *)cap)->task->name);
            break;

          case CAP_TYPE_THREAD:
            fmt_xprintf (stream, "thread\t{thread: %s}\n",
                         ((struct cap_thread *)cap)->thread->name);
            break;

          case CAP_TYPE_KERNEL:
            fmt_xprintf (stream, "kernel\t{kind: %d}\n",
                         ((struct cap_kernel *)cap)->kind);
            break;

          default:
            panic ("unknown capability type: %u\n", cap_type (cap));
        }
    }

  task_unref (task);
}

static struct shell_cmd cap_shell_cmds[] =
{
  SHELL_CMD_INITIALIZER ("cap_info", cap_shell_info,
                         "cap_info <task_name>",
                         "display capabilities of a task"),
};

static int __init
cap_setup_shell (void)
{
  SHELL_REGISTER_CMDS (cap_shell_cmds, shell_get_main_cmd_set ());
  return (0);
}

INIT_OP_DEFINE (cap_setup_shell,
                INIT_OP_DEP (printf_setup, true),
                INIT_OP_DEP (shell_setup, true),
                INIT_OP_DEP (task_setup, true),
                INIT_OP_DEP (cap_setup, true));

#endif