kcmsock.c 45 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107
  1. /*
  2. * Kernel Connection Multiplexor
  3. *
  4. * Copyright (c) 2016 Tom Herbert <tom@herbertland.com>
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License version 2
  8. * as published by the Free Software Foundation.
  9. */
  10. #include <linux/bpf.h>
  11. #include <linux/errno.h>
  12. #include <linux/errqueue.h>
  13. #include <linux/file.h>
  14. #include <linux/in.h>
  15. #include <linux/kernel.h>
  16. #include <linux/module.h>
  17. #include <linux/net.h>
  18. #include <linux/netdevice.h>
  19. #include <linux/poll.h>
  20. #include <linux/rculist.h>
  21. #include <linux/skbuff.h>
  22. #include <linux/socket.h>
  23. #include <linux/uaccess.h>
  24. #include <linux/workqueue.h>
  25. #include <linux/syscalls.h>
  26. #include <linux/sched/signal.h>
  27. #include <net/kcm.h>
  28. #include <net/netns/generic.h>
  29. #include <net/sock.h>
  30. #include <uapi/linux/kcm.h>
  31. unsigned int kcm_net_id;
  32. static struct kmem_cache *kcm_psockp __read_mostly;
  33. static struct kmem_cache *kcm_muxp __read_mostly;
  34. static struct workqueue_struct *kcm_wq;
  35. static inline struct kcm_sock *kcm_sk(const struct sock *sk)
  36. {
  37. return (struct kcm_sock *)sk;
  38. }
  39. static inline struct kcm_tx_msg *kcm_tx_msg(struct sk_buff *skb)
  40. {
  41. return (struct kcm_tx_msg *)skb->cb;
  42. }
  43. static void report_csk_error(struct sock *csk, int err)
  44. {
  45. csk->sk_err = EPIPE;
  46. csk->sk_error_report(csk);
  47. }
  48. static void kcm_abort_tx_psock(struct kcm_psock *psock, int err,
  49. bool wakeup_kcm)
  50. {
  51. struct sock *csk = psock->sk;
  52. struct kcm_mux *mux = psock->mux;
  53. /* Unrecoverable error in transmit */
  54. spin_lock_bh(&mux->lock);
  55. if (psock->tx_stopped) {
  56. spin_unlock_bh(&mux->lock);
  57. return;
  58. }
  59. psock->tx_stopped = 1;
  60. KCM_STATS_INCR(psock->stats.tx_aborts);
  61. if (!psock->tx_kcm) {
  62. /* Take off psocks_avail list */
  63. list_del(&psock->psock_avail_list);
  64. } else if (wakeup_kcm) {
  65. /* In this case psock is being aborted while outside of
  66. * write_msgs and psock is reserved. Schedule tx_work
  67. * to handle the failure there. Need to commit tx_stopped
  68. * before queuing work.
  69. */
  70. smp_mb();
  71. queue_work(kcm_wq, &psock->tx_kcm->tx_work);
  72. }
  73. spin_unlock_bh(&mux->lock);
  74. /* Report error on lower socket */
  75. report_csk_error(csk, err);
  76. }
  77. /* RX mux lock held. */
  78. static void kcm_update_rx_mux_stats(struct kcm_mux *mux,
  79. struct kcm_psock *psock)
  80. {
  81. STRP_STATS_ADD(mux->stats.rx_bytes,
  82. psock->strp.stats.bytes -
  83. psock->saved_rx_bytes);
  84. mux->stats.rx_msgs +=
  85. psock->strp.stats.msgs - psock->saved_rx_msgs;
  86. psock->saved_rx_msgs = psock->strp.stats.msgs;
  87. psock->saved_rx_bytes = psock->strp.stats.bytes;
  88. }
  89. static void kcm_update_tx_mux_stats(struct kcm_mux *mux,
  90. struct kcm_psock *psock)
  91. {
  92. KCM_STATS_ADD(mux->stats.tx_bytes,
  93. psock->stats.tx_bytes - psock->saved_tx_bytes);
  94. mux->stats.tx_msgs +=
  95. psock->stats.tx_msgs - psock->saved_tx_msgs;
  96. psock->saved_tx_msgs = psock->stats.tx_msgs;
  97. psock->saved_tx_bytes = psock->stats.tx_bytes;
  98. }
  99. static int kcm_queue_rcv_skb(struct sock *sk, struct sk_buff *skb);
  100. /* KCM is ready to receive messages on its queue-- either the KCM is new or
  101. * has become unblocked after being blocked on full socket buffer. Queue any
  102. * pending ready messages on a psock. RX mux lock held.
  103. */
  104. static void kcm_rcv_ready(struct kcm_sock *kcm)
  105. {
  106. struct kcm_mux *mux = kcm->mux;
  107. struct kcm_psock *psock;
  108. struct sk_buff *skb;
  109. if (unlikely(kcm->rx_wait || kcm->rx_psock || kcm->rx_disabled))
  110. return;
  111. while (unlikely((skb = __skb_dequeue(&mux->rx_hold_queue)))) {
  112. if (kcm_queue_rcv_skb(&kcm->sk, skb)) {
  113. /* Assuming buffer limit has been reached */
  114. skb_queue_head(&mux->rx_hold_queue, skb);
  115. WARN_ON(!sk_rmem_alloc_get(&kcm->sk));
  116. return;
  117. }
  118. }
  119. while (!list_empty(&mux->psocks_ready)) {
  120. psock = list_first_entry(&mux->psocks_ready, struct kcm_psock,
  121. psock_ready_list);
  122. if (kcm_queue_rcv_skb(&kcm->sk, psock->ready_rx_msg)) {
  123. /* Assuming buffer limit has been reached */
  124. WARN_ON(!sk_rmem_alloc_get(&kcm->sk));
  125. return;
  126. }
  127. /* Consumed the ready message on the psock. Schedule rx_work to
  128. * get more messages.
  129. */
  130. list_del(&psock->psock_ready_list);
  131. psock->ready_rx_msg = NULL;
  132. /* Commit clearing of ready_rx_msg for queuing work */
  133. smp_mb();
  134. strp_unpause(&psock->strp);
  135. strp_check_rcv(&psock->strp);
  136. }
  137. /* Buffer limit is okay now, add to ready list */
  138. list_add_tail(&kcm->wait_rx_list,
  139. &kcm->mux->kcm_rx_waiters);
  140. kcm->rx_wait = true;
  141. }
  142. static void kcm_rfree(struct sk_buff *skb)
  143. {
  144. struct sock *sk = skb->sk;
  145. struct kcm_sock *kcm = kcm_sk(sk);
  146. struct kcm_mux *mux = kcm->mux;
  147. unsigned int len = skb->truesize;
  148. sk_mem_uncharge(sk, len);
  149. atomic_sub(len, &sk->sk_rmem_alloc);
  150. /* For reading rx_wait and rx_psock without holding lock */
  151. smp_mb__after_atomic();
  152. if (!kcm->rx_wait && !kcm->rx_psock &&
  153. sk_rmem_alloc_get(sk) < sk->sk_rcvlowat) {
  154. spin_lock_bh(&mux->rx_lock);
  155. kcm_rcv_ready(kcm);
  156. spin_unlock_bh(&mux->rx_lock);
  157. }
  158. }
  159. static int kcm_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
  160. {
  161. struct sk_buff_head *list = &sk->sk_receive_queue;
  162. if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
  163. return -ENOMEM;
  164. if (!sk_rmem_schedule(sk, skb, skb->truesize))
  165. return -ENOBUFS;
  166. skb->dev = NULL;
  167. skb_orphan(skb);
  168. skb->sk = sk;
  169. skb->destructor = kcm_rfree;
  170. atomic_add(skb->truesize, &sk->sk_rmem_alloc);
  171. sk_mem_charge(sk, skb->truesize);
  172. skb_queue_tail(list, skb);
  173. if (!sock_flag(sk, SOCK_DEAD))
  174. sk->sk_data_ready(sk);
  175. return 0;
  176. }
  177. /* Requeue received messages for a kcm socket to other kcm sockets. This is
  178. * called with a kcm socket is receive disabled.
  179. * RX mux lock held.
  180. */
  181. static void requeue_rx_msgs(struct kcm_mux *mux, struct sk_buff_head *head)
  182. {
  183. struct sk_buff *skb;
  184. struct kcm_sock *kcm;
  185. while ((skb = __skb_dequeue(head))) {
  186. /* Reset destructor to avoid calling kcm_rcv_ready */
  187. skb->destructor = sock_rfree;
  188. skb_orphan(skb);
  189. try_again:
  190. if (list_empty(&mux->kcm_rx_waiters)) {
  191. skb_queue_tail(&mux->rx_hold_queue, skb);
  192. continue;
  193. }
  194. kcm = list_first_entry(&mux->kcm_rx_waiters,
  195. struct kcm_sock, wait_rx_list);
  196. if (kcm_queue_rcv_skb(&kcm->sk, skb)) {
  197. /* Should mean socket buffer full */
  198. list_del(&kcm->wait_rx_list);
  199. kcm->rx_wait = false;
  200. /* Commit rx_wait to read in kcm_free */
  201. smp_wmb();
  202. goto try_again;
  203. }
  204. }
  205. }
  206. /* Lower sock lock held */
  207. static struct kcm_sock *reserve_rx_kcm(struct kcm_psock *psock,
  208. struct sk_buff *head)
  209. {
  210. struct kcm_mux *mux = psock->mux;
  211. struct kcm_sock *kcm;
  212. WARN_ON(psock->ready_rx_msg);
  213. if (psock->rx_kcm)
  214. return psock->rx_kcm;
  215. spin_lock_bh(&mux->rx_lock);
  216. if (psock->rx_kcm) {
  217. spin_unlock_bh(&mux->rx_lock);
  218. return psock->rx_kcm;
  219. }
  220. kcm_update_rx_mux_stats(mux, psock);
  221. if (list_empty(&mux->kcm_rx_waiters)) {
  222. psock->ready_rx_msg = head;
  223. strp_pause(&psock->strp);
  224. list_add_tail(&psock->psock_ready_list,
  225. &mux->psocks_ready);
  226. spin_unlock_bh(&mux->rx_lock);
  227. return NULL;
  228. }
  229. kcm = list_first_entry(&mux->kcm_rx_waiters,
  230. struct kcm_sock, wait_rx_list);
  231. list_del(&kcm->wait_rx_list);
  232. kcm->rx_wait = false;
  233. psock->rx_kcm = kcm;
  234. kcm->rx_psock = psock;
  235. spin_unlock_bh(&mux->rx_lock);
  236. return kcm;
  237. }
  238. static void kcm_done(struct kcm_sock *kcm);
  239. static void kcm_done_work(struct work_struct *w)
  240. {
  241. kcm_done(container_of(w, struct kcm_sock, done_work));
  242. }
  243. /* Lower sock held */
  244. static void unreserve_rx_kcm(struct kcm_psock *psock,
  245. bool rcv_ready)
  246. {
  247. struct kcm_sock *kcm = psock->rx_kcm;
  248. struct kcm_mux *mux = psock->mux;
  249. if (!kcm)
  250. return;
  251. spin_lock_bh(&mux->rx_lock);
  252. psock->rx_kcm = NULL;
  253. kcm->rx_psock = NULL;
  254. /* Commit kcm->rx_psock before sk_rmem_alloc_get to sync with
  255. * kcm_rfree
  256. */
  257. smp_mb();
  258. if (unlikely(kcm->done)) {
  259. spin_unlock_bh(&mux->rx_lock);
  260. /* Need to run kcm_done in a task since we need to qcquire
  261. * callback locks which may already be held here.
  262. */
  263. INIT_WORK(&kcm->done_work, kcm_done_work);
  264. schedule_work(&kcm->done_work);
  265. return;
  266. }
  267. if (unlikely(kcm->rx_disabled)) {
  268. requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue);
  269. } else if (rcv_ready || unlikely(!sk_rmem_alloc_get(&kcm->sk))) {
  270. /* Check for degenerative race with rx_wait that all
  271. * data was dequeued (accounted for in kcm_rfree).
  272. */
  273. kcm_rcv_ready(kcm);
  274. }
  275. spin_unlock_bh(&mux->rx_lock);
  276. }
  277. /* Lower sock lock held */
  278. static void psock_data_ready(struct sock *sk)
  279. {
  280. struct kcm_psock *psock;
  281. read_lock_bh(&sk->sk_callback_lock);
  282. psock = (struct kcm_psock *)sk->sk_user_data;
  283. if (likely(psock))
  284. strp_data_ready(&psock->strp);
  285. read_unlock_bh(&sk->sk_callback_lock);
  286. }
  287. /* Called with lower sock held */
  288. static void kcm_rcv_strparser(struct strparser *strp, struct sk_buff *skb)
  289. {
  290. struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
  291. struct kcm_sock *kcm;
  292. try_queue:
  293. kcm = reserve_rx_kcm(psock, skb);
  294. if (!kcm) {
  295. /* Unable to reserve a KCM, message is held in psock and strp
  296. * is paused.
  297. */
  298. return;
  299. }
  300. if (kcm_queue_rcv_skb(&kcm->sk, skb)) {
  301. /* Should mean socket buffer full */
  302. unreserve_rx_kcm(psock, false);
  303. goto try_queue;
  304. }
  305. }
  306. static int kcm_parse_func_strparser(struct strparser *strp, struct sk_buff *skb)
  307. {
  308. struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
  309. struct bpf_prog *prog = psock->bpf_prog;
  310. return (*prog->bpf_func)(skb, prog->insnsi);
  311. }
  312. static int kcm_read_sock_done(struct strparser *strp, int err)
  313. {
  314. struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp);
  315. unreserve_rx_kcm(psock, true);
  316. return err;
  317. }
  318. static void psock_state_change(struct sock *sk)
  319. {
  320. /* TCP only does a EPOLLIN for a half close. Do a EPOLLHUP here
  321. * since application will normally not poll with EPOLLIN
  322. * on the TCP sockets.
  323. */
  324. report_csk_error(sk, EPIPE);
  325. }
  326. static void psock_write_space(struct sock *sk)
  327. {
  328. struct kcm_psock *psock;
  329. struct kcm_mux *mux;
  330. struct kcm_sock *kcm;
  331. read_lock_bh(&sk->sk_callback_lock);
  332. psock = (struct kcm_psock *)sk->sk_user_data;
  333. if (unlikely(!psock))
  334. goto out;
  335. mux = psock->mux;
  336. spin_lock_bh(&mux->lock);
  337. /* Check if the socket is reserved so someone is waiting for sending. */
  338. kcm = psock->tx_kcm;
  339. if (kcm && !unlikely(kcm->tx_stopped))
  340. queue_work(kcm_wq, &kcm->tx_work);
  341. spin_unlock_bh(&mux->lock);
  342. out:
  343. read_unlock_bh(&sk->sk_callback_lock);
  344. }
  345. static void unreserve_psock(struct kcm_sock *kcm);
  346. /* kcm sock is locked. */
  347. static struct kcm_psock *reserve_psock(struct kcm_sock *kcm)
  348. {
  349. struct kcm_mux *mux = kcm->mux;
  350. struct kcm_psock *psock;
  351. psock = kcm->tx_psock;
  352. smp_rmb(); /* Must read tx_psock before tx_wait */
  353. if (psock) {
  354. WARN_ON(kcm->tx_wait);
  355. if (unlikely(psock->tx_stopped))
  356. unreserve_psock(kcm);
  357. else
  358. return kcm->tx_psock;
  359. }
  360. spin_lock_bh(&mux->lock);
  361. /* Check again under lock to see if psock was reserved for this
  362. * psock via psock_unreserve.
  363. */
  364. psock = kcm->tx_psock;
  365. if (unlikely(psock)) {
  366. WARN_ON(kcm->tx_wait);
  367. spin_unlock_bh(&mux->lock);
  368. return kcm->tx_psock;
  369. }
  370. if (!list_empty(&mux->psocks_avail)) {
  371. psock = list_first_entry(&mux->psocks_avail,
  372. struct kcm_psock,
  373. psock_avail_list);
  374. list_del(&psock->psock_avail_list);
  375. if (kcm->tx_wait) {
  376. list_del(&kcm->wait_psock_list);
  377. kcm->tx_wait = false;
  378. }
  379. kcm->tx_psock = psock;
  380. psock->tx_kcm = kcm;
  381. KCM_STATS_INCR(psock->stats.reserved);
  382. } else if (!kcm->tx_wait) {
  383. list_add_tail(&kcm->wait_psock_list,
  384. &mux->kcm_tx_waiters);
  385. kcm->tx_wait = true;
  386. }
  387. spin_unlock_bh(&mux->lock);
  388. return psock;
  389. }
  390. /* mux lock held */
  391. static void psock_now_avail(struct kcm_psock *psock)
  392. {
  393. struct kcm_mux *mux = psock->mux;
  394. struct kcm_sock *kcm;
  395. if (list_empty(&mux->kcm_tx_waiters)) {
  396. list_add_tail(&psock->psock_avail_list,
  397. &mux->psocks_avail);
  398. } else {
  399. kcm = list_first_entry(&mux->kcm_tx_waiters,
  400. struct kcm_sock,
  401. wait_psock_list);
  402. list_del(&kcm->wait_psock_list);
  403. kcm->tx_wait = false;
  404. psock->tx_kcm = kcm;
  405. /* Commit before changing tx_psock since that is read in
  406. * reserve_psock before queuing work.
  407. */
  408. smp_mb();
  409. kcm->tx_psock = psock;
  410. KCM_STATS_INCR(psock->stats.reserved);
  411. queue_work(kcm_wq, &kcm->tx_work);
  412. }
  413. }
  414. /* kcm sock is locked. */
  415. static void unreserve_psock(struct kcm_sock *kcm)
  416. {
  417. struct kcm_psock *psock;
  418. struct kcm_mux *mux = kcm->mux;
  419. spin_lock_bh(&mux->lock);
  420. psock = kcm->tx_psock;
  421. if (WARN_ON(!psock)) {
  422. spin_unlock_bh(&mux->lock);
  423. return;
  424. }
  425. smp_rmb(); /* Read tx_psock before tx_wait */
  426. kcm_update_tx_mux_stats(mux, psock);
  427. WARN_ON(kcm->tx_wait);
  428. kcm->tx_psock = NULL;
  429. psock->tx_kcm = NULL;
  430. KCM_STATS_INCR(psock->stats.unreserved);
  431. if (unlikely(psock->tx_stopped)) {
  432. if (psock->done) {
  433. /* Deferred free */
  434. list_del(&psock->psock_list);
  435. mux->psocks_cnt--;
  436. sock_put(psock->sk);
  437. fput(psock->sk->sk_socket->file);
  438. kmem_cache_free(kcm_psockp, psock);
  439. }
  440. /* Don't put back on available list */
  441. spin_unlock_bh(&mux->lock);
  442. return;
  443. }
  444. psock_now_avail(psock);
  445. spin_unlock_bh(&mux->lock);
  446. }
  447. static void kcm_report_tx_retry(struct kcm_sock *kcm)
  448. {
  449. struct kcm_mux *mux = kcm->mux;
  450. spin_lock_bh(&mux->lock);
  451. KCM_STATS_INCR(mux->stats.tx_retries);
  452. spin_unlock_bh(&mux->lock);
  453. }
  454. /* Write any messages ready on the kcm socket. Called with kcm sock lock
  455. * held. Return bytes actually sent or error.
  456. */
  457. static int kcm_write_msgs(struct kcm_sock *kcm)
  458. {
  459. struct sock *sk = &kcm->sk;
  460. struct kcm_psock *psock;
  461. struct sk_buff *skb, *head;
  462. struct kcm_tx_msg *txm;
  463. unsigned short fragidx, frag_offset;
  464. unsigned int sent, total_sent = 0;
  465. int ret = 0;
  466. kcm->tx_wait_more = false;
  467. psock = kcm->tx_psock;
  468. if (unlikely(psock && psock->tx_stopped)) {
  469. /* A reserved psock was aborted asynchronously. Unreserve
  470. * it and we'll retry the message.
  471. */
  472. unreserve_psock(kcm);
  473. kcm_report_tx_retry(kcm);
  474. if (skb_queue_empty(&sk->sk_write_queue))
  475. return 0;
  476. kcm_tx_msg(skb_peek(&sk->sk_write_queue))->sent = 0;
  477. } else if (skb_queue_empty(&sk->sk_write_queue)) {
  478. return 0;
  479. }
  480. head = skb_peek(&sk->sk_write_queue);
  481. txm = kcm_tx_msg(head);
  482. if (txm->sent) {
  483. /* Send of first skbuff in queue already in progress */
  484. if (WARN_ON(!psock)) {
  485. ret = -EINVAL;
  486. goto out;
  487. }
  488. sent = txm->sent;
  489. frag_offset = txm->frag_offset;
  490. fragidx = txm->fragidx;
  491. skb = txm->frag_skb;
  492. goto do_frag;
  493. }
  494. try_again:
  495. psock = reserve_psock(kcm);
  496. if (!psock)
  497. goto out;
  498. do {
  499. skb = head;
  500. txm = kcm_tx_msg(head);
  501. sent = 0;
  502. do_frag_list:
  503. if (WARN_ON(!skb_shinfo(skb)->nr_frags)) {
  504. ret = -EINVAL;
  505. goto out;
  506. }
  507. for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags;
  508. fragidx++) {
  509. skb_frag_t *frag;
  510. frag_offset = 0;
  511. do_frag:
  512. frag = &skb_shinfo(skb)->frags[fragidx];
  513. if (WARN_ON(!frag->size)) {
  514. ret = -EINVAL;
  515. goto out;
  516. }
  517. ret = kernel_sendpage(psock->sk->sk_socket,
  518. frag->page.p,
  519. frag->page_offset + frag_offset,
  520. frag->size - frag_offset,
  521. MSG_DONTWAIT);
  522. if (ret <= 0) {
  523. if (ret == -EAGAIN) {
  524. /* Save state to try again when there's
  525. * write space on the socket
  526. */
  527. txm->sent = sent;
  528. txm->frag_offset = frag_offset;
  529. txm->fragidx = fragidx;
  530. txm->frag_skb = skb;
  531. ret = 0;
  532. goto out;
  533. }
  534. /* Hard failure in sending message, abort this
  535. * psock since it has lost framing
  536. * synchonization and retry sending the
  537. * message from the beginning.
  538. */
  539. kcm_abort_tx_psock(psock, ret ? -ret : EPIPE,
  540. true);
  541. unreserve_psock(kcm);
  542. txm->sent = 0;
  543. kcm_report_tx_retry(kcm);
  544. ret = 0;
  545. goto try_again;
  546. }
  547. sent += ret;
  548. frag_offset += ret;
  549. KCM_STATS_ADD(psock->stats.tx_bytes, ret);
  550. if (frag_offset < frag->size) {
  551. /* Not finished with this frag */
  552. goto do_frag;
  553. }
  554. }
  555. if (skb == head) {
  556. if (skb_has_frag_list(skb)) {
  557. skb = skb_shinfo(skb)->frag_list;
  558. goto do_frag_list;
  559. }
  560. } else if (skb->next) {
  561. skb = skb->next;
  562. goto do_frag_list;
  563. }
  564. /* Successfully sent the whole packet, account for it. */
  565. skb_dequeue(&sk->sk_write_queue);
  566. kfree_skb(head);
  567. sk->sk_wmem_queued -= sent;
  568. total_sent += sent;
  569. KCM_STATS_INCR(psock->stats.tx_msgs);
  570. } while ((head = skb_peek(&sk->sk_write_queue)));
  571. out:
  572. if (!head) {
  573. /* Done with all queued messages. */
  574. WARN_ON(!skb_queue_empty(&sk->sk_write_queue));
  575. unreserve_psock(kcm);
  576. }
  577. /* Check if write space is available */
  578. sk->sk_write_space(sk);
  579. return total_sent ? : ret;
  580. }
  581. static void kcm_tx_work(struct work_struct *w)
  582. {
  583. struct kcm_sock *kcm = container_of(w, struct kcm_sock, tx_work);
  584. struct sock *sk = &kcm->sk;
  585. int err;
  586. lock_sock(sk);
  587. /* Primarily for SOCK_DGRAM sockets, also handle asynchronous tx
  588. * aborts
  589. */
  590. err = kcm_write_msgs(kcm);
  591. if (err < 0) {
  592. /* Hard failure in write, report error on KCM socket */
  593. pr_warn("KCM: Hard failure on kcm_write_msgs %d\n", err);
  594. report_csk_error(&kcm->sk, -err);
  595. goto out;
  596. }
  597. /* Primarily for SOCK_SEQPACKET sockets */
  598. if (likely(sk->sk_socket) &&
  599. test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
  600. clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
  601. sk->sk_write_space(sk);
  602. }
  603. out:
  604. release_sock(sk);
  605. }
  606. static void kcm_push(struct kcm_sock *kcm)
  607. {
  608. if (kcm->tx_wait_more)
  609. kcm_write_msgs(kcm);
  610. }
  611. static ssize_t kcm_sendpage(struct socket *sock, struct page *page,
  612. int offset, size_t size, int flags)
  613. {
  614. struct sock *sk = sock->sk;
  615. struct kcm_sock *kcm = kcm_sk(sk);
  616. struct sk_buff *skb = NULL, *head = NULL;
  617. long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
  618. bool eor;
  619. int err = 0;
  620. int i;
  621. if (flags & MSG_SENDPAGE_NOTLAST)
  622. flags |= MSG_MORE;
  623. /* No MSG_EOR from splice, only look at MSG_MORE */
  624. eor = !(flags & MSG_MORE);
  625. lock_sock(sk);
  626. sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
  627. err = -EPIPE;
  628. if (sk->sk_err)
  629. goto out_error;
  630. if (kcm->seq_skb) {
  631. /* Previously opened message */
  632. head = kcm->seq_skb;
  633. skb = kcm_tx_msg(head)->last_skb;
  634. i = skb_shinfo(skb)->nr_frags;
  635. if (skb_can_coalesce(skb, i, page, offset)) {
  636. skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], size);
  637. skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
  638. goto coalesced;
  639. }
  640. if (i >= MAX_SKB_FRAGS) {
  641. struct sk_buff *tskb;
  642. tskb = alloc_skb(0, sk->sk_allocation);
  643. while (!tskb) {
  644. kcm_push(kcm);
  645. err = sk_stream_wait_memory(sk, &timeo);
  646. if (err)
  647. goto out_error;
  648. }
  649. if (head == skb)
  650. skb_shinfo(head)->frag_list = tskb;
  651. else
  652. skb->next = tskb;
  653. skb = tskb;
  654. skb->ip_summed = CHECKSUM_UNNECESSARY;
  655. i = 0;
  656. }
  657. } else {
  658. /* Call the sk_stream functions to manage the sndbuf mem. */
  659. if (!sk_stream_memory_free(sk)) {
  660. kcm_push(kcm);
  661. set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
  662. err = sk_stream_wait_memory(sk, &timeo);
  663. if (err)
  664. goto out_error;
  665. }
  666. head = alloc_skb(0, sk->sk_allocation);
  667. while (!head) {
  668. kcm_push(kcm);
  669. err = sk_stream_wait_memory(sk, &timeo);
  670. if (err)
  671. goto out_error;
  672. }
  673. skb = head;
  674. i = 0;
  675. }
  676. get_page(page);
  677. skb_fill_page_desc(skb, i, page, offset, size);
  678. skb_shinfo(skb)->tx_flags |= SKBTX_SHARED_FRAG;
  679. coalesced:
  680. skb->len += size;
  681. skb->data_len += size;
  682. skb->truesize += size;
  683. sk->sk_wmem_queued += size;
  684. sk_mem_charge(sk, size);
  685. if (head != skb) {
  686. head->len += size;
  687. head->data_len += size;
  688. head->truesize += size;
  689. }
  690. if (eor) {
  691. bool not_busy = skb_queue_empty(&sk->sk_write_queue);
  692. /* Message complete, queue it on send buffer */
  693. __skb_queue_tail(&sk->sk_write_queue, head);
  694. kcm->seq_skb = NULL;
  695. KCM_STATS_INCR(kcm->stats.tx_msgs);
  696. if (flags & MSG_BATCH) {
  697. kcm->tx_wait_more = true;
  698. } else if (kcm->tx_wait_more || not_busy) {
  699. err = kcm_write_msgs(kcm);
  700. if (err < 0) {
  701. /* We got a hard error in write_msgs but have
  702. * already queued this message. Report an error
  703. * in the socket, but don't affect return value
  704. * from sendmsg
  705. */
  706. pr_warn("KCM: Hard failure on kcm_write_msgs\n");
  707. report_csk_error(&kcm->sk, -err);
  708. }
  709. }
  710. } else {
  711. /* Message not complete, save state */
  712. kcm->seq_skb = head;
  713. kcm_tx_msg(head)->last_skb = skb;
  714. }
  715. KCM_STATS_ADD(kcm->stats.tx_bytes, size);
  716. release_sock(sk);
  717. return size;
  718. out_error:
  719. kcm_push(kcm);
  720. err = sk_stream_error(sk, flags, err);
  721. /* make sure we wake any epoll edge trigger waiter */
  722. if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
  723. sk->sk_write_space(sk);
  724. release_sock(sk);
  725. return err;
  726. }
  727. static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
  728. {
  729. struct sock *sk = sock->sk;
  730. struct kcm_sock *kcm = kcm_sk(sk);
  731. struct sk_buff *skb = NULL, *head = NULL;
  732. size_t copy, copied = 0;
  733. long timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
  734. int eor = (sock->type == SOCK_DGRAM) ?
  735. !(msg->msg_flags & MSG_MORE) : !!(msg->msg_flags & MSG_EOR);
  736. int err = -EPIPE;
  737. lock_sock(sk);
  738. /* Per tcp_sendmsg this should be in poll */
  739. sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
  740. if (sk->sk_err)
  741. goto out_error;
  742. if (kcm->seq_skb) {
  743. /* Previously opened message */
  744. head = kcm->seq_skb;
  745. skb = kcm_tx_msg(head)->last_skb;
  746. goto start;
  747. }
  748. /* Call the sk_stream functions to manage the sndbuf mem. */
  749. if (!sk_stream_memory_free(sk)) {
  750. kcm_push(kcm);
  751. set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
  752. err = sk_stream_wait_memory(sk, &timeo);
  753. if (err)
  754. goto out_error;
  755. }
  756. if (msg_data_left(msg)) {
  757. /* New message, alloc head skb */
  758. head = alloc_skb(0, sk->sk_allocation);
  759. while (!head) {
  760. kcm_push(kcm);
  761. err = sk_stream_wait_memory(sk, &timeo);
  762. if (err)
  763. goto out_error;
  764. head = alloc_skb(0, sk->sk_allocation);
  765. }
  766. skb = head;
  767. /* Set ip_summed to CHECKSUM_UNNECESSARY to avoid calling
  768. * csum_and_copy_from_iter from skb_do_copy_data_nocache.
  769. */
  770. skb->ip_summed = CHECKSUM_UNNECESSARY;
  771. }
  772. start:
  773. while (msg_data_left(msg)) {
  774. bool merge = true;
  775. int i = skb_shinfo(skb)->nr_frags;
  776. struct page_frag *pfrag = sk_page_frag(sk);
  777. if (!sk_page_frag_refill(sk, pfrag))
  778. goto wait_for_memory;
  779. if (!skb_can_coalesce(skb, i, pfrag->page,
  780. pfrag->offset)) {
  781. if (i == MAX_SKB_FRAGS) {
  782. struct sk_buff *tskb;
  783. tskb = alloc_skb(0, sk->sk_allocation);
  784. if (!tskb)
  785. goto wait_for_memory;
  786. if (head == skb)
  787. skb_shinfo(head)->frag_list = tskb;
  788. else
  789. skb->next = tskb;
  790. skb = tskb;
  791. skb->ip_summed = CHECKSUM_UNNECESSARY;
  792. continue;
  793. }
  794. merge = false;
  795. }
  796. copy = min_t(int, msg_data_left(msg),
  797. pfrag->size - pfrag->offset);
  798. if (!sk_wmem_schedule(sk, copy))
  799. goto wait_for_memory;
  800. err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb,
  801. pfrag->page,
  802. pfrag->offset,
  803. copy);
  804. if (err)
  805. goto out_error;
  806. /* Update the skb. */
  807. if (merge) {
  808. skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
  809. } else {
  810. skb_fill_page_desc(skb, i, pfrag->page,
  811. pfrag->offset, copy);
  812. get_page(pfrag->page);
  813. }
  814. pfrag->offset += copy;
  815. copied += copy;
  816. if (head != skb) {
  817. head->len += copy;
  818. head->data_len += copy;
  819. }
  820. continue;
  821. wait_for_memory:
  822. kcm_push(kcm);
  823. err = sk_stream_wait_memory(sk, &timeo);
  824. if (err)
  825. goto out_error;
  826. }
  827. if (eor) {
  828. bool not_busy = skb_queue_empty(&sk->sk_write_queue);
  829. if (head) {
  830. /* Message complete, queue it on send buffer */
  831. __skb_queue_tail(&sk->sk_write_queue, head);
  832. kcm->seq_skb = NULL;
  833. KCM_STATS_INCR(kcm->stats.tx_msgs);
  834. }
  835. if (msg->msg_flags & MSG_BATCH) {
  836. kcm->tx_wait_more = true;
  837. } else if (kcm->tx_wait_more || not_busy) {
  838. err = kcm_write_msgs(kcm);
  839. if (err < 0) {
  840. /* We got a hard error in write_msgs but have
  841. * already queued this message. Report an error
  842. * in the socket, but don't affect return value
  843. * from sendmsg
  844. */
  845. pr_warn("KCM: Hard failure on kcm_write_msgs\n");
  846. report_csk_error(&kcm->sk, -err);
  847. }
  848. }
  849. } else {
  850. /* Message not complete, save state */
  851. partial_message:
  852. if (head) {
  853. kcm->seq_skb = head;
  854. kcm_tx_msg(head)->last_skb = skb;
  855. }
  856. }
  857. KCM_STATS_ADD(kcm->stats.tx_bytes, copied);
  858. release_sock(sk);
  859. return copied;
  860. out_error:
  861. kcm_push(kcm);
  862. if (copied && sock->type == SOCK_SEQPACKET) {
  863. /* Wrote some bytes before encountering an
  864. * error, return partial success.
  865. */
  866. goto partial_message;
  867. }
  868. if (head != kcm->seq_skb)
  869. kfree_skb(head);
  870. err = sk_stream_error(sk, msg->msg_flags, err);
  871. /* make sure we wake any epoll edge trigger waiter */
  872. if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && err == -EAGAIN))
  873. sk->sk_write_space(sk);
  874. release_sock(sk);
  875. return err;
  876. }
  877. static struct sk_buff *kcm_wait_data(struct sock *sk, int flags,
  878. long timeo, int *err)
  879. {
  880. struct sk_buff *skb;
  881. while (!(skb = skb_peek(&sk->sk_receive_queue))) {
  882. if (sk->sk_err) {
  883. *err = sock_error(sk);
  884. return NULL;
  885. }
  886. if (sock_flag(sk, SOCK_DONE))
  887. return NULL;
  888. if ((flags & MSG_DONTWAIT) || !timeo) {
  889. *err = -EAGAIN;
  890. return NULL;
  891. }
  892. sk_wait_data(sk, &timeo, NULL);
  893. /* Handle signals */
  894. if (signal_pending(current)) {
  895. *err = sock_intr_errno(timeo);
  896. return NULL;
  897. }
  898. }
  899. return skb;
  900. }
  901. static int kcm_recvmsg(struct socket *sock, struct msghdr *msg,
  902. size_t len, int flags)
  903. {
  904. struct sock *sk = sock->sk;
  905. struct kcm_sock *kcm = kcm_sk(sk);
  906. int err = 0;
  907. long timeo;
  908. struct strp_msg *stm;
  909. int copied = 0;
  910. struct sk_buff *skb;
  911. timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
  912. lock_sock(sk);
  913. skb = kcm_wait_data(sk, flags, timeo, &err);
  914. if (!skb)
  915. goto out;
  916. /* Okay, have a message on the receive queue */
  917. stm = strp_msg(skb);
  918. if (len > stm->full_len)
  919. len = stm->full_len;
  920. err = skb_copy_datagram_msg(skb, stm->offset, msg, len);
  921. if (err < 0)
  922. goto out;
  923. copied = len;
  924. if (likely(!(flags & MSG_PEEK))) {
  925. KCM_STATS_ADD(kcm->stats.rx_bytes, copied);
  926. if (copied < stm->full_len) {
  927. if (sock->type == SOCK_DGRAM) {
  928. /* Truncated message */
  929. msg->msg_flags |= MSG_TRUNC;
  930. goto msg_finished;
  931. }
  932. stm->offset += copied;
  933. stm->full_len -= copied;
  934. } else {
  935. msg_finished:
  936. /* Finished with message */
  937. msg->msg_flags |= MSG_EOR;
  938. KCM_STATS_INCR(kcm->stats.rx_msgs);
  939. skb_unlink(skb, &sk->sk_receive_queue);
  940. kfree_skb(skb);
  941. }
  942. }
  943. out:
  944. release_sock(sk);
  945. return copied ? : err;
  946. }
  947. static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos,
  948. struct pipe_inode_info *pipe, size_t len,
  949. unsigned int flags)
  950. {
  951. struct sock *sk = sock->sk;
  952. struct kcm_sock *kcm = kcm_sk(sk);
  953. long timeo;
  954. struct strp_msg *stm;
  955. int err = 0;
  956. ssize_t copied;
  957. struct sk_buff *skb;
  958. /* Only support splice for SOCKSEQPACKET */
  959. timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
  960. lock_sock(sk);
  961. skb = kcm_wait_data(sk, flags, timeo, &err);
  962. if (!skb)
  963. goto err_out;
  964. /* Okay, have a message on the receive queue */
  965. stm = strp_msg(skb);
  966. if (len > stm->full_len)
  967. len = stm->full_len;
  968. copied = skb_splice_bits(skb, sk, stm->offset, pipe, len, flags);
  969. if (copied < 0) {
  970. err = copied;
  971. goto err_out;
  972. }
  973. KCM_STATS_ADD(kcm->stats.rx_bytes, copied);
  974. stm->offset += copied;
  975. stm->full_len -= copied;
  976. /* We have no way to return MSG_EOR. If all the bytes have been
  977. * read we still leave the message in the receive socket buffer.
  978. * A subsequent recvmsg needs to be done to return MSG_EOR and
  979. * finish reading the message.
  980. */
  981. release_sock(sk);
  982. return copied;
  983. err_out:
  984. release_sock(sk);
  985. return err;
  986. }
  987. /* kcm sock lock held */
  988. static void kcm_recv_disable(struct kcm_sock *kcm)
  989. {
  990. struct kcm_mux *mux = kcm->mux;
  991. if (kcm->rx_disabled)
  992. return;
  993. spin_lock_bh(&mux->rx_lock);
  994. kcm->rx_disabled = 1;
  995. /* If a psock is reserved we'll do cleanup in unreserve */
  996. if (!kcm->rx_psock) {
  997. if (kcm->rx_wait) {
  998. list_del(&kcm->wait_rx_list);
  999. kcm->rx_wait = false;
  1000. }
  1001. requeue_rx_msgs(mux, &kcm->sk.sk_receive_queue);
  1002. }
  1003. spin_unlock_bh(&mux->rx_lock);
  1004. }
  1005. /* kcm sock lock held */
  1006. static void kcm_recv_enable(struct kcm_sock *kcm)
  1007. {
  1008. struct kcm_mux *mux = kcm->mux;
  1009. if (!kcm->rx_disabled)
  1010. return;
  1011. spin_lock_bh(&mux->rx_lock);
  1012. kcm->rx_disabled = 0;
  1013. kcm_rcv_ready(kcm);
  1014. spin_unlock_bh(&mux->rx_lock);
  1015. }
  1016. static int kcm_setsockopt(struct socket *sock, int level, int optname,
  1017. char __user *optval, unsigned int optlen)
  1018. {
  1019. struct kcm_sock *kcm = kcm_sk(sock->sk);
  1020. int val, valbool;
  1021. int err = 0;
  1022. if (level != SOL_KCM)
  1023. return -ENOPROTOOPT;
  1024. if (optlen < sizeof(int))
  1025. return -EINVAL;
  1026. if (get_user(val, (int __user *)optval))
  1027. return -EINVAL;
  1028. valbool = val ? 1 : 0;
  1029. switch (optname) {
  1030. case KCM_RECV_DISABLE:
  1031. lock_sock(&kcm->sk);
  1032. if (valbool)
  1033. kcm_recv_disable(kcm);
  1034. else
  1035. kcm_recv_enable(kcm);
  1036. release_sock(&kcm->sk);
  1037. break;
  1038. default:
  1039. err = -ENOPROTOOPT;
  1040. }
  1041. return err;
  1042. }
  1043. static int kcm_getsockopt(struct socket *sock, int level, int optname,
  1044. char __user *optval, int __user *optlen)
  1045. {
  1046. struct kcm_sock *kcm = kcm_sk(sock->sk);
  1047. int val, len;
  1048. if (level != SOL_KCM)
  1049. return -ENOPROTOOPT;
  1050. if (get_user(len, optlen))
  1051. return -EFAULT;
  1052. len = min_t(unsigned int, len, sizeof(int));
  1053. if (len < 0)
  1054. return -EINVAL;
  1055. switch (optname) {
  1056. case KCM_RECV_DISABLE:
  1057. val = kcm->rx_disabled;
  1058. break;
  1059. default:
  1060. return -ENOPROTOOPT;
  1061. }
  1062. if (put_user(len, optlen))
  1063. return -EFAULT;
  1064. if (copy_to_user(optval, &val, len))
  1065. return -EFAULT;
  1066. return 0;
  1067. }
  1068. static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux)
  1069. {
  1070. struct kcm_sock *tkcm;
  1071. struct list_head *head;
  1072. int index = 0;
  1073. /* For SOCK_SEQPACKET sock type, datagram_poll checks the sk_state, so
  1074. * we set sk_state, otherwise epoll_wait always returns right away with
  1075. * EPOLLHUP
  1076. */
  1077. kcm->sk.sk_state = TCP_ESTABLISHED;
  1078. /* Add to mux's kcm sockets list */
  1079. kcm->mux = mux;
  1080. spin_lock_bh(&mux->lock);
  1081. head = &mux->kcm_socks;
  1082. list_for_each_entry(tkcm, &mux->kcm_socks, kcm_sock_list) {
  1083. if (tkcm->index != index)
  1084. break;
  1085. head = &tkcm->kcm_sock_list;
  1086. index++;
  1087. }
  1088. list_add(&kcm->kcm_sock_list, head);
  1089. kcm->index = index;
  1090. mux->kcm_socks_cnt++;
  1091. spin_unlock_bh(&mux->lock);
  1092. INIT_WORK(&kcm->tx_work, kcm_tx_work);
  1093. spin_lock_bh(&mux->rx_lock);
  1094. kcm_rcv_ready(kcm);
  1095. spin_unlock_bh(&mux->rx_lock);
  1096. }
  1097. static int kcm_attach(struct socket *sock, struct socket *csock,
  1098. struct bpf_prog *prog)
  1099. {
  1100. struct kcm_sock *kcm = kcm_sk(sock->sk);
  1101. struct kcm_mux *mux = kcm->mux;
  1102. struct sock *csk;
  1103. struct kcm_psock *psock = NULL, *tpsock;
  1104. struct list_head *head;
  1105. int index = 0;
  1106. static const struct strp_callbacks cb = {
  1107. .rcv_msg = kcm_rcv_strparser,
  1108. .parse_msg = kcm_parse_func_strparser,
  1109. .read_sock_done = kcm_read_sock_done,
  1110. };
  1111. int err = 0;
  1112. csk = csock->sk;
  1113. if (!csk)
  1114. return -EINVAL;
  1115. lock_sock(csk);
  1116. /* Only allow TCP sockets to be attached for now */
  1117. if ((csk->sk_family != AF_INET && csk->sk_family != AF_INET6) ||
  1118. csk->sk_protocol != IPPROTO_TCP) {
  1119. err = -EOPNOTSUPP;
  1120. goto out;
  1121. }
  1122. /* Don't allow listeners or closed sockets */
  1123. if (csk->sk_state == TCP_LISTEN || csk->sk_state == TCP_CLOSE) {
  1124. err = -EOPNOTSUPP;
  1125. goto out;
  1126. }
  1127. psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL);
  1128. if (!psock) {
  1129. err = -ENOMEM;
  1130. goto out;
  1131. }
  1132. psock->mux = mux;
  1133. psock->sk = csk;
  1134. psock->bpf_prog = prog;
  1135. err = strp_init(&psock->strp, csk, &cb);
  1136. if (err) {
  1137. kmem_cache_free(kcm_psockp, psock);
  1138. goto out;
  1139. }
  1140. write_lock_bh(&csk->sk_callback_lock);
  1141. /* Check if sk_user_data is aready by KCM or someone else.
  1142. * Must be done under lock to prevent race conditions.
  1143. */
  1144. if (csk->sk_user_data) {
  1145. write_unlock_bh(&csk->sk_callback_lock);
  1146. strp_stop(&psock->strp);
  1147. strp_done(&psock->strp);
  1148. kmem_cache_free(kcm_psockp, psock);
  1149. err = -EALREADY;
  1150. goto out;
  1151. }
  1152. psock->save_data_ready = csk->sk_data_ready;
  1153. psock->save_write_space = csk->sk_write_space;
  1154. psock->save_state_change = csk->sk_state_change;
  1155. csk->sk_user_data = psock;
  1156. csk->sk_data_ready = psock_data_ready;
  1157. csk->sk_write_space = psock_write_space;
  1158. csk->sk_state_change = psock_state_change;
  1159. write_unlock_bh(&csk->sk_callback_lock);
  1160. sock_hold(csk);
  1161. /* Finished initialization, now add the psock to the MUX. */
  1162. spin_lock_bh(&mux->lock);
  1163. head = &mux->psocks;
  1164. list_for_each_entry(tpsock, &mux->psocks, psock_list) {
  1165. if (tpsock->index != index)
  1166. break;
  1167. head = &tpsock->psock_list;
  1168. index++;
  1169. }
  1170. list_add(&psock->psock_list, head);
  1171. psock->index = index;
  1172. KCM_STATS_INCR(mux->stats.psock_attach);
  1173. mux->psocks_cnt++;
  1174. psock_now_avail(psock);
  1175. spin_unlock_bh(&mux->lock);
  1176. /* Schedule RX work in case there are already bytes queued */
  1177. strp_check_rcv(&psock->strp);
  1178. out:
  1179. release_sock(csk);
  1180. return err;
  1181. }
  1182. static int kcm_attach_ioctl(struct socket *sock, struct kcm_attach *info)
  1183. {
  1184. struct socket *csock;
  1185. struct bpf_prog *prog;
  1186. int err;
  1187. csock = sockfd_lookup(info->fd, &err);
  1188. if (!csock)
  1189. return -ENOENT;
  1190. prog = bpf_prog_get_type(info->bpf_fd, BPF_PROG_TYPE_SOCKET_FILTER);
  1191. if (IS_ERR(prog)) {
  1192. err = PTR_ERR(prog);
  1193. goto out;
  1194. }
  1195. err = kcm_attach(sock, csock, prog);
  1196. if (err) {
  1197. bpf_prog_put(prog);
  1198. goto out;
  1199. }
  1200. /* Keep reference on file also */
  1201. return 0;
  1202. out:
  1203. fput(csock->file);
  1204. return err;
  1205. }
  1206. static void kcm_unattach(struct kcm_psock *psock)
  1207. {
  1208. struct sock *csk = psock->sk;
  1209. struct kcm_mux *mux = psock->mux;
  1210. lock_sock(csk);
  1211. /* Stop getting callbacks from TCP socket. After this there should
  1212. * be no way to reserve a kcm for this psock.
  1213. */
  1214. write_lock_bh(&csk->sk_callback_lock);
  1215. csk->sk_user_data = NULL;
  1216. csk->sk_data_ready = psock->save_data_ready;
  1217. csk->sk_write_space = psock->save_write_space;
  1218. csk->sk_state_change = psock->save_state_change;
  1219. strp_stop(&psock->strp);
  1220. if (WARN_ON(psock->rx_kcm)) {
  1221. write_unlock_bh(&csk->sk_callback_lock);
  1222. release_sock(csk);
  1223. return;
  1224. }
  1225. spin_lock_bh(&mux->rx_lock);
  1226. /* Stop receiver activities. After this point psock should not be
  1227. * able to get onto ready list either through callbacks or work.
  1228. */
  1229. if (psock->ready_rx_msg) {
  1230. list_del(&psock->psock_ready_list);
  1231. kfree_skb(psock->ready_rx_msg);
  1232. psock->ready_rx_msg = NULL;
  1233. KCM_STATS_INCR(mux->stats.rx_ready_drops);
  1234. }
  1235. spin_unlock_bh(&mux->rx_lock);
  1236. write_unlock_bh(&csk->sk_callback_lock);
  1237. /* Call strp_done without sock lock */
  1238. release_sock(csk);
  1239. strp_done(&psock->strp);
  1240. lock_sock(csk);
  1241. bpf_prog_put(psock->bpf_prog);
  1242. spin_lock_bh(&mux->lock);
  1243. aggregate_psock_stats(&psock->stats, &mux->aggregate_psock_stats);
  1244. save_strp_stats(&psock->strp, &mux->aggregate_strp_stats);
  1245. KCM_STATS_INCR(mux->stats.psock_unattach);
  1246. if (psock->tx_kcm) {
  1247. /* psock was reserved. Just mark it finished and we will clean
  1248. * up in the kcm paths, we need kcm lock which can not be
  1249. * acquired here.
  1250. */
  1251. KCM_STATS_INCR(mux->stats.psock_unattach_rsvd);
  1252. spin_unlock_bh(&mux->lock);
  1253. /* We are unattaching a socket that is reserved. Abort the
  1254. * socket since we may be out of sync in sending on it. We need
  1255. * to do this without the mux lock.
  1256. */
  1257. kcm_abort_tx_psock(psock, EPIPE, false);
  1258. spin_lock_bh(&mux->lock);
  1259. if (!psock->tx_kcm) {
  1260. /* psock now unreserved in window mux was unlocked */
  1261. goto no_reserved;
  1262. }
  1263. psock->done = 1;
  1264. /* Commit done before queuing work to process it */
  1265. smp_mb();
  1266. /* Queue tx work to make sure psock->done is handled */
  1267. queue_work(kcm_wq, &psock->tx_kcm->tx_work);
  1268. spin_unlock_bh(&mux->lock);
  1269. } else {
  1270. no_reserved:
  1271. if (!psock->tx_stopped)
  1272. list_del(&psock->psock_avail_list);
  1273. list_del(&psock->psock_list);
  1274. mux->psocks_cnt--;
  1275. spin_unlock_bh(&mux->lock);
  1276. sock_put(csk);
  1277. fput(csk->sk_socket->file);
  1278. kmem_cache_free(kcm_psockp, psock);
  1279. }
  1280. release_sock(csk);
  1281. }
  1282. static int kcm_unattach_ioctl(struct socket *sock, struct kcm_unattach *info)
  1283. {
  1284. struct kcm_sock *kcm = kcm_sk(sock->sk);
  1285. struct kcm_mux *mux = kcm->mux;
  1286. struct kcm_psock *psock;
  1287. struct socket *csock;
  1288. struct sock *csk;
  1289. int err;
  1290. csock = sockfd_lookup(info->fd, &err);
  1291. if (!csock)
  1292. return -ENOENT;
  1293. csk = csock->sk;
  1294. if (!csk) {
  1295. err = -EINVAL;
  1296. goto out;
  1297. }
  1298. err = -ENOENT;
  1299. spin_lock_bh(&mux->lock);
  1300. list_for_each_entry(psock, &mux->psocks, psock_list) {
  1301. if (psock->sk != csk)
  1302. continue;
  1303. /* Found the matching psock */
  1304. if (psock->unattaching || WARN_ON(psock->done)) {
  1305. err = -EALREADY;
  1306. break;
  1307. }
  1308. psock->unattaching = 1;
  1309. spin_unlock_bh(&mux->lock);
  1310. /* Lower socket lock should already be held */
  1311. kcm_unattach(psock);
  1312. err = 0;
  1313. goto out;
  1314. }
  1315. spin_unlock_bh(&mux->lock);
  1316. out:
  1317. fput(csock->file);
  1318. return err;
  1319. }
  1320. static struct proto kcm_proto = {
  1321. .name = "KCM",
  1322. .owner = THIS_MODULE,
  1323. .obj_size = sizeof(struct kcm_sock),
  1324. };
  1325. /* Clone a kcm socket. */
  1326. static struct file *kcm_clone(struct socket *osock)
  1327. {
  1328. struct socket *newsock;
  1329. struct sock *newsk;
  1330. newsock = sock_alloc();
  1331. if (!newsock)
  1332. return ERR_PTR(-ENFILE);
  1333. newsock->type = osock->type;
  1334. newsock->ops = osock->ops;
  1335. __module_get(newsock->ops->owner);
  1336. newsk = sk_alloc(sock_net(osock->sk), PF_KCM, GFP_KERNEL,
  1337. &kcm_proto, false);
  1338. if (!newsk) {
  1339. sock_release(newsock);
  1340. return ERR_PTR(-ENOMEM);
  1341. }
  1342. sock_init_data(newsock, newsk);
  1343. init_kcm_sock(kcm_sk(newsk), kcm_sk(osock->sk)->mux);
  1344. return sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name);
  1345. }
  1346. static int kcm_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
  1347. {
  1348. int err;
  1349. switch (cmd) {
  1350. case SIOCKCMATTACH: {
  1351. struct kcm_attach info;
  1352. if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
  1353. return -EFAULT;
  1354. err = kcm_attach_ioctl(sock, &info);
  1355. break;
  1356. }
  1357. case SIOCKCMUNATTACH: {
  1358. struct kcm_unattach info;
  1359. if (copy_from_user(&info, (void __user *)arg, sizeof(info)))
  1360. return -EFAULT;
  1361. err = kcm_unattach_ioctl(sock, &info);
  1362. break;
  1363. }
  1364. case SIOCKCMCLONE: {
  1365. struct kcm_clone info;
  1366. struct file *file;
  1367. info.fd = get_unused_fd_flags(0);
  1368. if (unlikely(info.fd < 0))
  1369. return info.fd;
  1370. file = kcm_clone(sock);
  1371. if (IS_ERR(file)) {
  1372. put_unused_fd(info.fd);
  1373. return PTR_ERR(file);
  1374. }
  1375. if (copy_to_user((void __user *)arg, &info,
  1376. sizeof(info))) {
  1377. put_unused_fd(info.fd);
  1378. fput(file);
  1379. return -EFAULT;
  1380. }
  1381. fd_install(info.fd, file);
  1382. err = 0;
  1383. break;
  1384. }
  1385. default:
  1386. err = -ENOIOCTLCMD;
  1387. break;
  1388. }
  1389. return err;
  1390. }
  1391. static void free_mux(struct rcu_head *rcu)
  1392. {
  1393. struct kcm_mux *mux = container_of(rcu,
  1394. struct kcm_mux, rcu);
  1395. kmem_cache_free(kcm_muxp, mux);
  1396. }
  1397. static void release_mux(struct kcm_mux *mux)
  1398. {
  1399. struct kcm_net *knet = mux->knet;
  1400. struct kcm_psock *psock, *tmp_psock;
  1401. /* Release psocks */
  1402. list_for_each_entry_safe(psock, tmp_psock,
  1403. &mux->psocks, psock_list) {
  1404. if (!WARN_ON(psock->unattaching))
  1405. kcm_unattach(psock);
  1406. }
  1407. if (WARN_ON(mux->psocks_cnt))
  1408. return;
  1409. __skb_queue_purge(&mux->rx_hold_queue);
  1410. mutex_lock(&knet->mutex);
  1411. aggregate_mux_stats(&mux->stats, &knet->aggregate_mux_stats);
  1412. aggregate_psock_stats(&mux->aggregate_psock_stats,
  1413. &knet->aggregate_psock_stats);
  1414. aggregate_strp_stats(&mux->aggregate_strp_stats,
  1415. &knet->aggregate_strp_stats);
  1416. list_del_rcu(&mux->kcm_mux_list);
  1417. knet->count--;
  1418. mutex_unlock(&knet->mutex);
  1419. call_rcu(&mux->rcu, free_mux);
  1420. }
  1421. static void kcm_done(struct kcm_sock *kcm)
  1422. {
  1423. struct kcm_mux *mux = kcm->mux;
  1424. struct sock *sk = &kcm->sk;
  1425. int socks_cnt;
  1426. spin_lock_bh(&mux->rx_lock);
  1427. if (kcm->rx_psock) {
  1428. /* Cleanup in unreserve_rx_kcm */
  1429. WARN_ON(kcm->done);
  1430. kcm->rx_disabled = 1;
  1431. kcm->done = 1;
  1432. spin_unlock_bh(&mux->rx_lock);
  1433. return;
  1434. }
  1435. if (kcm->rx_wait) {
  1436. list_del(&kcm->wait_rx_list);
  1437. kcm->rx_wait = false;
  1438. }
  1439. /* Move any pending receive messages to other kcm sockets */
  1440. requeue_rx_msgs(mux, &sk->sk_receive_queue);
  1441. spin_unlock_bh(&mux->rx_lock);
  1442. if (WARN_ON(sk_rmem_alloc_get(sk)))
  1443. return;
  1444. /* Detach from MUX */
  1445. spin_lock_bh(&mux->lock);
  1446. list_del(&kcm->kcm_sock_list);
  1447. mux->kcm_socks_cnt--;
  1448. socks_cnt = mux->kcm_socks_cnt;
  1449. spin_unlock_bh(&mux->lock);
  1450. if (!socks_cnt) {
  1451. /* We are done with the mux now. */
  1452. release_mux(mux);
  1453. }
  1454. WARN_ON(kcm->rx_wait);
  1455. sock_put(&kcm->sk);
  1456. }
  1457. /* Called by kcm_release to close a KCM socket.
  1458. * If this is the last KCM socket on the MUX, destroy the MUX.
  1459. */
  1460. static int kcm_release(struct socket *sock)
  1461. {
  1462. struct sock *sk = sock->sk;
  1463. struct kcm_sock *kcm;
  1464. struct kcm_mux *mux;
  1465. struct kcm_psock *psock;
  1466. if (!sk)
  1467. return 0;
  1468. kcm = kcm_sk(sk);
  1469. mux = kcm->mux;
  1470. sock_orphan(sk);
  1471. kfree_skb(kcm->seq_skb);
  1472. lock_sock(sk);
  1473. /* Purge queue under lock to avoid race condition with tx_work trying
  1474. * to act when queue is nonempty. If tx_work runs after this point
  1475. * it will just return.
  1476. */
  1477. __skb_queue_purge(&sk->sk_write_queue);
  1478. /* Set tx_stopped. This is checked when psock is bound to a kcm and we
  1479. * get a writespace callback. This prevents further work being queued
  1480. * from the callback (unbinding the psock occurs after canceling work.
  1481. */
  1482. kcm->tx_stopped = 1;
  1483. release_sock(sk);
  1484. spin_lock_bh(&mux->lock);
  1485. if (kcm->tx_wait) {
  1486. /* Take of tx_wait list, after this point there should be no way
  1487. * that a psock will be assigned to this kcm.
  1488. */
  1489. list_del(&kcm->wait_psock_list);
  1490. kcm->tx_wait = false;
  1491. }
  1492. spin_unlock_bh(&mux->lock);
  1493. /* Cancel work. After this point there should be no outside references
  1494. * to the kcm socket.
  1495. */
  1496. cancel_work_sync(&kcm->tx_work);
  1497. lock_sock(sk);
  1498. psock = kcm->tx_psock;
  1499. if (psock) {
  1500. /* A psock was reserved, so we need to kill it since it
  1501. * may already have some bytes queued from a message. We
  1502. * need to do this after removing kcm from tx_wait list.
  1503. */
  1504. kcm_abort_tx_psock(psock, EPIPE, false);
  1505. unreserve_psock(kcm);
  1506. }
  1507. release_sock(sk);
  1508. WARN_ON(kcm->tx_wait);
  1509. WARN_ON(kcm->tx_psock);
  1510. sock->sk = NULL;
  1511. kcm_done(kcm);
  1512. return 0;
  1513. }
  1514. static const struct proto_ops kcm_dgram_ops = {
  1515. .family = PF_KCM,
  1516. .owner = THIS_MODULE,
  1517. .release = kcm_release,
  1518. .bind = sock_no_bind,
  1519. .connect = sock_no_connect,
  1520. .socketpair = sock_no_socketpair,
  1521. .accept = sock_no_accept,
  1522. .getname = sock_no_getname,
  1523. .poll = datagram_poll,
  1524. .ioctl = kcm_ioctl,
  1525. .listen = sock_no_listen,
  1526. .shutdown = sock_no_shutdown,
  1527. .setsockopt = kcm_setsockopt,
  1528. .getsockopt = kcm_getsockopt,
  1529. .sendmsg = kcm_sendmsg,
  1530. .recvmsg = kcm_recvmsg,
  1531. .mmap = sock_no_mmap,
  1532. .sendpage = kcm_sendpage,
  1533. };
  1534. static const struct proto_ops kcm_seqpacket_ops = {
  1535. .family = PF_KCM,
  1536. .owner = THIS_MODULE,
  1537. .release = kcm_release,
  1538. .bind = sock_no_bind,
  1539. .connect = sock_no_connect,
  1540. .socketpair = sock_no_socketpair,
  1541. .accept = sock_no_accept,
  1542. .getname = sock_no_getname,
  1543. .poll = datagram_poll,
  1544. .ioctl = kcm_ioctl,
  1545. .listen = sock_no_listen,
  1546. .shutdown = sock_no_shutdown,
  1547. .setsockopt = kcm_setsockopt,
  1548. .getsockopt = kcm_getsockopt,
  1549. .sendmsg = kcm_sendmsg,
  1550. .recvmsg = kcm_recvmsg,
  1551. .mmap = sock_no_mmap,
  1552. .sendpage = kcm_sendpage,
  1553. .splice_read = kcm_splice_read,
  1554. };
  1555. /* Create proto operation for kcm sockets */
  1556. static int kcm_create(struct net *net, struct socket *sock,
  1557. int protocol, int kern)
  1558. {
  1559. struct kcm_net *knet = net_generic(net, kcm_net_id);
  1560. struct sock *sk;
  1561. struct kcm_mux *mux;
  1562. switch (sock->type) {
  1563. case SOCK_DGRAM:
  1564. sock->ops = &kcm_dgram_ops;
  1565. break;
  1566. case SOCK_SEQPACKET:
  1567. sock->ops = &kcm_seqpacket_ops;
  1568. break;
  1569. default:
  1570. return -ESOCKTNOSUPPORT;
  1571. }
  1572. if (protocol != KCMPROTO_CONNECTED)
  1573. return -EPROTONOSUPPORT;
  1574. sk = sk_alloc(net, PF_KCM, GFP_KERNEL, &kcm_proto, kern);
  1575. if (!sk)
  1576. return -ENOMEM;
  1577. /* Allocate a kcm mux, shared between KCM sockets */
  1578. mux = kmem_cache_zalloc(kcm_muxp, GFP_KERNEL);
  1579. if (!mux) {
  1580. sk_free(sk);
  1581. return -ENOMEM;
  1582. }
  1583. spin_lock_init(&mux->lock);
  1584. spin_lock_init(&mux->rx_lock);
  1585. INIT_LIST_HEAD(&mux->kcm_socks);
  1586. INIT_LIST_HEAD(&mux->kcm_rx_waiters);
  1587. INIT_LIST_HEAD(&mux->kcm_tx_waiters);
  1588. INIT_LIST_HEAD(&mux->psocks);
  1589. INIT_LIST_HEAD(&mux->psocks_ready);
  1590. INIT_LIST_HEAD(&mux->psocks_avail);
  1591. mux->knet = knet;
  1592. /* Add new MUX to list */
  1593. mutex_lock(&knet->mutex);
  1594. list_add_rcu(&mux->kcm_mux_list, &knet->mux_list);
  1595. knet->count++;
  1596. mutex_unlock(&knet->mutex);
  1597. skb_queue_head_init(&mux->rx_hold_queue);
  1598. /* Init KCM socket */
  1599. sock_init_data(sock, sk);
  1600. init_kcm_sock(kcm_sk(sk), mux);
  1601. return 0;
  1602. }
  1603. static const struct net_proto_family kcm_family_ops = {
  1604. .family = PF_KCM,
  1605. .create = kcm_create,
  1606. .owner = THIS_MODULE,
  1607. };
  1608. static __net_init int kcm_init_net(struct net *net)
  1609. {
  1610. struct kcm_net *knet = net_generic(net, kcm_net_id);
  1611. INIT_LIST_HEAD_RCU(&knet->mux_list);
  1612. mutex_init(&knet->mutex);
  1613. return 0;
  1614. }
  1615. static __net_exit void kcm_exit_net(struct net *net)
  1616. {
  1617. struct kcm_net *knet = net_generic(net, kcm_net_id);
  1618. /* All KCM sockets should be closed at this point, which should mean
  1619. * that all multiplexors and psocks have been destroyed.
  1620. */
  1621. WARN_ON(!list_empty(&knet->mux_list));
  1622. }
  1623. static struct pernet_operations kcm_net_ops = {
  1624. .init = kcm_init_net,
  1625. .exit = kcm_exit_net,
  1626. .id = &kcm_net_id,
  1627. .size = sizeof(struct kcm_net),
  1628. };
  1629. static int __init kcm_init(void)
  1630. {
  1631. int err = -ENOMEM;
  1632. kcm_muxp = kmem_cache_create("kcm_mux_cache",
  1633. sizeof(struct kcm_mux), 0,
  1634. SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
  1635. if (!kcm_muxp)
  1636. goto fail;
  1637. kcm_psockp = kmem_cache_create("kcm_psock_cache",
  1638. sizeof(struct kcm_psock), 0,
  1639. SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
  1640. if (!kcm_psockp)
  1641. goto fail;
  1642. kcm_wq = create_singlethread_workqueue("kkcmd");
  1643. if (!kcm_wq)
  1644. goto fail;
  1645. err = proto_register(&kcm_proto, 1);
  1646. if (err)
  1647. goto fail;
  1648. err = register_pernet_device(&kcm_net_ops);
  1649. if (err)
  1650. goto net_ops_fail;
  1651. err = sock_register(&kcm_family_ops);
  1652. if (err)
  1653. goto sock_register_fail;
  1654. err = kcm_proc_init();
  1655. if (err)
  1656. goto proc_init_fail;
  1657. return 0;
  1658. proc_init_fail:
  1659. sock_unregister(PF_KCM);
  1660. sock_register_fail:
  1661. unregister_pernet_device(&kcm_net_ops);
  1662. net_ops_fail:
  1663. proto_unregister(&kcm_proto);
  1664. fail:
  1665. kmem_cache_destroy(kcm_muxp);
  1666. kmem_cache_destroy(kcm_psockp);
  1667. if (kcm_wq)
  1668. destroy_workqueue(kcm_wq);
  1669. return err;
  1670. }
  1671. static void __exit kcm_exit(void)
  1672. {
  1673. kcm_proc_exit();
  1674. sock_unregister(PF_KCM);
  1675. unregister_pernet_device(&kcm_net_ops);
  1676. proto_unregister(&kcm_proto);
  1677. destroy_workqueue(kcm_wq);
  1678. kmem_cache_destroy(kcm_muxp);
  1679. kmem_cache_destroy(kcm_psockp);
  1680. }
  1681. module_init(kcm_init);
  1682. module_exit(kcm_exit);
  1683. MODULE_LICENSE("GPL");
  1684. MODULE_ALIAS_NETPROTO(PF_KCM);