shm.c 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * linux/ipc/shm.c
  4. * Copyright (C) 1992, 1993 Krishna Balasubramanian
  5. * Many improvements/fixes by Bruno Haible.
  6. * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
  7. * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
  8. *
  9. * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
  10. * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
  11. * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
  12. * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
  13. * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com>
  14. * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
  15. * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com>
  16. *
  17. * support for audit of ipc object properties and permission changes
  18. * Dustin Kirkland <dustin.kirkland@us.ibm.com>
  19. *
  20. * namespaces support
  21. * OpenVZ, SWsoft Inc.
  22. * Pavel Emelianov <xemul@openvz.org>
  23. *
  24. * Better ipc lock (kern_ipc_perm.lock) handling
  25. * Davidlohr Bueso <davidlohr.bueso@hp.com>, June 2013.
  26. */
  27. #include <linux/slab.h>
  28. #include <linux/mm.h>
  29. #include <linux/hugetlb.h>
  30. #include <linux/shm.h>
  31. #include <linux/init.h>
  32. #include <linux/file.h>
  33. #include <linux/mman.h>
  34. #include <linux/shmem_fs.h>
  35. #include <linux/security.h>
  36. #include <linux/syscalls.h>
  37. #include <linux/audit.h>
  38. #include <linux/capability.h>
  39. #include <linux/ptrace.h>
  40. #include <linux/seq_file.h>
  41. #include <linux/rwsem.h>
  42. #include <linux/nsproxy.h>
  43. #include <linux/mount.h>
  44. #include <linux/ipc_namespace.h>
  45. #include <linux/rhashtable.h>
  46. #include <linux/uaccess.h>
  47. #include "util.h"
/*
 * Kernel-side descriptor of one System V shared memory segment.
 *
 * The generic ipc code works on the embedded shm_perm; the functions in
 * this file get back to the descriptor with container_of().
 */
struct shmid_kernel /* private to the kernel */
{
	struct kern_ipc_perm	shm_perm;
	struct file		*shm_file;	/* file backing the segment */
	unsigned long		shm_nattch;	/* ++ in __shm_open(), -- in shm_close() */
	unsigned long		shm_segsz;	/* size passed to newseg() */
	time64_t		shm_atim;	/* stamped by __shm_open() */
	time64_t		shm_dtim;	/* stamped by shm_close() */
	time64_t		shm_ctim;	/* stamped on creation and on IPC_SET */
	struct pid		*shm_cprid;	/* tgid of the creating task */
	struct pid		*shm_lprid;	/* tgid of the last task in __shm_open()/shm_close() */
	struct user_struct	*mlock_user;	/* handed to shmem_lock()/user_shm_unlock() */

	/* The task created the shm object.  NULL if the task is dead. */
	struct task_struct	*shm_creator;
	struct list_head	shm_clist;	/* list by creator */
} __randomize_layout;
/*
 * shm_mode upper byte flags.
 * Octal values that are OR-ed into shm_perm.mode.
 */
#define SHM_DEST	01000	/* segment will be destroyed on last detach */
#define SHM_LOCKED	02000	/* segment will not be swapped */
/*
 * Per-file state hung off file->private_data (see the shm_file_data()
 * accessor macro) for files that use the shm file operations.
 */
struct shm_file_data {
	int id;					/* ipc id used to look the segment up again */
	struct ipc_namespace *ns;		/* namespace the id lives in; put in shm_release() */
	struct file *file;			/* underlying file the operations are forwarded to */
	const struct vm_operations_struct *vm_ops; /* vm_ops installed by the underlying mmap */
};
/*
 * Access the struct shm_file_data pointer kept in file->private_data.
 * Expands to an lvalue, so it can be assigned as well as read.
 */
#define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data))

/* Tentative definitions; the initialised tables appear later in the file. */
static const struct file_operations shm_file_operations;
static const struct vm_operations_struct shm_vm_ops;

/* The shm entry of a namespace's ipc id tables. */
#define shm_ids(ns)	((ns)->ids[IPC_SHM_IDS])

/* Unlock a segment through its embedded kern_ipc_perm. */
#define shm_unlock(shp)			\
	ipc_unlock(&(shp)->shm_perm)

/* Forward declarations for functions defined further down. */
static int newseg(struct ipc_namespace *, struct ipc_params *);
static void shm_open(struct vm_area_struct *vma);
static void shm_close(struct vm_area_struct *vma);
static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp);
#ifdef CONFIG_PROC_FS
static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
#endif
  86. void shm_init_ns(struct ipc_namespace *ns)
  87. {
  88. ns->shm_ctlmax = SHMMAX;
  89. ns->shm_ctlall = SHMALL;
  90. ns->shm_ctlmni = SHMMNI;
  91. ns->shm_rmid_forced = 0;
  92. ns->shm_tot = 0;
  93. ipc_init_ids(&shm_ids(ns));
  94. }
  95. /*
  96. * Called with shm_ids.rwsem (writer) and the shp structure locked.
  97. * Only shm_ids.rwsem remains locked on exit.
  98. */
  99. static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
  100. {
  101. struct shmid_kernel *shp;
  102. shp = container_of(ipcp, struct shmid_kernel, shm_perm);
  103. if (shp->shm_nattch) {
  104. shp->shm_perm.mode |= SHM_DEST;
  105. /* Do not find it any more */
  106. ipc_set_key_private(&shm_ids(ns), &shp->shm_perm);
  107. shm_unlock(shp);
  108. } else
  109. shm_destroy(ns, shp);
  110. }
  111. #ifdef CONFIG_IPC_NS
  112. void shm_exit_ns(struct ipc_namespace *ns)
  113. {
  114. free_ipcs(ns, &shm_ids(ns), do_shm_rmid);
  115. idr_destroy(&ns->ids[IPC_SHM_IDS].ipcs_idr);
  116. rhashtable_destroy(&ns->ids[IPC_SHM_IDS].key_ht);
  117. }
  118. #endif
/*
 * Initialise the shm state of the initial ipc namespace (init_ipc_ns).
 * Registered as a pure_initcall; always returns 0.
 */
static int __init ipc_ns_init(void)
{
	shm_init_ns(&init_ipc_ns);
	return 0;
}

pure_initcall(ipc_ns_init);
/*
 * Register the "sysvipc/shm" proc interface: a header line naming the
 * columns, the shm id table index, and sysvipc_shm_proc_show() as the
 * per-entry show routine.
 *
 * NOTE(review): the two BITS_PER_LONG variants of the header string are
 * identical as written here; presumably they once differed only in
 * column padding - confirm against the reference source.
 */
void __init shm_init(void)
{
	ipc_init_proc_interface("sysvipc/shm",
#if BITS_PER_LONG <= 32
				" key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n",
#else
				" key shmid perms size cpid lpid nattch uid gid cuid cgid atime dtime ctime rss swap\n",
#endif
				IPC_SHM_IDS, sysvipc_shm_proc_show);
}
  135. static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id)
  136. {
  137. struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&shm_ids(ns), id);
  138. if (IS_ERR(ipcp))
  139. return ERR_CAST(ipcp);
  140. return container_of(ipcp, struct shmid_kernel, shm_perm);
  141. }
  142. static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace *ns, int id)
  143. {
  144. struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&shm_ids(ns), id);
  145. if (IS_ERR(ipcp))
  146. return ERR_CAST(ipcp);
  147. return container_of(ipcp, struct shmid_kernel, shm_perm);
  148. }
/*
 * shm_lock_(check_) routines are called in the paths where the rwsem
 * is not necessarily held.
 *
 * shm_lock - look up a segment by id and return it locked
 * @ns: namespace to search
 * @id: ipc id
 *
 * On success the segment is returned with its object lock held; the
 * rcu_read_lock() taken here is not dropped on that path.  On failure
 * the RCU read lock is released and an ERR_PTR is returned: either the
 * lookup error, or -EIDRM if the object was removed while we waited for
 * its lock.
 */
static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
{
	struct kern_ipc_perm *ipcp;

	rcu_read_lock();
	ipcp = ipc_obtain_object_idr(&shm_ids(ns), id);
	if (IS_ERR(ipcp))
		goto err;

	ipc_lock_object(ipcp);
	/*
	 * ipc_rmid() may have already freed the ID while ipc_lock_object()
	 * was spinning: here verify that the structure is still valid.
	 * Upon races with RMID, return -EIDRM, thus indicating that
	 * the ID points to a removed identifier.
	 */
	if (ipc_valid_object(ipcp)) {
		/* return a locked ipc object upon success */
		return container_of(ipcp, struct shmid_kernel, shm_perm);
	}

	/* Lost the race with removal: drop the lock and report -EIDRM. */
	ipc_unlock_object(ipcp);
	ipcp = ERR_PTR(-EIDRM);
err:
	rcu_read_unlock();
	/*
	 * Callers of shm_lock() must validate the status of the returned ipc
	 * object pointer and error out as appropriate.
	 */
	return ERR_CAST(ipcp);
}
/*
 * Lock a segment the caller already holds a pointer to: enter an RCU
 * read-side section and take the object lock.  No lookup or validity
 * check is performed.
 */
static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
{
	rcu_read_lock();
	ipc_lock_object(&ipcp->shm_perm);
}
  186. static void shm_rcu_free(struct rcu_head *head)
  187. {
  188. struct kern_ipc_perm *ptr = container_of(head, struct kern_ipc_perm,
  189. rcu);
  190. struct shmid_kernel *shp = container_of(ptr, struct shmid_kernel,
  191. shm_perm);
  192. security_shm_free(&shp->shm_perm);
  193. kvfree(shp);
  194. }
/*
 * Unlink a segment from its creator list (shm_clist) and remove its id
 * from the namespace's shm id tables.
 */
static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
{
	list_del(&s->shm_clist);
	ipc_rmid(&shm_ids(ns), &s->shm_perm);
}
  200. static int __shm_open(struct vm_area_struct *vma)
  201. {
  202. struct file *file = vma->vm_file;
  203. struct shm_file_data *sfd = shm_file_data(file);
  204. struct shmid_kernel *shp;
  205. shp = shm_lock(sfd->ns, sfd->id);
  206. if (IS_ERR(shp))
  207. return PTR_ERR(shp);
  208. if (shp->shm_file != sfd->file) {
  209. /* ID was reused */
  210. shm_unlock(shp);
  211. return -EINVAL;
  212. }
  213. shp->shm_atim = ktime_get_real_seconds();
  214. ipc_update_pid(&shp->shm_lprid, task_tgid(current));
  215. shp->shm_nattch++;
  216. shm_unlock(shp);
  217. return 0;
  218. }
  219. /* This is called by fork, once for every shm attach. */
  220. static void shm_open(struct vm_area_struct *vma)
  221. {
  222. int err = __shm_open(vma);
  223. /*
  224. * We raced in the idr lookup or with shm_destroy().
  225. * Either way, the ID is busted.
  226. */
  227. WARN_ON_ONCE(err);
  228. }
/*
 * shm_destroy - free the struct shmid_kernel
 *
 * @ns: namespace
 * @shp: struct to free
 *
 * It has to be called with shp and shm_ids.rwsem (writer) locked,
 * but returns with shp unlocked and freed.
 */
static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
{
	struct file *shm_file;

	/* Take over the file pointer and clear it in the descriptor. */
	shm_file = shp->shm_file;
	shp->shm_file = NULL;
	/* Give back the segment's pages (size rounded up to whole pages). */
	ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
	shm_rmid(ns, shp);
	shm_unlock(shp);
	/* From here on the object lock is no longer held. */
	if (!is_file_hugepages(shm_file))
		shmem_lock(shm_file, 0, shp->mlock_user);
	else if (shp->mlock_user)
		user_shm_unlock(i_size_read(file_inode(shm_file)),
				shp->mlock_user);
	fput(shm_file);
	/* Drop the creator/last-user pid references. */
	ipc_update_pid(&shp->shm_cprid, NULL);
	ipc_update_pid(&shp->shm_lprid, NULL);
	/* Release our reference; shm_rcu_free() does the actual freeing. */
	ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
}
  256. /*
  257. * shm_may_destroy - identifies whether shm segment should be destroyed now
  258. *
  259. * Returns true if and only if there are no active users of the segment and
  260. * one of the following is true:
  261. *
  262. * 1) shmctl(id, IPC_RMID, NULL) was called for this shp
  263. *
  264. * 2) sysctl kernel.shm_rmid_forced is set to 1.
  265. */
  266. static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
  267. {
  268. return (shp->shm_nattch == 0) &&
  269. (ns->shm_rmid_forced ||
  270. (shp->shm_perm.mode & SHM_DEST));
  271. }
  272. /*
  273. * remove the attach descriptor vma.
  274. * free memory for segment if it is marked destroyed.
  275. * The descriptor has already been removed from the current->mm->mmap list
  276. * and will later be kfree()d.
  277. */
  278. static void shm_close(struct vm_area_struct *vma)
  279. {
  280. struct file *file = vma->vm_file;
  281. struct shm_file_data *sfd = shm_file_data(file);
  282. struct shmid_kernel *shp;
  283. struct ipc_namespace *ns = sfd->ns;
  284. down_write(&shm_ids(ns).rwsem);
  285. /* remove from the list of attaches of the shm segment */
  286. shp = shm_lock(ns, sfd->id);
  287. /*
  288. * We raced in the idr lookup or with shm_destroy().
  289. * Either way, the ID is busted.
  290. */
  291. if (WARN_ON_ONCE(IS_ERR(shp)))
  292. goto done; /* no-op */
  293. ipc_update_pid(&shp->shm_lprid, task_tgid(current));
  294. shp->shm_dtim = ktime_get_real_seconds();
  295. shp->shm_nattch--;
  296. if (shm_may_destroy(ns, shp))
  297. shm_destroy(ns, shp);
  298. else
  299. shm_unlock(shp);
  300. done:
  301. up_write(&shm_ids(ns).rwsem);
  302. }
  303. /* Called with ns->shm_ids(ns).rwsem locked */
  304. static int shm_try_destroy_orphaned(int id, void *p, void *data)
  305. {
  306. struct ipc_namespace *ns = data;
  307. struct kern_ipc_perm *ipcp = p;
  308. struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);
  309. /*
  310. * We want to destroy segments without users and with already
  311. * exit'ed originating process.
  312. *
  313. * As shp->* are changed under rwsem, it's safe to skip shp locking.
  314. */
  315. if (shp->shm_creator != NULL)
  316. return 0;
  317. if (shm_may_destroy(ns, shp)) {
  318. shm_lock_by_ptr(shp);
  319. shm_destroy(ns, shp);
  320. }
  321. return 0;
  322. }
  323. void shm_destroy_orphaned(struct ipc_namespace *ns)
  324. {
  325. down_write(&shm_ids(ns).rwsem);
  326. if (shm_ids(ns).in_use)
  327. idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
  328. up_write(&shm_ids(ns).rwsem);
  329. }
/*
 * exit_shm - detach a task from the segments it created
 * @task: the exiting task
 *
 * Walks task->sysvshm.shm_clist, clearing shm_creator on every segment so
 * that it counts as orphaned.  When kernel.shm_rmid_forced is set, segments
 * for which shm_may_destroy() holds are destroyed on the spot.
 *
 * Locking assumes this will only be called with task == current
 */
void exit_shm(struct task_struct *task)
{
	struct ipc_namespace *ns = task->nsproxy->ipc_ns;
	struct shmid_kernel *shp, *n;

	/* Nothing was created by this task: nothing to do. */
	if (list_empty(&task->sysvshm.shm_clist))
		return;

	/*
	 * If kernel.shm_rmid_forced is not set then only keep track of
	 * which shmids are orphaned, so that a later set of the sysctl
	 * can clean them up.
	 */
	if (!ns->shm_rmid_forced) {
		down_read(&shm_ids(ns).rwsem);
		list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist)
			shp->shm_creator = NULL;
		/*
		 * Only under read lock but we are only called on current
		 * so no entry on the list will be shared.
		 */
		list_del(&task->sysvshm.shm_clist);
		up_read(&shm_ids(ns).rwsem);
		return;
	}

	/*
	 * Destroy all already created segments, that were not yet mapped,
	 * and mark any mapped as orphan to cover the sysctl toggling.
	 * Destroy is skipped if shm_may_destroy() returns false.
	 */
	down_write(&shm_ids(ns).rwsem);
	/* _safe iteration: shm_destroy() unlinks shp from this list. */
	list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) {
		shp->shm_creator = NULL;

		if (shm_may_destroy(ns, shp)) {
			shm_lock_by_ptr(shp);
			shm_destroy(ns, shp);
		}
	}

	/* Remove the list head from any segments still attached. */
	list_del(&task->sysvshm.shm_clist);
	up_write(&shm_ids(ns).rwsem);
}
  371. static vm_fault_t shm_fault(struct vm_fault *vmf)
  372. {
  373. struct file *file = vmf->vma->vm_file;
  374. struct shm_file_data *sfd = shm_file_data(file);
  375. return sfd->vm_ops->fault(vmf);
  376. }
  377. static int shm_split(struct vm_area_struct *vma, unsigned long addr)
  378. {
  379. struct file *file = vma->vm_file;
  380. struct shm_file_data *sfd = shm_file_data(file);
  381. if (sfd->vm_ops->split)
  382. return sfd->vm_ops->split(vma, addr);
  383. return 0;
  384. }
  385. static unsigned long shm_pagesize(struct vm_area_struct *vma)
  386. {
  387. struct file *file = vma->vm_file;
  388. struct shm_file_data *sfd = shm_file_data(file);
  389. if (sfd->vm_ops->pagesize)
  390. return sfd->vm_ops->pagesize(vma);
  391. return PAGE_SIZE;
  392. }
  393. #ifdef CONFIG_NUMA
  394. static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
  395. {
  396. struct file *file = vma->vm_file;
  397. struct shm_file_data *sfd = shm_file_data(file);
  398. int err = 0;
  399. if (sfd->vm_ops->set_policy)
  400. err = sfd->vm_ops->set_policy(vma, new);
  401. return err;
  402. }
  403. static struct mempolicy *shm_get_policy(struct vm_area_struct *vma,
  404. unsigned long addr)
  405. {
  406. struct file *file = vma->vm_file;
  407. struct shm_file_data *sfd = shm_file_data(file);
  408. struct mempolicy *pol = NULL;
  409. if (sfd->vm_ops->get_policy)
  410. pol = sfd->vm_ops->get_policy(vma, addr);
  411. else if (vma->vm_policy)
  412. pol = vma->vm_policy;
  413. return pol;
  414. }
  415. #endif
/*
 * shm_mmap - file_operations->mmap for shm files
 * @file: the shm file being mapped
 * @vma:  the new mapping
 *
 * Accounts the attach with __shm_open(), lets the underlying file set up
 * the mapping, then remembers the vm_ops it installed in the
 * shm_file_data and substitutes shm_vm_ops.  Returns 0 or a negative
 * errno.
 */
static int shm_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct shm_file_data *sfd = shm_file_data(file);
	int ret;

	/*
	 * In case of remap_file_pages() emulation, the file can represent an
	 * IPC ID that was removed, and possibly even reused by another shm
	 * segment already.  Propagate this case as an error to caller.
	 */
	ret = __shm_open(vma);
	if (ret)
		return ret;

	ret = call_mmap(sfd->file, vma);
	if (ret) {
		/* Undo the attach accounted by __shm_open() above. */
		shm_close(vma);
		return ret;
	}
	/* Keep the underlying vm_ops; the shm_* vm handlers forward to them. */
	sfd->vm_ops = vma->vm_ops;
#ifdef CONFIG_MMU
	WARN_ON(!sfd->vm_ops->fault);
#endif
	vma->vm_ops = &shm_vm_ops;

	return 0;
}
/*
 * shm_release - file_operations->release for shm files
 *
 * Drops the namespace reference and the underlying file reference held
 * through the shm_file_data, clears file->private_data and frees the
 * shm_file_data.  Always returns 0.
 */
static int shm_release(struct inode *ino, struct file *file)
{
	struct shm_file_data *sfd = shm_file_data(file);

	put_ipc_ns(sfd->ns);
	fput(sfd->file);
	/* The accessor macro is an lvalue: this clears file->private_data. */
	shm_file_data(file) = NULL;
	kfree(sfd);
	return 0;
}
  449. static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
  450. {
  451. struct shm_file_data *sfd = shm_file_data(file);
  452. if (!sfd->file->f_op->fsync)
  453. return -EINVAL;
  454. return sfd->file->f_op->fsync(sfd->file, start, end, datasync);
  455. }
  456. static long shm_fallocate(struct file *file, int mode, loff_t offset,
  457. loff_t len)
  458. {
  459. struct shm_file_data *sfd = shm_file_data(file);
  460. if (!sfd->file->f_op->fallocate)
  461. return -EOPNOTSUPP;
  462. return sfd->file->f_op->fallocate(file, mode, offset, len);
  463. }
  464. static unsigned long shm_get_unmapped_area(struct file *file,
  465. unsigned long addr, unsigned long len, unsigned long pgoff,
  466. unsigned long flags)
  467. {
  468. struct shm_file_data *sfd = shm_file_data(file);
  469. return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len,
  470. pgoff, flags);
  471. }
  472. static const struct file_operations shm_file_operations = {
  473. .mmap = shm_mmap,
  474. .fsync = shm_fsync,
  475. .release = shm_release,
  476. .get_unmapped_area = shm_get_unmapped_area,
  477. .llseek = noop_llseek,
  478. .fallocate = shm_fallocate,
  479. };
  480. /*
  481. * shm_file_operations_huge is now identical to shm_file_operations,
  482. * but we keep it distinct for the sake of is_file_shm_hugepages().
  483. */
  484. static const struct file_operations shm_file_operations_huge = {
  485. .mmap = shm_mmap,
  486. .fsync = shm_fsync,
  487. .release = shm_release,
  488. .get_unmapped_area = shm_get_unmapped_area,
  489. .llseek = noop_llseek,
  490. .fallocate = shm_fallocate,
  491. };
/*
 * Returns true if @file uses shm_file_operations_huge as its file
 * operations table (pointer comparison), false otherwise.
 */
bool is_file_shm_hugepages(struct file *file)
{
	return file->f_op == &shm_file_operations_huge;
}
  496. static const struct vm_operations_struct shm_vm_ops = {
  497. .open = shm_open, /* callback for a new vm-area open */
  498. .close = shm_close, /* callback for when the vm-area is released */
  499. .fault = shm_fault,
  500. .split = shm_split,
  501. .pagesize = shm_pagesize,
  502. #if defined(CONFIG_NUMA)
  503. .set_policy = shm_set_policy,
  504. .get_policy = shm_get_policy,
  505. #endif
  506. };
/**
 * newseg - Create a new shared memory segment
 * @ns: namespace
 * @params: ptr to the structure that contains key, size and shmflg
 *
 * Called with shm_ids.rwsem held as a writer.
 *
 * Return: the id of the new segment on success, a negative errno
 * otherwise (-EINVAL for a size outside [SHMMIN, shm_ctlmax] or an
 * unknown huge page size, -ENOSPC when the namespace page limit would be
 * exceeded, -ENOMEM on allocation failure, or the error reported by the
 * security hook, the file setup or ipc_addid()).
 */
static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
{
	key_t key = params->key;
	int shmflg = params->flg;
	size_t size = params->u.size;
	int error;
	struct shmid_kernel *shp;
	/* size rounded up to whole pages */
	size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
	struct file *file;
	/* "SYSV" + 8 hex digits + terminating NUL */
	char name[13];
	vm_flags_t acctflag = 0;

	if (size < SHMMIN || size > ns->shm_ctlmax)
		return -EINVAL;

	/* The page count must cover the size (guards the rounding above). */
	if (numpages << PAGE_SHIFT < size)
		return -ENOSPC;

	/* Reject wrap-around of the total as well as exceeding shm_ctlall. */
	if (ns->shm_tot + numpages < ns->shm_tot ||
			ns->shm_tot + numpages > ns->shm_ctlall)
		return -ENOSPC;

	shp = kvmalloc(sizeof(*shp), GFP_KERNEL);
	if (unlikely(!shp))
		return -ENOMEM;

	shp->shm_perm.key = key;
	/* Only the rwx permission bits of shmflg end up in the mode. */
	shp->shm_perm.mode = (shmflg & S_IRWXUGO);
	shp->mlock_user = NULL;

	shp->shm_perm.security = NULL;
	error = security_shm_alloc(&shp->shm_perm);
	if (error) {
		kvfree(shp);
		return error;
	}

	sprintf(name, "SYSV%08x", key);
	if (shmflg & SHM_HUGETLB) {
		struct hstate *hs;
		size_t hugesize;

		/* The huge page size is encoded in the upper shmflg bits. */
		hs = hstate_sizelog((shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
		if (!hs) {
			error = -EINVAL;
			goto no_file;
		}
		hugesize = ALIGN(size, huge_page_size(hs));

		/* hugetlb_file_setup applies strict accounting */
		if (shmflg & SHM_NORESERVE)
			acctflag = VM_NORESERVE;
		file = hugetlb_file_setup(name, hugesize, acctflag,
				  &shp->mlock_user, HUGETLB_SHMFS_INODE,
				(shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
	} else {
		/*
		 * Do not allow no accounting for OVERCOMMIT_NEVER, even
		 * if it's asked for.
		 */
		if  ((shmflg & SHM_NORESERVE) &&
				sysctl_overcommit_memory != OVERCOMMIT_NEVER)
			acctflag = VM_NORESERVE;
		file = shmem_kernel_file_setup(name, size, acctflag);
	}
	/* error is only meaningful if file turns out to be an ERR_PTR. */
	error = PTR_ERR(file);
	if (IS_ERR(file))
		goto no_file;

	shp->shm_cprid = get_pid(task_tgid(current));
	shp->shm_lprid = NULL;
	shp->shm_atim = shp->shm_dtim = 0;
	shp->shm_ctim = ktime_get_real_seconds();
	shp->shm_segsz = size;
	shp->shm_nattch = 0;
	shp->shm_file = file;
	shp->shm_creator = current;

	/* ipc_addid() locks shp upon success. */
	error = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
	if (error < 0)
		goto no_id;

	/* Track the segment on its creator's list (see exit_shm()). */
	list_add(&shp->shm_clist, &current->sysvshm.shm_clist);

	/*
	 * shmid gets reported as "inode#" in /proc/pid/maps.
	 * proc-ps tools use this. Changing this will break them.
	 */
	file_inode(file)->i_ino = shp->shm_perm.id;

	ns->shm_tot += numpages;
	/* Read the id before dropping the lock taken by ipc_addid(). */
	error = shp->shm_perm.id;

	ipc_unlock_object(&shp->shm_perm);
	rcu_read_unlock();
	return error;

no_id:
	/* Undo everything done after the file was set up. */
	ipc_update_pid(&shp->shm_cprid, NULL);
	ipc_update_pid(&shp->shm_lprid, NULL);
	if (is_file_hugepages(file) && shp->mlock_user)
		user_shm_unlock(size, shp->mlock_user);
	fput(file);
	ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
	return error;
no_file:
	/* No file to release; shm_rcu_free() frees the security blob and shp. */
	call_rcu(&shp->shm_perm.rcu, shm_rcu_free);
	return error;
}
  608. /*
  609. * Called with shm_ids.rwsem and ipcp locked.
  610. */
  611. static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
  612. struct ipc_params *params)
  613. {
  614. struct shmid_kernel *shp;
  615. shp = container_of(ipcp, struct shmid_kernel, shm_perm);
  616. if (shp->shm_segsz < params->u.size)
  617. return -EINVAL;
  618. return 0;
  619. }
  620. long ksys_shmget(key_t key, size_t size, int shmflg)
  621. {
  622. struct ipc_namespace *ns;
  623. static const struct ipc_ops shm_ops = {
  624. .getnew = newseg,
  625. .associate = security_shm_associate,
  626. .more_checks = shm_more_checks,
  627. };
  628. struct ipc_params shm_params;
  629. ns = current->nsproxy->ipc_ns;
  630. shm_params.key = key;
  631. shm_params.flg = shmflg;
  632. shm_params.u.size = size;
  633. return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
  634. }
/* shmget() system call: a thin wrapper around ksys_shmget(). */
SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg)
{
	return ksys_shmget(key, size, shmflg);
}
  639. static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
  640. {
  641. switch (version) {
  642. case IPC_64:
  643. return copy_to_user(buf, in, sizeof(*in));
  644. case IPC_OLD:
  645. {
  646. struct shmid_ds out;
  647. memset(&out, 0, sizeof(out));
  648. ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm);
  649. out.shm_segsz = in->shm_segsz;
  650. out.shm_atime = in->shm_atime;
  651. out.shm_dtime = in->shm_dtime;
  652. out.shm_ctime = in->shm_ctime;
  653. out.shm_cpid = in->shm_cpid;
  654. out.shm_lpid = in->shm_lpid;
  655. out.shm_nattch = in->shm_nattch;
  656. return copy_to_user(buf, &out, sizeof(out));
  657. }
  658. default:
  659. return -EINVAL;
  660. }
  661. }
  662. static inline unsigned long
  663. copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version)
  664. {
  665. switch (version) {
  666. case IPC_64:
  667. if (copy_from_user(out, buf, sizeof(*out)))
  668. return -EFAULT;
  669. return 0;
  670. case IPC_OLD:
  671. {
  672. struct shmid_ds tbuf_old;
  673. if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
  674. return -EFAULT;
  675. out->shm_perm.uid = tbuf_old.shm_perm.uid;
  676. out->shm_perm.gid = tbuf_old.shm_perm.gid;
  677. out->shm_perm.mode = tbuf_old.shm_perm.mode;
  678. return 0;
  679. }
  680. default:
  681. return -EINVAL;
  682. }
  683. }
  684. static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version)
  685. {
  686. switch (version) {
  687. case IPC_64:
  688. return copy_to_user(buf, in, sizeof(*in));
  689. case IPC_OLD:
  690. {
  691. struct shminfo out;
  692. if (in->shmmax > INT_MAX)
  693. out.shmmax = INT_MAX;
  694. else
  695. out.shmmax = (int)in->shmmax;
  696. out.shmmin = in->shmmin;
  697. out.shmmni = in->shmmni;
  698. out.shmseg = in->shmseg;
  699. out.shmall = in->shmall;
  700. return copy_to_user(buf, &out, sizeof(out));
  701. }
  702. default:
  703. return -EINVAL;
  704. }
  705. }
/*
 * Calculate and add used RSS and swap pages of a shm.
 * Called with shm_ids.rwsem held as a reader
 *
 * @shp:     segment to account
 * @rss_add: running resident-page total, incremented in place
 * @swp_add: running swapped-page total, incremented in place
 */
static void shm_add_rss_swap(struct shmid_kernel *shp,
	unsigned long *rss_add, unsigned long *swp_add)
{
	struct inode *inode;

	inode = file_inode(shp->shm_file);

	if (is_file_hugepages(shp->shm_file)) {
		struct address_space *mapping = inode->i_mapping;
		struct hstate *h = hstate_file(shp->shm_file);
		/* nrpages is scaled by the number of base pages per huge page */
		*rss_add += pages_per_huge_page(h) * mapping->nrpages;
	} else {
#ifdef CONFIG_SHMEM
		struct shmem_inode_info *info = SHMEM_I(inode);

		/* Read both counters under the shmem inode lock. */
		spin_lock_irq(&info->lock);
		*rss_add += inode->i_mapping->nrpages;
		*swp_add += info->swapped;
		spin_unlock_irq(&info->lock);
#else
		/* Without CONFIG_SHMEM only resident pages are added. */
		*rss_add += inode->i_mapping->nrpages;
#endif
	}
}
  731. /*
  732. * Called with shm_ids.rwsem held as a reader
  733. */
  734. static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
  735. unsigned long *swp)
  736. {
  737. int next_id;
  738. int total, in_use;
  739. *rss = 0;
  740. *swp = 0;
  741. in_use = shm_ids(ns).in_use;
  742. for (total = 0, next_id = 0; total < in_use; next_id++) {
  743. struct kern_ipc_perm *ipc;
  744. struct shmid_kernel *shp;
  745. ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id);
  746. if (ipc == NULL)
  747. continue;
  748. shp = container_of(ipc, struct shmid_kernel, shm_perm);
  749. shm_add_rss_swap(shp, rss, swp);
  750. total++;
  751. }
  752. }
  753. /*
  754. * This function handles some shmctl commands which require the rwsem
  755. * to be held in write mode.
  756. * NOTE: no locks must be held, the rwsem is taken inside this function.
  757. */
  758. static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
  759. struct shmid64_ds *shmid64)
  760. {
  761. struct kern_ipc_perm *ipcp;
  762. struct shmid_kernel *shp;
  763. int err;
  764. down_write(&shm_ids(ns).rwsem);
  765. rcu_read_lock();
  766. ipcp = ipcctl_obtain_check(ns, &shm_ids(ns), shmid, cmd,
  767. &shmid64->shm_perm, 0);
  768. if (IS_ERR(ipcp)) {
  769. err = PTR_ERR(ipcp);
  770. goto out_unlock1;
  771. }
  772. shp = container_of(ipcp, struct shmid_kernel, shm_perm);
  773. err = security_shm_shmctl(&shp->shm_perm, cmd);
  774. if (err)
  775. goto out_unlock1;
  776. switch (cmd) {
  777. case IPC_RMID:
  778. ipc_lock_object(&shp->shm_perm);
  779. /* do_shm_rmid unlocks the ipc object and rcu */
  780. do_shm_rmid(ns, ipcp);
  781. goto out_up;
  782. case IPC_SET:
  783. ipc_lock_object(&shp->shm_perm);
  784. err = ipc_update_perm(&shmid64->shm_perm, ipcp);
  785. if (err)
  786. goto out_unlock0;
  787. shp->shm_ctim = ktime_get_real_seconds();
  788. break;
  789. default:
  790. err = -EINVAL;
  791. goto out_unlock1;
  792. }
  793. out_unlock0:
  794. ipc_unlock_object(&shp->shm_perm);
  795. out_unlock1:
  796. rcu_read_unlock();
  797. out_up:
  798. up_write(&shm_ids(ns).rwsem);
  799. return err;
  800. }
  801. static int shmctl_ipc_info(struct ipc_namespace *ns,
  802. struct shminfo64 *shminfo)
  803. {
  804. int err = security_shm_shmctl(NULL, IPC_INFO);
  805. if (!err) {
  806. memset(shminfo, 0, sizeof(*shminfo));
  807. shminfo->shmmni = shminfo->shmseg = ns->shm_ctlmni;
  808. shminfo->shmmax = ns->shm_ctlmax;
  809. shminfo->shmall = ns->shm_ctlall;
  810. shminfo->shmmin = SHMMIN;
  811. down_read(&shm_ids(ns).rwsem);
  812. err = ipc_get_maxidx(&shm_ids(ns));
  813. up_read(&shm_ids(ns).rwsem);
  814. if (err < 0)
  815. err = 0;
  816. }
  817. return err;
  818. }
  819. static int shmctl_shm_info(struct ipc_namespace *ns,
  820. struct shm_info *shm_info)
  821. {
  822. int err = security_shm_shmctl(NULL, SHM_INFO);
  823. if (!err) {
  824. memset(shm_info, 0, sizeof(*shm_info));
  825. down_read(&shm_ids(ns).rwsem);
  826. shm_info->used_ids = shm_ids(ns).in_use;
  827. shm_get_stat(ns, &shm_info->shm_rss, &shm_info->shm_swp);
  828. shm_info->shm_tot = ns->shm_tot;
  829. shm_info->swap_attempts = 0;
  830. shm_info->swap_successes = 0;
  831. err = ipc_get_maxidx(&shm_ids(ns));
  832. up_read(&shm_ids(ns).rwsem);
  833. if (err < 0)
  834. err = 0;
  835. }
  836. return err;
  837. }
  838. static int shmctl_stat(struct ipc_namespace *ns, int shmid,
  839. int cmd, struct shmid64_ds *tbuf)
  840. {
  841. struct shmid_kernel *shp;
  842. int err;
  843. memset(tbuf, 0, sizeof(*tbuf));
  844. rcu_read_lock();
  845. if (cmd == SHM_STAT || cmd == SHM_STAT_ANY) {
  846. shp = shm_obtain_object(ns, shmid);
  847. if (IS_ERR(shp)) {
  848. err = PTR_ERR(shp);
  849. goto out_unlock;
  850. }
  851. } else { /* IPC_STAT */
  852. shp = shm_obtain_object_check(ns, shmid);
  853. if (IS_ERR(shp)) {
  854. err = PTR_ERR(shp);
  855. goto out_unlock;
  856. }
  857. }
  858. /*
  859. * Semantically SHM_STAT_ANY ought to be identical to
  860. * that functionality provided by the /proc/sysvipc/
  861. * interface. As such, only audit these calls and
  862. * do not do traditional S_IRUGO permission checks on
  863. * the ipc object.
  864. */
  865. if (cmd == SHM_STAT_ANY)
  866. audit_ipc_obj(&shp->shm_perm);
  867. else {
  868. err = -EACCES;
  869. if (ipcperms(ns, &shp->shm_perm, S_IRUGO))
  870. goto out_unlock;
  871. }
  872. err = security_shm_shmctl(&shp->shm_perm, cmd);
  873. if (err)
  874. goto out_unlock;
  875. ipc_lock_object(&shp->shm_perm);
  876. if (!ipc_valid_object(&shp->shm_perm)) {
  877. ipc_unlock_object(&shp->shm_perm);
  878. err = -EIDRM;
  879. goto out_unlock;
  880. }
  881. kernel_to_ipc64_perm(&shp->shm_perm, &tbuf->shm_perm);
  882. tbuf->shm_segsz = shp->shm_segsz;
  883. tbuf->shm_atime = shp->shm_atim;
  884. tbuf->shm_dtime = shp->shm_dtim;
  885. tbuf->shm_ctime = shp->shm_ctim;
  886. #ifndef CONFIG_64BIT
  887. tbuf->shm_atime_high = shp->shm_atim >> 32;
  888. tbuf->shm_dtime_high = shp->shm_dtim >> 32;
  889. tbuf->shm_ctime_high = shp->shm_ctim >> 32;
  890. #endif
  891. tbuf->shm_cpid = pid_vnr(shp->shm_cprid);
  892. tbuf->shm_lpid = pid_vnr(shp->shm_lprid);
  893. tbuf->shm_nattch = shp->shm_nattch;
  894. if (cmd == IPC_STAT) {
  895. /*
  896. * As defined in SUS:
  897. * Return 0 on success
  898. */
  899. err = 0;
  900. } else {
  901. /*
  902. * SHM_STAT and SHM_STAT_ANY (both Linux specific)
  903. * Return the full id, including the sequence number
  904. */
  905. err = shp->shm_perm.id;
  906. }
  907. ipc_unlock_object(&shp->shm_perm);
  908. out_unlock:
  909. rcu_read_unlock();
  910. return err;
  911. }
  912. static int shmctl_do_lock(struct ipc_namespace *ns, int shmid, int cmd)
  913. {
  914. struct shmid_kernel *shp;
  915. struct file *shm_file;
  916. int err;
  917. rcu_read_lock();
  918. shp = shm_obtain_object_check(ns, shmid);
  919. if (IS_ERR(shp)) {
  920. err = PTR_ERR(shp);
  921. goto out_unlock1;
  922. }
  923. audit_ipc_obj(&(shp->shm_perm));
  924. err = security_shm_shmctl(&shp->shm_perm, cmd);
  925. if (err)
  926. goto out_unlock1;
  927. ipc_lock_object(&shp->shm_perm);
  928. /* check if shm_destroy() is tearing down shp */
  929. if (!ipc_valid_object(&shp->shm_perm)) {
  930. err = -EIDRM;
  931. goto out_unlock0;
  932. }
  933. if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) {
  934. kuid_t euid = current_euid();
  935. if (!uid_eq(euid, shp->shm_perm.uid) &&
  936. !uid_eq(euid, shp->shm_perm.cuid)) {
  937. err = -EPERM;
  938. goto out_unlock0;
  939. }
  940. if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) {
  941. err = -EPERM;
  942. goto out_unlock0;
  943. }
  944. }
  945. shm_file = shp->shm_file;
  946. if (is_file_hugepages(shm_file))
  947. goto out_unlock0;
  948. if (cmd == SHM_LOCK) {
  949. struct user_struct *user = current_user();
  950. err = shmem_lock(shm_file, 1, user);
  951. if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) {
  952. shp->shm_perm.mode |= SHM_LOCKED;
  953. shp->mlock_user = user;
  954. }
  955. goto out_unlock0;
  956. }
  957. /* SHM_UNLOCK */
  958. if (!(shp->shm_perm.mode & SHM_LOCKED))
  959. goto out_unlock0;
  960. shmem_lock(shm_file, 0, shp->mlock_user);
  961. shp->shm_perm.mode &= ~SHM_LOCKED;
  962. shp->mlock_user = NULL;
  963. get_file(shm_file);
  964. ipc_unlock_object(&shp->shm_perm);
  965. rcu_read_unlock();
  966. shmem_unlock_mapping(shm_file->f_mapping);
  967. fput(shm_file);
  968. return err;
  969. out_unlock0:
  970. ipc_unlock_object(&shp->shm_perm);
  971. out_unlock1:
  972. rcu_read_unlock();
  973. return err;
  974. }
  975. long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf)
  976. {
  977. int err, version;
  978. struct ipc_namespace *ns;
  979. struct shmid64_ds sem64;
  980. if (cmd < 0 || shmid < 0)
  981. return -EINVAL;
  982. version = ipc_parse_version(&cmd);
  983. ns = current->nsproxy->ipc_ns;
  984. switch (cmd) {
  985. case IPC_INFO: {
  986. struct shminfo64 shminfo;
  987. err = shmctl_ipc_info(ns, &shminfo);
  988. if (err < 0)
  989. return err;
  990. if (copy_shminfo_to_user(buf, &shminfo, version))
  991. err = -EFAULT;
  992. return err;
  993. }
  994. case SHM_INFO: {
  995. struct shm_info shm_info;
  996. err = shmctl_shm_info(ns, &shm_info);
  997. if (err < 0)
  998. return err;
  999. if (copy_to_user(buf, &shm_info, sizeof(shm_info)))
  1000. err = -EFAULT;
  1001. return err;
  1002. }
  1003. case SHM_STAT:
  1004. case SHM_STAT_ANY:
  1005. case IPC_STAT: {
  1006. err = shmctl_stat(ns, shmid, cmd, &sem64);
  1007. if (err < 0)
  1008. return err;
  1009. if (copy_shmid_to_user(buf, &sem64, version))
  1010. err = -EFAULT;
  1011. return err;
  1012. }
  1013. case IPC_SET:
  1014. if (copy_shmid_from_user(&sem64, buf, version))
  1015. return -EFAULT;
  1016. /* fallthru */
  1017. case IPC_RMID:
  1018. return shmctl_down(ns, shmid, cmd, &sem64);
  1019. case SHM_LOCK:
  1020. case SHM_UNLOCK:
  1021. return shmctl_do_lock(ns, shmid, cmd);
  1022. default:
  1023. return -EINVAL;
  1024. }
  1025. }
  1026. SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
  1027. {
  1028. return ksys_shmctl(shmid, cmd, buf);
  1029. }
  1030. #ifdef CONFIG_COMPAT
  1031. struct compat_shmid_ds {
  1032. struct compat_ipc_perm shm_perm;
  1033. int shm_segsz;
  1034. compat_time_t shm_atime;
  1035. compat_time_t shm_dtime;
  1036. compat_time_t shm_ctime;
  1037. compat_ipc_pid_t shm_cpid;
  1038. compat_ipc_pid_t shm_lpid;
  1039. unsigned short shm_nattch;
  1040. unsigned short shm_unused;
  1041. compat_uptr_t shm_unused2;
  1042. compat_uptr_t shm_unused3;
  1043. };
  1044. struct compat_shminfo64 {
  1045. compat_ulong_t shmmax;
  1046. compat_ulong_t shmmin;
  1047. compat_ulong_t shmmni;
  1048. compat_ulong_t shmseg;
  1049. compat_ulong_t shmall;
  1050. compat_ulong_t __unused1;
  1051. compat_ulong_t __unused2;
  1052. compat_ulong_t __unused3;
  1053. compat_ulong_t __unused4;
  1054. };
  1055. struct compat_shm_info {
  1056. compat_int_t used_ids;
  1057. compat_ulong_t shm_tot, shm_rss, shm_swp;
  1058. compat_ulong_t swap_attempts, swap_successes;
  1059. };
  1060. static int copy_compat_shminfo_to_user(void __user *buf, struct shminfo64 *in,
  1061. int version)
  1062. {
  1063. if (in->shmmax > INT_MAX)
  1064. in->shmmax = INT_MAX;
  1065. if (version == IPC_64) {
  1066. struct compat_shminfo64 info;
  1067. memset(&info, 0, sizeof(info));
  1068. info.shmmax = in->shmmax;
  1069. info.shmmin = in->shmmin;
  1070. info.shmmni = in->shmmni;
  1071. info.shmseg = in->shmseg;
  1072. info.shmall = in->shmall;
  1073. return copy_to_user(buf, &info, sizeof(info));
  1074. } else {
  1075. struct shminfo info;
  1076. memset(&info, 0, sizeof(info));
  1077. info.shmmax = in->shmmax;
  1078. info.shmmin = in->shmmin;
  1079. info.shmmni = in->shmmni;
  1080. info.shmseg = in->shmseg;
  1081. info.shmall = in->shmall;
  1082. return copy_to_user(buf, &info, sizeof(info));
  1083. }
  1084. }
  1085. static int put_compat_shm_info(struct shm_info *ip,
  1086. struct compat_shm_info __user *uip)
  1087. {
  1088. struct compat_shm_info info;
  1089. memset(&info, 0, sizeof(info));
  1090. info.used_ids = ip->used_ids;
  1091. info.shm_tot = ip->shm_tot;
  1092. info.shm_rss = ip->shm_rss;
  1093. info.shm_swp = ip->shm_swp;
  1094. info.swap_attempts = ip->swap_attempts;
  1095. info.swap_successes = ip->swap_successes;
  1096. return copy_to_user(uip, &info, sizeof(info));
  1097. }
  1098. static int copy_compat_shmid_to_user(void __user *buf, struct shmid64_ds *in,
  1099. int version)
  1100. {
  1101. if (version == IPC_64) {
  1102. struct compat_shmid64_ds v;
  1103. memset(&v, 0, sizeof(v));
  1104. to_compat_ipc64_perm(&v.shm_perm, &in->shm_perm);
  1105. v.shm_atime = lower_32_bits(in->shm_atime);
  1106. v.shm_atime_high = upper_32_bits(in->shm_atime);
  1107. v.shm_dtime = lower_32_bits(in->shm_dtime);
  1108. v.shm_dtime_high = upper_32_bits(in->shm_dtime);
  1109. v.shm_ctime = lower_32_bits(in->shm_ctime);
  1110. v.shm_ctime_high = upper_32_bits(in->shm_ctime);
  1111. v.shm_segsz = in->shm_segsz;
  1112. v.shm_nattch = in->shm_nattch;
  1113. v.shm_cpid = in->shm_cpid;
  1114. v.shm_lpid = in->shm_lpid;
  1115. return copy_to_user(buf, &v, sizeof(v));
  1116. } else {
  1117. struct compat_shmid_ds v;
  1118. memset(&v, 0, sizeof(v));
  1119. to_compat_ipc_perm(&v.shm_perm, &in->shm_perm);
  1120. v.shm_perm.key = in->shm_perm.key;
  1121. v.shm_atime = in->shm_atime;
  1122. v.shm_dtime = in->shm_dtime;
  1123. v.shm_ctime = in->shm_ctime;
  1124. v.shm_segsz = in->shm_segsz;
  1125. v.shm_nattch = in->shm_nattch;
  1126. v.shm_cpid = in->shm_cpid;
  1127. v.shm_lpid = in->shm_lpid;
  1128. return copy_to_user(buf, &v, sizeof(v));
  1129. }
  1130. }
  1131. static int copy_compat_shmid_from_user(struct shmid64_ds *out, void __user *buf,
  1132. int version)
  1133. {
  1134. memset(out, 0, sizeof(*out));
  1135. if (version == IPC_64) {
  1136. struct compat_shmid64_ds __user *p = buf;
  1137. return get_compat_ipc64_perm(&out->shm_perm, &p->shm_perm);
  1138. } else {
  1139. struct compat_shmid_ds __user *p = buf;
  1140. return get_compat_ipc_perm(&out->shm_perm, &p->shm_perm);
  1141. }
  1142. }
  1143. long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr)
  1144. {
  1145. struct ipc_namespace *ns;
  1146. struct shmid64_ds sem64;
  1147. int version = compat_ipc_parse_version(&cmd);
  1148. int err;
  1149. ns = current->nsproxy->ipc_ns;
  1150. if (cmd < 0 || shmid < 0)
  1151. return -EINVAL;
  1152. switch (cmd) {
  1153. case IPC_INFO: {
  1154. struct shminfo64 shminfo;
  1155. err = shmctl_ipc_info(ns, &shminfo);
  1156. if (err < 0)
  1157. return err;
  1158. if (copy_compat_shminfo_to_user(uptr, &shminfo, version))
  1159. err = -EFAULT;
  1160. return err;
  1161. }
  1162. case SHM_INFO: {
  1163. struct shm_info shm_info;
  1164. err = shmctl_shm_info(ns, &shm_info);
  1165. if (err < 0)
  1166. return err;
  1167. if (put_compat_shm_info(&shm_info, uptr))
  1168. err = -EFAULT;
  1169. return err;
  1170. }
  1171. case IPC_STAT:
  1172. case SHM_STAT_ANY:
  1173. case SHM_STAT:
  1174. err = shmctl_stat(ns, shmid, cmd, &sem64);
  1175. if (err < 0)
  1176. return err;
  1177. if (copy_compat_shmid_to_user(uptr, &sem64, version))
  1178. err = -EFAULT;
  1179. return err;
  1180. case IPC_SET:
  1181. if (copy_compat_shmid_from_user(&sem64, uptr, version))
  1182. return -EFAULT;
  1183. /* fallthru */
  1184. case IPC_RMID:
  1185. return shmctl_down(ns, shmid, cmd, &sem64);
  1186. case SHM_LOCK:
  1187. case SHM_UNLOCK:
  1188. return shmctl_do_lock(ns, shmid, cmd);
  1189. break;
  1190. default:
  1191. return -EINVAL;
  1192. }
  1193. return err;
  1194. }
  1195. COMPAT_SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, void __user *, uptr)
  1196. {
  1197. return compat_ksys_shmctl(shmid, cmd, uptr);
  1198. }
  1199. #endif
  1200. /*
  1201. * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
  1202. *
  1203. * NOTE! Despite the name, this is NOT a direct system call entrypoint. The
  1204. * "raddr" thing points to kernel space, and there has to be a wrapper around
  1205. * this.
  1206. */
  1207. long do_shmat(int shmid, char __user *shmaddr, int shmflg,
  1208. ulong *raddr, unsigned long shmlba)
  1209. {
  1210. struct shmid_kernel *shp;
  1211. unsigned long addr = (unsigned long)shmaddr;
  1212. unsigned long size;
  1213. struct file *file, *base;
  1214. int err;
  1215. unsigned long flags = MAP_SHARED;
  1216. unsigned long prot;
  1217. int acc_mode;
  1218. struct ipc_namespace *ns;
  1219. struct shm_file_data *sfd;
  1220. int f_flags;
  1221. unsigned long populate = 0;
  1222. err = -EINVAL;
  1223. if (shmid < 0)
  1224. goto out;
  1225. if (addr) {
  1226. if (addr & (shmlba - 1)) {
  1227. if (shmflg & SHM_RND) {
  1228. addr &= ~(shmlba - 1); /* round down */
  1229. /*
  1230. * Ensure that the round-down is non-nil
  1231. * when remapping. This can happen for
  1232. * cases when addr < shmlba.
  1233. */
  1234. if (!addr && (shmflg & SHM_REMAP))
  1235. goto out;
  1236. } else
  1237. #ifndef __ARCH_FORCE_SHMLBA
  1238. if (addr & ~PAGE_MASK)
  1239. #endif
  1240. goto out;
  1241. }
  1242. flags |= MAP_FIXED;
  1243. } else if ((shmflg & SHM_REMAP))
  1244. goto out;
  1245. if (shmflg & SHM_RDONLY) {
  1246. prot = PROT_READ;
  1247. acc_mode = S_IRUGO;
  1248. f_flags = O_RDONLY;
  1249. } else {
  1250. prot = PROT_READ | PROT_WRITE;
  1251. acc_mode = S_IRUGO | S_IWUGO;
  1252. f_flags = O_RDWR;
  1253. }
  1254. if (shmflg & SHM_EXEC) {
  1255. prot |= PROT_EXEC;
  1256. acc_mode |= S_IXUGO;
  1257. }
  1258. /*
  1259. * We cannot rely on the fs check since SYSV IPC does have an
  1260. * additional creator id...
  1261. */
  1262. ns = current->nsproxy->ipc_ns;
  1263. rcu_read_lock();
  1264. shp = shm_obtain_object_check(ns, shmid);
  1265. if (IS_ERR(shp)) {
  1266. err = PTR_ERR(shp);
  1267. goto out_unlock;
  1268. }
  1269. err = -EACCES;
  1270. if (ipcperms(ns, &shp->shm_perm, acc_mode))
  1271. goto out_unlock;
  1272. err = security_shm_shmat(&shp->shm_perm, shmaddr, shmflg);
  1273. if (err)
  1274. goto out_unlock;
  1275. ipc_lock_object(&shp->shm_perm);
  1276. /* check if shm_destroy() is tearing down shp */
  1277. if (!ipc_valid_object(&shp->shm_perm)) {
  1278. ipc_unlock_object(&shp->shm_perm);
  1279. err = -EIDRM;
  1280. goto out_unlock;
  1281. }
  1282. /*
  1283. * We need to take a reference to the real shm file to prevent the
  1284. * pointer from becoming stale in cases where the lifetime of the outer
  1285. * file extends beyond that of the shm segment. It's not usually
  1286. * possible, but it can happen during remap_file_pages() emulation as
  1287. * that unmaps the memory, then does ->mmap() via file reference only.
  1288. * We'll deny the ->mmap() if the shm segment was since removed, but to
  1289. * detect shm ID reuse we need to compare the file pointers.
  1290. */
  1291. base = get_file(shp->shm_file);
  1292. shp->shm_nattch++;
  1293. size = i_size_read(file_inode(base));
  1294. ipc_unlock_object(&shp->shm_perm);
  1295. rcu_read_unlock();
  1296. err = -ENOMEM;
  1297. sfd = kzalloc(sizeof(*sfd), GFP_KERNEL);
  1298. if (!sfd) {
  1299. fput(base);
  1300. goto out_nattch;
  1301. }
  1302. file = alloc_file_clone(base, f_flags,
  1303. is_file_hugepages(base) ?
  1304. &shm_file_operations_huge :
  1305. &shm_file_operations);
  1306. err = PTR_ERR(file);
  1307. if (IS_ERR(file)) {
  1308. kfree(sfd);
  1309. fput(base);
  1310. goto out_nattch;
  1311. }
  1312. sfd->id = shp->shm_perm.id;
  1313. sfd->ns = get_ipc_ns(ns);
  1314. sfd->file = base;
  1315. sfd->vm_ops = NULL;
  1316. file->private_data = sfd;
  1317. err = security_mmap_file(file, prot, flags);
  1318. if (err)
  1319. goto out_fput;
  1320. if (down_write_killable(&current->mm->mmap_sem)) {
  1321. err = -EINTR;
  1322. goto out_fput;
  1323. }
  1324. if (addr && !(shmflg & SHM_REMAP)) {
  1325. err = -EINVAL;
  1326. if (addr + size < addr)
  1327. goto invalid;
  1328. if (find_vma_intersection(current->mm, addr, addr + size))
  1329. goto invalid;
  1330. }
  1331. addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate, NULL);
  1332. *raddr = addr;
  1333. err = 0;
  1334. if (IS_ERR_VALUE(addr))
  1335. err = (long)addr;
  1336. invalid:
  1337. up_write(&current->mm->mmap_sem);
  1338. if (populate)
  1339. mm_populate(addr, populate);
  1340. out_fput:
  1341. fput(file);
  1342. out_nattch:
  1343. down_write(&shm_ids(ns).rwsem);
  1344. shp = shm_lock(ns, shmid);
  1345. shp->shm_nattch--;
  1346. if (shm_may_destroy(ns, shp))
  1347. shm_destroy(ns, shp);
  1348. else
  1349. shm_unlock(shp);
  1350. up_write(&shm_ids(ns).rwsem);
  1351. return err;
  1352. out_unlock:
  1353. rcu_read_unlock();
  1354. out:
  1355. return err;
  1356. }
  1357. SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg)
  1358. {
  1359. unsigned long ret;
  1360. long err;
  1361. err = do_shmat(shmid, shmaddr, shmflg, &ret, SHMLBA);
  1362. if (err)
  1363. return err;
  1364. force_successful_syscall_return();
  1365. return (long)ret;
  1366. }
  1367. #ifdef CONFIG_COMPAT
  1368. #ifndef COMPAT_SHMLBA
  1369. #define COMPAT_SHMLBA SHMLBA
  1370. #endif
  1371. COMPAT_SYSCALL_DEFINE3(shmat, int, shmid, compat_uptr_t, shmaddr, int, shmflg)
  1372. {
  1373. unsigned long ret;
  1374. long err;
  1375. err = do_shmat(shmid, compat_ptr(shmaddr), shmflg, &ret, COMPAT_SHMLBA);
  1376. if (err)
  1377. return err;
  1378. force_successful_syscall_return();
  1379. return (long)ret;
  1380. }
  1381. #endif
  1382. /*
  1383. * detach and kill segment if marked destroyed.
  1384. * The work is done in shm_close.
  1385. */
  1386. long ksys_shmdt(char __user *shmaddr)
  1387. {
  1388. struct mm_struct *mm = current->mm;
  1389. struct vm_area_struct *vma;
  1390. unsigned long addr = (unsigned long)shmaddr;
  1391. int retval = -EINVAL;
  1392. #ifdef CONFIG_MMU
  1393. loff_t size = 0;
  1394. struct file *file;
  1395. struct vm_area_struct *next;
  1396. #endif
  1397. if (addr & ~PAGE_MASK)
  1398. return retval;
  1399. if (down_write_killable(&mm->mmap_sem))
  1400. return -EINTR;
  1401. /*
  1402. * This function tries to be smart and unmap shm segments that
  1403. * were modified by partial mlock or munmap calls:
  1404. * - It first determines the size of the shm segment that should be
  1405. * unmapped: It searches for a vma that is backed by shm and that
  1406. * started at address shmaddr. It records it's size and then unmaps
  1407. * it.
  1408. * - Then it unmaps all shm vmas that started at shmaddr and that
  1409. * are within the initially determined size and that are from the
  1410. * same shm segment from which we determined the size.
  1411. * Errors from do_munmap are ignored: the function only fails if
  1412. * it's called with invalid parameters or if it's called to unmap
  1413. * a part of a vma. Both calls in this function are for full vmas,
  1414. * the parameters are directly copied from the vma itself and always
  1415. * valid - therefore do_munmap cannot fail. (famous last words?)
  1416. */
  1417. /*
  1418. * If it had been mremap()'d, the starting address would not
  1419. * match the usual checks anyway. So assume all vma's are
  1420. * above the starting address given.
  1421. */
  1422. vma = find_vma(mm, addr);
  1423. #ifdef CONFIG_MMU
  1424. while (vma) {
  1425. next = vma->vm_next;
  1426. /*
  1427. * Check if the starting address would match, i.e. it's
  1428. * a fragment created by mprotect() and/or munmap(), or it
  1429. * otherwise it starts at this address with no hassles.
  1430. */
  1431. if ((vma->vm_ops == &shm_vm_ops) &&
  1432. (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) {
  1433. /*
  1434. * Record the file of the shm segment being
  1435. * unmapped. With mremap(), someone could place
  1436. * page from another segment but with equal offsets
  1437. * in the range we are unmapping.
  1438. */
  1439. file = vma->vm_file;
  1440. size = i_size_read(file_inode(vma->vm_file));
  1441. do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
  1442. /*
  1443. * We discovered the size of the shm segment, so
  1444. * break out of here and fall through to the next
  1445. * loop that uses the size information to stop
  1446. * searching for matching vma's.
  1447. */
  1448. retval = 0;
  1449. vma = next;
  1450. break;
  1451. }
  1452. vma = next;
  1453. }
  1454. /*
  1455. * We need look no further than the maximum address a fragment
  1456. * could possibly have landed at. Also cast things to loff_t to
  1457. * prevent overflows and make comparisons vs. equal-width types.
  1458. */
  1459. size = PAGE_ALIGN(size);
  1460. while (vma && (loff_t)(vma->vm_end - addr) <= size) {
  1461. next = vma->vm_next;
  1462. /* finding a matching vma now does not alter retval */
  1463. if ((vma->vm_ops == &shm_vm_ops) &&
  1464. ((vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) &&
  1465. (vma->vm_file == file))
  1466. do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
  1467. vma = next;
  1468. }
  1469. #else /* CONFIG_MMU */
  1470. /* under NOMMU conditions, the exact address to be destroyed must be
  1471. * given
  1472. */
  1473. if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
  1474. do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
  1475. retval = 0;
  1476. }
  1477. #endif
  1478. up_write(&mm->mmap_sem);
  1479. return retval;
  1480. }
  1481. SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
  1482. {
  1483. return ksys_shmdt(shmaddr);
  1484. }
  1485. #ifdef CONFIG_PROC_FS
  1486. static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
  1487. {
  1488. struct pid_namespace *pid_ns = ipc_seq_pid_ns(s);
  1489. struct user_namespace *user_ns = seq_user_ns(s);
  1490. struct kern_ipc_perm *ipcp = it;
  1491. struct shmid_kernel *shp;
  1492. unsigned long rss = 0, swp = 0;
  1493. shp = container_of(ipcp, struct shmid_kernel, shm_perm);
  1494. shm_add_rss_swap(shp, &rss, &swp);
  1495. #if BITS_PER_LONG <= 32
  1496. #define SIZE_SPEC "%10lu"
  1497. #else
  1498. #define SIZE_SPEC "%21lu"
  1499. #endif
  1500. seq_printf(s,
  1501. "%10d %10d %4o " SIZE_SPEC " %5u %5u "
  1502. "%5lu %5u %5u %5u %5u %10llu %10llu %10llu "
  1503. SIZE_SPEC " " SIZE_SPEC "\n",
  1504. shp->shm_perm.key,
  1505. shp->shm_perm.id,
  1506. shp->shm_perm.mode,
  1507. shp->shm_segsz,
  1508. pid_nr_ns(shp->shm_cprid, pid_ns),
  1509. pid_nr_ns(shp->shm_lprid, pid_ns),
  1510. shp->shm_nattch,
  1511. from_kuid_munged(user_ns, shp->shm_perm.uid),
  1512. from_kgid_munged(user_ns, shp->shm_perm.gid),
  1513. from_kuid_munged(user_ns, shp->shm_perm.cuid),
  1514. from_kgid_munged(user_ns, shp->shm_perm.cgid),
  1515. shp->shm_atim,
  1516. shp->shm_dtim,
  1517. shp->shm_ctim,
  1518. rss * PAGE_SIZE,
  1519. swp * PAGE_SIZE);
  1520. return 0;
  1521. }
  1522. #endif