linux_sysvec.c 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909
  1. /*-
  2. * SPDX-License-Identifier: BSD-2-Clause
  3. *
  4. * Copyright (c) 1994-1996 Søren Schmidt
  5. * All rights reserved.
  6. *
  7. * Redistribution and use in source and binary forms, with or without
  8. * modification, are permitted provided that the following conditions
  9. * are met:
  10. * 1. Redistributions of source code must retain the above copyright
  11. * notice, this list of conditions and the following disclaimer.
  12. * 2. Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in the
  14. * documentation and/or other materials provided with the distribution.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  17. * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19. * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  20. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  22. * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  23. * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  24. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  25. * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  26. * SUCH DAMAGE.
  27. */
  28. #define __ELF_WORD_SIZE 32
  29. #include <sys/param.h>
  30. #include <sys/exec.h>
  31. #include <sys/fcntl.h>
  32. #include <sys/imgact.h>
  33. #include <sys/imgact_aout.h>
  34. #include <sys/imgact_elf.h>
  35. #include <sys/kernel.h>
  36. #include <sys/lock.h>
  37. #include <sys/malloc.h>
  38. #include <sys/module.h>
  39. #include <sys/mutex.h>
  40. #include <sys/proc.h>
  41. #include <sys/stddef.h>
  42. #include <sys/syscallsubr.h>
  43. #include <sys/sysctl.h>
  44. #include <sys/sysent.h>
  45. #include <sys/sysproto.h>
  46. #include <vm/pmap.h>
  47. #include <vm/vm.h>
  48. #include <vm/vm_map.h>
  49. #include <vm/vm_page.h>
  50. #include <machine/cpu.h>
  51. #include <machine/cputypes.h>
  52. #include <machine/md_var.h>
  53. #include <machine/pcb.h>
  54. #include <machine/trap.h>
  55. #include <x86/linux/linux_x86.h>
  56. #include <i386/linux/linux.h>
  57. #include <i386/linux/linux_proto.h>
  58. #include <compat/linux/linux_elf.h>
  59. #include <compat/linux/linux_emul.h>
  60. #include <compat/linux/linux_fork.h>
  61. #include <compat/linux/linux_ioctl.h>
  62. #include <compat/linux/linux_mib.h>
  63. #include <compat/linux/linux_misc.h>
  64. #include <compat/linux/linux_signal.h>
  65. #include <compat/linux/linux_util.h>
  66. #include <compat/linux/linux_vdso.h>
  67. #include <x86/linux/linux_x86_sigframe.h>
  /* Register version 1 of the "linux" module. */
  68. MODULE_VERSION(linux, 1);
  /*
   * Layout of the top of the Linux user address space, from the highest
   * address downwards: the two-page vDSO area, then one page reserved for
   * the shared page, then the user stack with ps_strings at its top.
   *
   * The size macro is fully parenthesised so that it expands safely inside
   * larger expressions (e.g. as a divisor).
   */
  #define LINUX_VDSOPAGE_SIZE (PAGE_SIZE * 2)
  #define LINUX_VDSOPAGE (VM_MAXUSER_ADDRESS - LINUX_VDSOPAGE_SIZE)
  /* PAGE_SIZE here is the size of the native SHAREDPAGE. */
  #define LINUX_SHAREDPAGE (LINUX_VDSOPAGE - PAGE_SIZE)
  #define LINUX_USRSTACK LINUX_SHAREDPAGE
  #define LINUX_PS_STRINGS (LINUX_USRSTACK - sizeof(struct ps_strings))
  78. static int linux_szsigcode;
  79. static vm_object_t linux_vdso_obj;
  80. static char *linux_vdso_mapping;
  81. extern char _binary_linux_vdso_so_o_start;
  82. extern char _binary_linux_vdso_so_o_end;
  83. static vm_offset_t linux_vdso_base;
  84. extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
  85. extern const char *linux_syscallnames[];
  86. SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);
  87. static int linux_fixup(uintptr_t *stack_base,
  88. struct image_params *iparams);
  89. static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
  90. static void linux_exec_setregs(struct thread *td,
  91. struct image_params *imgp, uintptr_t stack);
  92. static void linux_exec_sysvec_init(void *param);
  93. static int linux_on_exec_vmspace(struct proc *p,
  94. struct image_params *imgp);
  95. static void linux_set_fork_retval(struct thread *td);
  96. static void linux_vdso_install(const void *param);
  97. static void linux_vdso_deinstall(const void *param);
  98. static void linux_vdso_reloc(char *mapping, Elf_Addr offset);
  99. LINUX_VDSO_SYM_CHAR(linux_platform);
  100. LINUX_VDSO_SYM_INTPTR(__kernel_vsyscall);
  101. LINUX_VDSO_SYM_INTPTR(linux_vdso_sigcode);
  102. LINUX_VDSO_SYM_INTPTR(linux_vdso_rt_sigcode);
  103. LINUX_VDSO_SYM_INTPTR(kern_timekeep_base);
  104. LINUX_VDSO_SYM_INTPTR(kern_tsc_selector);
  105. LINUX_VDSO_SYM_INTPTR(kern_cpu_selector);
  106. static int
  107. linux_fixup(uintptr_t *stack_base, struct image_params *imgp)
  108. {
  109. register_t *base, *argv, *envp;
  110. base = (register_t *)*stack_base;
  111. argv = base;
  112. envp = base + (imgp->args->argc + 1);
  113. base--;
  114. if (suword(base, (intptr_t)envp) != 0)
  115. return (EFAULT);
  116. base--;
  117. if (suword(base, (intptr_t)argv) != 0)
  118. return (EFAULT);
  119. base--;
  120. if (suword(base, imgp->args->argc) != 0)
  121. return (EFAULT);
  122. *stack_base = (uintptr_t)base;
  123. return (0);
  124. }
  125. void
  126. linux32_arch_copyout_auxargs(struct image_params *imgp, Elf_Auxinfo **pos)
  127. {
  128. AUXARGS_ENTRY((*pos), LINUX_AT_SYSINFO_EHDR, linux_vdso_base);
  129. AUXARGS_ENTRY((*pos), LINUX_AT_SYSINFO, __kernel_vsyscall);
  130. AUXARGS_ENTRY((*pos), LINUX_AT_HWCAP, cpu_feature);
  131. AUXARGS_ENTRY((*pos), LINUX_AT_HWCAP2, linux_x86_elf_hwcap2());
  132. AUXARGS_ENTRY((*pos), LINUX_AT_PLATFORM, PTROUT(linux_platform));
  133. }
  134. static void
  135. linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
  136. {
  137. struct thread *td = curthread;
  138. struct proc *p = td->td_proc;
  139. struct sigacts *psp;
  140. struct trapframe *regs;
  141. struct l_rt_sigframe *fp, frame;
  142. int sig, code;
  143. int oonstack;
  144. sig = linux_translate_traps(ksi->ksi_signo, ksi->ksi_trapno);
  145. code = ksi->ksi_code;
  146. PROC_LOCK_ASSERT(p, MA_OWNED);
  147. psp = p->p_sigacts;
  148. mtx_assert(&psp->ps_mtx, MA_OWNED);
  149. regs = td->td_frame;
  150. oonstack = sigonstack(regs->tf_esp);
  151. /* Allocate space for the signal handler context. */
  152. if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
  153. SIGISMEMBER(psp->ps_sigonstack, sig)) {
  154. fp = (struct l_rt_sigframe *)((uintptr_t)td->td_sigstk.ss_sp +
  155. td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe));
  156. } else
  157. fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
  158. mtx_unlock(&psp->ps_mtx);
  159. /* Build the argument list for the signal handler. */
  160. sig = bsd_to_linux_signal(sig);
  161. bzero(&frame, sizeof(frame));
  162. frame.sf_sig = sig;
  163. frame.sf_siginfo = PTROUT(&fp->sf_si);
  164. frame.sf_ucontext = PTROUT(&fp->sf_uc);
  165. /* Fill in POSIX parts. */
  166. siginfo_to_lsiginfo(&ksi->ksi_info, &frame.sf_si, sig);
  167. /* Build the signal context to be used by sigreturn. */
  168. frame.sf_uc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp);
  169. frame.sf_uc.uc_stack.ss_size = td->td_sigstk.ss_size;
  170. frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK)
  171. ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
  172. PROC_UNLOCK(p);
  173. bsd_to_linux_sigset(mask, &frame.sf_uc.uc_sigmask);
  174. frame.sf_uc.uc_mcontext.sc_mask = frame.sf_uc.uc_sigmask.__mask;
  175. frame.sf_uc.uc_mcontext.sc_gs = rgs();
  176. frame.sf_uc.uc_mcontext.sc_fs = regs->tf_fs;
  177. frame.sf_uc.uc_mcontext.sc_es = regs->tf_es;
  178. frame.sf_uc.uc_mcontext.sc_ds = regs->tf_ds;
  179. frame.sf_uc.uc_mcontext.sc_edi = regs->tf_edi;
  180. frame.sf_uc.uc_mcontext.sc_esi = regs->tf_esi;
  181. frame.sf_uc.uc_mcontext.sc_ebp = regs->tf_ebp;
  182. frame.sf_uc.uc_mcontext.sc_ebx = regs->tf_ebx;
  183. frame.sf_uc.uc_mcontext.sc_esp = regs->tf_esp;
  184. frame.sf_uc.uc_mcontext.sc_edx = regs->tf_edx;
  185. frame.sf_uc.uc_mcontext.sc_ecx = regs->tf_ecx;
  186. frame.sf_uc.uc_mcontext.sc_eax = regs->tf_eax;
  187. frame.sf_uc.uc_mcontext.sc_eip = regs->tf_eip;
  188. frame.sf_uc.uc_mcontext.sc_cs = regs->tf_cs;
  189. frame.sf_uc.uc_mcontext.sc_eflags = regs->tf_eflags;
  190. frame.sf_uc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
  191. frame.sf_uc.uc_mcontext.sc_ss = regs->tf_ss;
  192. frame.sf_uc.uc_mcontext.sc_err = regs->tf_err;
  193. frame.sf_uc.uc_mcontext.sc_cr2 = (register_t)ksi->ksi_addr;
  194. frame.sf_uc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);
  195. if (copyout(&frame, fp, sizeof(frame)) != 0) {
  196. /*
  197. * Process has trashed its stack; give it an illegal
  198. * instruction to halt it in its tracks.
  199. */
  200. PROC_LOCK(p);
  201. sigexit(td, SIGILL);
  202. }
  203. /* Build context to run handler in. */
  204. regs->tf_esp = PTROUT(fp);
  205. regs->tf_eip = linux_vdso_rt_sigcode;
  206. regs->tf_edi = PTROUT(catcher);
  207. regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
  208. regs->tf_cs = _ucodesel;
  209. regs->tf_ds = _udatasel;
  210. regs->tf_es = _udatasel;
  211. regs->tf_fs = _udatasel;
  212. regs->tf_ss = _udatasel;
  213. PROC_LOCK(p);
  214. mtx_lock(&psp->ps_mtx);
  215. }
  216. /*
  217. * Send an interrupt to process.
  218. *
  219. * Stack is set up to allow sigcode stored
  220. * in u. to call routine, followed by kcall
  221. * to sigreturn routine below. After sigreturn
  222. * resets the signal mask, the stack, and the
  223. * frame pointer, it returns to the user
  224. * specified pc, psl.
  225. */
  226. static void
  227. linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask)
  228. {
  229. struct thread *td = curthread;
  230. struct proc *p = td->td_proc;
  231. struct sigacts *psp;
  232. struct trapframe *regs;
  233. struct l_sigframe *fp, frame;
  234. l_sigset_t lmask;
  235. int sig;
  236. int oonstack;
  237. PROC_LOCK_ASSERT(p, MA_OWNED);
  238. psp = p->p_sigacts;
  239. sig = linux_translate_traps(ksi->ksi_signo, ksi->ksi_trapno);
  240. mtx_assert(&psp->ps_mtx, MA_OWNED);
  241. if (SIGISMEMBER(psp->ps_siginfo, sig)) {
  242. /* Signal handler installed with SA_SIGINFO. */
  243. linux_rt_sendsig(catcher, ksi, mask);
  244. return;
  245. }
  246. regs = td->td_frame;
  247. oonstack = sigonstack(regs->tf_esp);
  248. /* Allocate space for the signal handler context. */
  249. if ((td->td_pflags & TDP_ALTSTACK) && !oonstack &&
  250. SIGISMEMBER(psp->ps_sigonstack, sig)) {
  251. fp = (struct l_sigframe *)((uintptr_t)td->td_sigstk.ss_sp +
  252. td->td_sigstk.ss_size - sizeof(struct l_sigframe));
  253. } else
  254. fp = (struct l_sigframe *)regs->tf_esp - 1;
  255. mtx_unlock(&psp->ps_mtx);
  256. PROC_UNLOCK(p);
  257. /* Build the argument list for the signal handler. */
  258. sig = bsd_to_linux_signal(sig);
  259. bzero(&frame, sizeof(frame));
  260. frame.sf_sig = sig;
  261. frame.sf_sigmask = *mask;
  262. bsd_to_linux_sigset(mask, &lmask);
  263. /* Build the signal context to be used by sigreturn. */
  264. frame.sf_sc.sc_mask = lmask.__mask;
  265. frame.sf_sc.sc_gs = rgs();
  266. frame.sf_sc.sc_fs = regs->tf_fs;
  267. frame.sf_sc.sc_es = regs->tf_es;
  268. frame.sf_sc.sc_ds = regs->tf_ds;
  269. frame.sf_sc.sc_edi = regs->tf_edi;
  270. frame.sf_sc.sc_esi = regs->tf_esi;
  271. frame.sf_sc.sc_ebp = regs->tf_ebp;
  272. frame.sf_sc.sc_ebx = regs->tf_ebx;
  273. frame.sf_sc.sc_esp = regs->tf_esp;
  274. frame.sf_sc.sc_edx = regs->tf_edx;
  275. frame.sf_sc.sc_ecx = regs->tf_ecx;
  276. frame.sf_sc.sc_eax = regs->tf_eax;
  277. frame.sf_sc.sc_eip = regs->tf_eip;
  278. frame.sf_sc.sc_cs = regs->tf_cs;
  279. frame.sf_sc.sc_eflags = regs->tf_eflags;
  280. frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
  281. frame.sf_sc.sc_ss = regs->tf_ss;
  282. frame.sf_sc.sc_err = regs->tf_err;
  283. frame.sf_sc.sc_cr2 = (register_t)ksi->ksi_addr;
  284. frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno);
  285. if (copyout(&frame, fp, sizeof(frame)) != 0) {
  286. /*
  287. * Process has trashed its stack; give it an illegal
  288. * instruction to halt it in its tracks.
  289. */
  290. PROC_LOCK(p);
  291. sigexit(td, SIGILL);
  292. }
  293. /* Build context to run handler in. */
  294. regs->tf_esp = PTROUT(fp);
  295. regs->tf_eip = linux_vdso_sigcode;
  296. regs->tf_edi = PTROUT(catcher);
  297. regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D);
  298. regs->tf_cs = _ucodesel;
  299. regs->tf_ds = _udatasel;
  300. regs->tf_es = _udatasel;
  301. regs->tf_fs = _udatasel;
  302. regs->tf_ss = _udatasel;
  303. PROC_LOCK(p);
  304. mtx_lock(&psp->ps_mtx);
  305. }
  306. /*
  307. * System call to cleanup state after a signal
  308. * has been taken. Reset signal mask and
  309. * stack state from context left by sendsig (above).
  310. * Return to previous pc and psl as specified by
  311. * context left by sendsig. Check carefully to
  312. * make sure that the user has not modified the
  313. * psl to gain improper privileges or to cause
  314. * a machine fault.
  315. */
  316. int
  317. linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
  318. {
  319. struct l_sigframe frame;
  320. struct trapframe *regs;
  321. int eflags;
  322. ksiginfo_t ksi;
  323. regs = td->td_frame;
  324. /*
  325. * The trampoline code hands us the sigframe.
  326. * It is unsafe to keep track of it ourselves, in the event that a
  327. * program jumps out of a signal handler.
  328. */
  329. if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
  330. return (EFAULT);
  331. /* Check for security violations. */
  332. #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
  333. eflags = frame.sf_sc.sc_eflags;
  334. if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
  335. return (EINVAL);
  336. /*
  337. * Don't allow users to load a valid privileged %cs. Let the
  338. * hardware check for invalid selectors, excess privilege in
  339. * other selectors, invalid %eip's and invalid %esp's.
  340. */
  341. #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
  342. if (!CS_SECURE(frame.sf_sc.sc_cs)) {
  343. ksiginfo_init_trap(&ksi);
  344. ksi.ksi_signo = SIGBUS;
  345. ksi.ksi_code = BUS_OBJERR;
  346. ksi.ksi_trapno = T_PROTFLT;
  347. ksi.ksi_addr = (void *)regs->tf_eip;
  348. trapsignal(td, &ksi);
  349. return (EINVAL);
  350. }
  351. kern_sigprocmask(td, SIG_SETMASK, &frame.sf_sigmask, NULL, 0);
  352. /* Restore signal context. */
  353. /* %gs was restored by the trampoline. */
  354. regs->tf_fs = frame.sf_sc.sc_fs;
  355. regs->tf_es = frame.sf_sc.sc_es;
  356. regs->tf_ds = frame.sf_sc.sc_ds;
  357. regs->tf_edi = frame.sf_sc.sc_edi;
  358. regs->tf_esi = frame.sf_sc.sc_esi;
  359. regs->tf_ebp = frame.sf_sc.sc_ebp;
  360. regs->tf_ebx = frame.sf_sc.sc_ebx;
  361. regs->tf_edx = frame.sf_sc.sc_edx;
  362. regs->tf_ecx = frame.sf_sc.sc_ecx;
  363. regs->tf_eax = frame.sf_sc.sc_eax;
  364. regs->tf_eip = frame.sf_sc.sc_eip;
  365. regs->tf_cs = frame.sf_sc.sc_cs;
  366. regs->tf_eflags = eflags;
  367. regs->tf_esp = frame.sf_sc.sc_esp_at_signal;
  368. regs->tf_ss = frame.sf_sc.sc_ss;
  369. return (EJUSTRETURN);
  370. }
  371. /*
  372. * System call to cleanup state after a signal
  373. * has been taken. Reset signal mask and
  374. * stack state from context left by rt_sendsig (above).
  375. * Return to previous pc and psl as specified by
  376. * context left by sendsig. Check carefully to
  377. * make sure that the user has not modified the
  378. * psl to gain improper privileges or to cause
  379. * a machine fault.
  380. */
  381. int
  382. linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
  383. {
  384. struct l_ucontext uc;
  385. struct l_sigcontext *context;
  386. sigset_t bmask;
  387. l_stack_t *lss;
  388. stack_t ss;
  389. struct trapframe *regs;
  390. int eflags;
  391. ksiginfo_t ksi;
  392. regs = td->td_frame;
  393. /*
  394. * The trampoline code hands us the ucontext.
  395. * It is unsafe to keep track of it ourselves, in the event that a
  396. * program jumps out of a signal handler.
  397. */
  398. if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
  399. return (EFAULT);
  400. context = &uc.uc_mcontext;
  401. /* Check for security violations. */
  402. #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
  403. eflags = context->sc_eflags;
  404. if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
  405. return (EINVAL);
  406. /*
  407. * Don't allow users to load a valid privileged %cs. Let the
  408. * hardware check for invalid selectors, excess privilege in
  409. * other selectors, invalid %eip's and invalid %esp's.
  410. */
  411. #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
  412. if (!CS_SECURE(context->sc_cs)) {
  413. ksiginfo_init_trap(&ksi);
  414. ksi.ksi_signo = SIGBUS;
  415. ksi.ksi_code = BUS_OBJERR;
  416. ksi.ksi_trapno = T_PROTFLT;
  417. ksi.ksi_addr = (void *)regs->tf_eip;
  418. trapsignal(td, &ksi);
  419. return (EINVAL);
  420. }
  421. linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
  422. kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);
  423. /* Restore signal context. */
  424. /* %gs was restored by the trampoline. */
  425. regs->tf_fs = context->sc_fs;
  426. regs->tf_es = context->sc_es;
  427. regs->tf_ds = context->sc_ds;
  428. regs->tf_edi = context->sc_edi;
  429. regs->tf_esi = context->sc_esi;
  430. regs->tf_ebp = context->sc_ebp;
  431. regs->tf_ebx = context->sc_ebx;
  432. regs->tf_edx = context->sc_edx;
  433. regs->tf_ecx = context->sc_ecx;
  434. regs->tf_eax = context->sc_eax;
  435. regs->tf_eip = context->sc_eip;
  436. regs->tf_cs = context->sc_cs;
  437. regs->tf_eflags = eflags;
  438. regs->tf_esp = context->sc_esp_at_signal;
  439. regs->tf_ss = context->sc_ss;
  440. /* Call sigaltstack & ignore results. */
  441. lss = &uc.uc_stack;
  442. ss.ss_sp = PTRIN(lss->ss_sp);
  443. ss.ss_size = lss->ss_size;
  444. ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);
  445. (void)kern_sigaltstack(td, &ss, NULL);
  446. return (EJUSTRETURN);
  447. }
  448. static int
  449. linux_fetch_syscall_args(struct thread *td)
  450. {
  451. struct proc *p;
  452. struct trapframe *frame;
  453. struct syscall_args *sa;
  454. p = td->td_proc;
  455. frame = td->td_frame;
  456. sa = &td->td_sa;
  457. sa->code = frame->tf_eax;
  458. sa->original_code = sa->code;
  459. sa->args[0] = frame->tf_ebx;
  460. sa->args[1] = frame->tf_ecx;
  461. sa->args[2] = frame->tf_edx;
  462. sa->args[3] = frame->tf_esi;
  463. sa->args[4] = frame->tf_edi;
  464. sa->args[5] = frame->tf_ebp;
  465. if (sa->code >= p->p_sysent->sv_size)
  466. /* nosys */
  467. sa->callp = &nosys_sysent;
  468. else
  469. sa->callp = &p->p_sysent->sv_table[sa->code];
  470. td->td_retval[0] = 0;
  471. td->td_retval[1] = frame->tf_edx;
  472. return (0);
  473. }
  474. static void
  475. linux_set_syscall_retval(struct thread *td, int error)
  476. {
  477. struct trapframe *frame = td->td_frame;
  478. cpu_set_syscall_retval(td, error);
  479. if (__predict_false(error != 0)) {
  480. if (error != ERESTART && error != EJUSTRETURN)
  481. frame->tf_eax = bsd_to_linux_errno(error);
  482. }
  483. }
  484. static void
  485. linux_set_fork_retval(struct thread *td)
  486. {
  487. struct trapframe *frame = td->td_frame;
  488. frame->tf_eax = 0;
  489. }
  490. /*
  491. * exec_setregs may initialize some registers differently than Linux
  492. * does, thus potentially confusing Linux binaries. If necessary, we
  493. * override the exec_setregs default(s) here.
  494. */
  495. static void
  496. linux_exec_setregs(struct thread *td, struct image_params *imgp,
  497. uintptr_t stack)
  498. {
  499. struct pcb *pcb = td->td_pcb;
  500. exec_setregs(td, imgp, stack);
  501. /* Linux sets %gs to 0, we default to _udatasel. */
  502. pcb->pcb_gs = 0;
  503. load_gs(0);
  504. pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
  505. }
  506. struct sysentvec linux_sysvec = {
  507. .sv_size = LINUX_SYS_MAXSYSCALL,
  508. .sv_table = linux_sysent,
  509. .sv_fixup = linux_fixup,
  510. .sv_sendsig = linux_sendsig,
  511. .sv_sigcode = &_binary_linux_vdso_so_o_start,
  512. .sv_szsigcode = &linux_szsigcode,
  513. .sv_name = "Linux a.out",
  514. .sv_coredump = NULL,
  515. .sv_minsigstksz = LINUX_MINSIGSTKSZ,
  516. .sv_minuser = VM_MIN_ADDRESS,
  517. .sv_maxuser = VM_MAXUSER_ADDRESS,
  518. .sv_usrstack = LINUX_USRSTACK,
  519. .sv_psstrings = PS_STRINGS,
  520. .sv_psstringssz = sizeof(struct ps_strings),
  521. .sv_stackprot = VM_PROT_ALL,
  522. .sv_copyout_strings = exec_copyout_strings,
  523. .sv_setregs = linux_exec_setregs,
  524. .sv_fixlimit = NULL,
  525. .sv_maxssiz = NULL,
  526. .sv_flags = SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32 |
  527. SV_SIG_DISCIGN | SV_SIG_WAITNDQ,
  528. .sv_set_syscall_retval = linux_set_syscall_retval,
  529. .sv_fetch_syscall_args = linux_fetch_syscall_args,
  530. .sv_syscallnames = linux_syscallnames,
  531. .sv_schedtail = linux_schedtail,
  532. .sv_thread_detach = linux_thread_detach,
  533. .sv_trap = NULL,
  534. .sv_hwcap = NULL,
  535. .sv_hwcap2 = NULL,
  536. .sv_onexec = linux_on_exec_vmspace,
  537. .sv_onexit = linux_on_exit,
  538. .sv_ontdexit = linux_thread_dtor,
  539. .sv_setid_allowed = &linux_setid_allowed_query,
  540. .sv_set_fork_retval = linux_set_fork_retval,
  541. };
  542. INIT_SYSENTVEC(aout_sysvec, &linux_sysvec);
  543. struct sysentvec elf_linux_sysvec = {
  544. .sv_size = LINUX_SYS_MAXSYSCALL,
  545. .sv_table = linux_sysent,
  546. .sv_fixup = __elfN(freebsd_fixup),
  547. .sv_sendsig = linux_sendsig,
  548. .sv_sigcode = &_binary_linux_vdso_so_o_start,
  549. .sv_szsigcode = &linux_szsigcode,
  550. .sv_name = "Linux ELF32",
  551. .sv_coredump = elf32_coredump,
  552. .sv_elf_core_osabi = ELFOSABI_NONE,
  553. .sv_elf_core_abi_vendor = LINUX_ABI_VENDOR,
  554. .sv_elf_core_prepare_notes = __linuxN(prepare_notes),
  555. .sv_minsigstksz = LINUX_MINSIGSTKSZ,
  556. .sv_minuser = VM_MIN_ADDRESS,
  557. .sv_maxuser = VM_MAXUSER_ADDRESS,
  558. .sv_usrstack = LINUX_USRSTACK,
  559. .sv_psstrings = LINUX_PS_STRINGS,
  560. .sv_psstringssz = sizeof(struct ps_strings),
  561. .sv_stackprot = VM_PROT_ALL,
  562. .sv_copyout_auxargs = __linuxN(copyout_auxargs),
  563. .sv_copyout_strings = __linuxN(copyout_strings),
  564. .sv_setregs = linux_exec_setregs,
  565. .sv_fixlimit = NULL,
  566. .sv_maxssiz = NULL,
  567. .sv_flags = SV_ABI_LINUX | SV_IA32 | SV_ILP32 | SV_SHP |
  568. SV_SIG_DISCIGN | SV_SIG_WAITNDQ | SV_TIMEKEEP,
  569. .sv_set_syscall_retval = linux_set_syscall_retval,
  570. .sv_fetch_syscall_args = linux_fetch_syscall_args,
  571. .sv_syscallnames = NULL,
  572. .sv_shared_page_base = LINUX_SHAREDPAGE,
  573. .sv_shared_page_len = PAGE_SIZE,
  574. .sv_schedtail = linux_schedtail,
  575. .sv_thread_detach = linux_thread_detach,
  576. .sv_trap = NULL,
  577. .sv_hwcap = NULL,
  578. .sv_hwcap2 = NULL,
  579. .sv_onexec = linux_on_exec_vmspace,
  580. .sv_onexit = linux_on_exit,
  581. .sv_ontdexit = linux_thread_dtor,
  582. .sv_setid_allowed = &linux_setid_allowed_query,
  583. .sv_set_fork_retval = linux_set_fork_retval,
  584. };
  585. static int
  586. linux_on_exec_vmspace(struct proc *p, struct image_params *imgp)
  587. {
  588. int error = 0;
  589. if (SV_PROC_FLAG(p, SV_SHP) != 0)
  590. error = linux_map_vdso(p, linux_vdso_obj,
  591. linux_vdso_base, LINUX_VDSOPAGE_SIZE, imgp);
  592. if (error == 0)
  593. error = linux_on_exec(p, imgp);
  594. return (error);
  595. }
  596. /*
  597. * linux_vdso_install() and linux_exec_sysvec_init() must be called
  598. * after exec_sysvec_init() which is SI_SUB_EXEC (SI_ORDER_ANY).
  599. */
  600. static void
  601. linux_exec_sysvec_init(void *param)
  602. {
  603. l_uintptr_t *ktimekeep_base, *ktsc_selector;
  604. struct sysentvec *sv;
  605. ptrdiff_t tkoff;
  606. sv = param;
  607. /* Fill timekeep_base */
  608. exec_sysvec_init(sv);
  609. tkoff = kern_timekeep_base - linux_vdso_base;
  610. ktimekeep_base = (l_uintptr_t *)(linux_vdso_mapping + tkoff);
  611. *ktimekeep_base = sv->sv_shared_page_base + sv->sv_timekeep_offset;
  612. tkoff = kern_tsc_selector - linux_vdso_base;
  613. ktsc_selector = (l_uintptr_t *)(linux_vdso_mapping + tkoff);
  614. *ktsc_selector = linux_vdso_tsc_selector_idx();
  615. if (bootverbose)
  616. printf("Linux i386 vDSO tsc_selector: %u\n", *ktsc_selector);
  617. tkoff = kern_cpu_selector - linux_vdso_base;
  618. ktsc_selector = (l_uintptr_t *)(linux_vdso_mapping + tkoff);
  619. *ktsc_selector = linux_vdso_cpu_selector_idx();
  620. if (bootverbose)
  621. printf("Linux i386 vDSO cpu_selector: %u\n", *ktsc_selector);
  622. }
  623. SYSINIT(elf_linux_exec_sysvec_init, SI_SUB_EXEC + 1, SI_ORDER_ANY,
  624. linux_exec_sysvec_init, &elf_linux_sysvec);
  625. static void
  626. linux_vdso_install(const void *param)
  627. {
  628. char *vdso_start = &_binary_linux_vdso_so_o_start;
  629. char *vdso_end = &_binary_linux_vdso_so_o_end;
  630. linux_szsigcode = vdso_end - vdso_start;
  631. MPASS(linux_szsigcode <= LINUX_VDSOPAGE_SIZE);
  632. linux_vdso_base = LINUX_VDSOPAGE;
  633. __elfN(linux_vdso_fixup)(vdso_start, linux_vdso_base);
  634. linux_vdso_obj = __elfN(linux_shared_page_init)
  635. (&linux_vdso_mapping, LINUX_VDSOPAGE_SIZE);
  636. bcopy(vdso_start, linux_vdso_mapping, linux_szsigcode);
  637. linux_vdso_reloc(linux_vdso_mapping, linux_vdso_base);
  638. }
  639. SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC + 1, SI_ORDER_FIRST,
  640. linux_vdso_install, NULL);
  641. static void
  642. linux_vdso_deinstall(const void *param)
  643. {
  644. __elfN(linux_shared_page_fini)(linux_vdso_obj,
  645. linux_vdso_mapping, LINUX_VDSOPAGE_SIZE);
  646. }
  647. SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST,
  648. linux_vdso_deinstall, NULL);
  649. static void
  650. linux_vdso_reloc(char *mapping, Elf_Addr offset)
  651. {
  652. const Elf_Shdr *shdr;
  653. const Elf_Rel *rel;
  654. const Elf_Ehdr *ehdr;
  655. Elf_Addr *where;
  656. Elf_Size rtype, symidx;
  657. Elf_Addr addr, addend;
  658. int i, relcnt;
  659. MPASS(offset != 0);
  660. relcnt = 0;
  661. ehdr = (const Elf_Ehdr *)mapping;
  662. shdr = (const Elf_Shdr *)(mapping + ehdr->e_shoff);
  663. for (i = 0; i < ehdr->e_shnum; i++)
  664. {
  665. switch (shdr[i].sh_type) {
  666. case SHT_REL:
  667. rel = (const Elf_Rel *)(mapping + shdr[i].sh_offset);
  668. relcnt = shdr[i].sh_size / sizeof(*rel);
  669. break;
  670. case SHT_RELA:
  671. printf("Linux i386 vDSO: unexpected Rela section\n");
  672. break;
  673. }
  674. }
  675. for (i = 0; i < relcnt; i++, rel++) {
  676. where = (Elf_Addr *)(mapping + rel->r_offset);
  677. addend = *where;
  678. rtype = ELF_R_TYPE(rel->r_info);
  679. symidx = ELF_R_SYM(rel->r_info);
  680. switch (rtype) {
  681. case R_386_NONE: /* none */
  682. break;
  683. case R_386_RELATIVE: /* B + A */
  684. addr = (Elf_Addr)PTROUT(offset + addend);
  685. if (*where != addr)
  686. *where = addr;
  687. break;
  688. case R_386_IRELATIVE:
  689. printf("Linux i386 vDSO: unexpected ifunc relocation, "
  690. "symbol index %d\n", symidx);
  691. break;
  692. default:
  693. printf("Linux i386 vDSO: unexpected relocation type %d, "
  694. "symbol index %d\n", rtype, symidx);
  695. }
  696. }
  697. }
  698. static Elf_Brandnote linux_brandnote = {
  699. .hdr.n_namesz = sizeof(GNU_ABI_VENDOR),
  700. .hdr.n_descsz = 16, /* XXX at least 16 */
  701. .hdr.n_type = 1,
  702. .vendor = GNU_ABI_VENDOR,
  703. .flags = BN_TRANSLATE_OSREL,
  704. .trans_osrel = linux_trans_osrel
  705. };
  706. static Elf32_Brandinfo linux_brand = {
  707. .brand = ELFOSABI_LINUX,
  708. .machine = EM_386,
  709. .compat_3_brand = "Linux",
  710. .interp_path = "/lib/ld-linux.so.1",
  711. .sysvec = &elf_linux_sysvec,
  712. .interp_newpath = NULL,
  713. .brand_note = &linux_brandnote,
  714. .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
  715. };
  716. static Elf32_Brandinfo linux_glibc2brand = {
  717. .brand = ELFOSABI_LINUX,
  718. .machine = EM_386,
  719. .compat_3_brand = "Linux",
  720. .interp_path = "/lib/ld-linux.so.2",
  721. .sysvec = &elf_linux_sysvec,
  722. .interp_newpath = NULL,
  723. .brand_note = &linux_brandnote,
  724. .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE
  725. };
  726. static Elf32_Brandinfo linux_muslbrand = {
  727. .brand = ELFOSABI_LINUX,
  728. .machine = EM_386,
  729. .compat_3_brand = "Linux",
  730. .interp_path = "/lib/ld-musl-i386.so.1",
  731. .sysvec = &elf_linux_sysvec,
  732. .interp_newpath = NULL,
  733. .brand_note = &linux_brandnote,
  734. .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE |
  735. LINUX_BI_FUTEX_REQUEUE
  736. };
  737. Elf32_Brandinfo *linux_brandlist[] = {
  738. &linux_brand,
  739. &linux_glibc2brand,
  740. &linux_muslbrand,
  741. NULL
  742. };
  743. static int
  744. linux_elf_modevent(module_t mod, int type, void *data)
  745. {
  746. Elf32_Brandinfo **brandinfo;
  747. int error;
  748. struct linux_ioctl_handler **lihp;
  749. error = 0;
  750. switch(type) {
  751. case MOD_LOAD:
  752. for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
  753. ++brandinfo)
  754. if (elf32_insert_brand_entry(*brandinfo) < 0)
  755. error = EINVAL;
  756. if (error == 0) {
  757. SET_FOREACH(lihp, linux_ioctl_handler_set)
  758. linux_ioctl_register_handler(*lihp);
  759. linux_dev_shm_create();
  760. linux_osd_jail_register();
  761. linux_netlink_register();
  762. stclohz = (stathz ? stathz : hz);
  763. if (bootverbose)
  764. printf("Linux ELF exec handler installed\n");
  765. } else
  766. printf("cannot insert Linux ELF brand handler\n");
  767. break;
  768. case MOD_UNLOAD:
  769. for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
  770. ++brandinfo)
  771. if (elf32_brand_inuse(*brandinfo))
  772. error = EBUSY;
  773. if (error == 0) {
  774. for (brandinfo = &linux_brandlist[0];
  775. *brandinfo != NULL; ++brandinfo)
  776. if (elf32_remove_brand_entry(*brandinfo) < 0)
  777. error = EINVAL;
  778. }
  779. if (error == 0) {
  780. SET_FOREACH(lihp, linux_ioctl_handler_set)
  781. linux_ioctl_unregister_handler(*lihp);
  782. linux_netlink_deregister();
  783. linux_dev_shm_destroy();
  784. linux_osd_jail_deregister();
  785. if (bootverbose)
  786. printf("Linux ELF exec handler removed\n");
  787. } else
  788. printf("Could not deinstall ELF interpreter entry\n");
  789. break;
  790. default:
  791. return (EOPNOTSUPP);
  792. }
  793. return (error);
  794. }
  795. static moduledata_t linux_elf_mod = {
  796. "linuxelf",
  797. linux_elf_modevent,
  798. 0
  799. };
  800. DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
  801. MODULE_DEPEND(linuxelf, netlink, 1, 1, 1);
  802. FEATURE(linux, "Linux 32bit support");