
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 1991,1992 Linus Torvalds
 *
 * entry_32.S contains the system-call and low-level fault and trap handling routines.
 *
 * Stack layout while running C code:
 *	ptrace needs to have all registers on the stack.
 *	If the order here is changed, it needs to be
 *	updated in fork.c:copy_process(), signal.c:do_signal(),
 *	ptrace.c and ptrace.h
 *
 *	 0(%esp) - %ebx
 *	 4(%esp) - %ecx
 *	 8(%esp) - %edx
 *	 C(%esp) - %esi
 *	10(%esp) - %edi
 *	14(%esp) - %ebp
 *	18(%esp) - %eax
 *	1C(%esp) - %ds
 *	20(%esp) - %es
 *	24(%esp) - %fs
 *	28(%esp) - %gs		saved iff !CONFIG_X86_32_LAZY_GS
 *	2C(%esp) - orig_eax
 *	30(%esp) - %eip
 *	34(%esp) - %cs
 *	38(%esp) - %eflags
 *	3C(%esp) - %oldesp
 *	40(%esp) - %oldss
 */
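
/*
 * For reference, the layout above mirrors struct pt_regs for 32-bit (see
 * arch/x86/include/asm/ptrace.h).  A rough sketch only, with field types
 * simplified to 4-byte slots:
 *
 *	struct pt_regs {
 *		unsigned long bx, cx, dx, si, di, bp, ax;
 *		unsigned long ds, es, fs, gs;
 *		unsigned long orig_ax;
 *		unsigned long ip, cs, flags, sp, ss;
 *	};
 *
 * The PT_* offsets used throughout this file (PT_EFLAGS, PT_OLDESP, ...)
 * are generated from that structure by the asm-offsets machinery.
 */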
#include <linux/linkage.h>
#include <linux/err.h>
#include <asm/thread_info.h>
#include <asm/irqflags.h>
#include <asm/errno.h>
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/percpu.h>
#include <asm/processor-flags.h>
#include <asm/irq_vectors.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/asm.h>
#include <asm/smap.h>
#include <asm/frame.h>
#include <asm/nospec-branch.h>

	.section .entry.text, "ax"

/*
 * We use macros for low-level operations which need to be overridden
 * for paravirtualization.  The following will never clobber any registers:
 *	INTERRUPT_RETURN (aka. "iret")
 *	GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
 *	ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
 *
 * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
 * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
 * Allowing a register to be clobbered can shrink the paravirt replacement
 * enough to patch inline, increasing performance.
 */

#ifdef CONFIG_PREEMPT
# define preempt_stop(clobbers)	DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else
# define preempt_stop(clobbers)
# define resume_kernel		restore_all
#endif

.macro TRACE_IRQS_IRET
#ifdef CONFIG_TRACE_IRQFLAGS
	testl	$X86_EFLAGS_IF, PT_EFLAGS(%esp)	# interrupts off?
	jz	1f
	TRACE_IRQS_ON
1:
#endif
.endm

/*
 * User gs save/restore
 *
 * %gs is used for userland TLS and kernel only uses it for stack
 * canary which is required to be at %gs:20 by gcc.  Read the comment
 * at the top of stackprotector.h for more info.
 *
 * Local labels 98 and 99 are used.
 */
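
/*
 * For reference, the per-CPU object backing %gs:20 is declared roughly
 * like this in asm/stackprotector.h (a sketch, not the authoritative
 * definition; the 20-byte pad is what puts the canary at offset 20):
 *
 *	struct stack_canary {
 *		char __pad[20];
 *		unsigned long canary;
 *	};
 *	DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
 *
 * The non-lazy SET_KERNEL_GS below points %gs at that object while we
 * run in the kernel.
 */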
#ifdef CONFIG_X86_32_LAZY_GS

 /* unfortunately push/pop can't be no-op */
.macro PUSH_GS
	pushl	$0
.endm
.macro POP_GS pop=0
	addl	$(4 + \pop), %esp
.endm
.macro POP_GS_EX
.endm

 /* all the rest are no-op */
.macro PTGS_TO_GS
.endm
.macro PTGS_TO_GS_EX
.endm
.macro GS_TO_REG reg
.endm
.macro REG_TO_PTGS reg
.endm
.macro SET_KERNEL_GS reg
.endm

#else	/* CONFIG_X86_32_LAZY_GS */

.macro PUSH_GS
	pushl	%gs
.endm

.macro POP_GS pop=0
98:	popl	%gs
	.if \pop <> 0
	add	$\pop, %esp
	.endif
.endm
.macro POP_GS_EX
.pushsection .fixup, "ax"
99:	movl	$0, (%esp)
	jmp	98b
.popsection
	_ASM_EXTABLE(98b, 99b)
.endm

.macro PTGS_TO_GS
98:	mov	PT_GS(%esp), %gs
.endm
.macro PTGS_TO_GS_EX
.pushsection .fixup, "ax"
99:	movl	$0, PT_GS(%esp)
	jmp	98b
.popsection
	_ASM_EXTABLE(98b, 99b)
.endm

.macro GS_TO_REG reg
	movl	%gs, \reg
.endm
.macro REG_TO_PTGS reg
	movl	\reg, PT_GS(%esp)
.endm
.macro SET_KERNEL_GS reg
	movl	$(__KERNEL_STACK_CANARY), \reg
	movl	\reg, %gs
.endm

#endif /* CONFIG_X86_32_LAZY_GS */

.macro SAVE_ALL pt_regs_ax=%eax
	cld
	PUSH_GS
	pushl	%fs
	pushl	%es
	pushl	%ds
	pushl	\pt_regs_ax
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	pushl	%edx
	pushl	%ecx
	pushl	%ebx
	movl	$(__USER_DS), %edx
	movl	%edx, %ds
	movl	%edx, %es
	movl	$(__KERNEL_PERCPU), %edx
	movl	%edx, %fs
	SET_KERNEL_GS %edx
.endm

/*
 * This is a sneaky trick to help the unwinder find pt_regs on the stack.  The
 * frame pointer is replaced with an encoded pointer to pt_regs.  The encoding
 * is just clearing the MSB, which makes it an invalid stack address and is also
 * a signal to the unwinder that it's a pt_regs pointer in disguise.
 *
 * NOTE: This macro must be used *after* SAVE_ALL because it corrupts the
 * original ebp.
 */
.macro ENCODE_FRAME_POINTER
#ifdef CONFIG_FRAME_POINTER
	mov	%esp, %ebp
	andl	$0x7fffffff, %ebp
#endif
.endm
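
/*
 * A sketch of the decode side as the frame-pointer unwinder sees it
 * (simplified; the real logic lives in arch/x86/kernel/unwind_frame.c):
 *
 *	// Kernel stack addresses on 32-bit have the MSB set, so a saved
 *	// "frame pointer" with a clear MSB cannot be a real frame -- it is
 *	// an encoded pt_regs pointer; setting the MSB recovers it.
 *	if (!(bp & 0x80000000))
 *		regs = (struct pt_regs *)(bp | 0x80000000);
 */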

.macro RESTORE_INT_REGS
	popl	%ebx
	popl	%ecx
	popl	%edx
	popl	%esi
	popl	%edi
	popl	%ebp
	popl	%eax
.endm

.macro RESTORE_REGS pop=0
	RESTORE_INT_REGS
1:	popl	%ds
2:	popl	%es
3:	popl	%fs
	POP_GS \pop
.pushsection .fixup, "ax"
4:	movl	$0, (%esp)
	jmp	1b
5:	movl	$0, (%esp)
	jmp	2b
6:	movl	$0, (%esp)
	jmp	3b
.popsection
	_ASM_EXTABLE(1b, 4b)
	_ASM_EXTABLE(2b, 5b)
	_ASM_EXTABLE(3b, 6b)
	POP_GS_EX
.endm

/*
 * %eax: prev task
 * %edx: next task
 */
ENTRY(__switch_to_asm)
	/*
	 * Save callee-saved registers
	 * This must match the order in struct inactive_task_frame
	 */
	pushl	%ebp
	pushl	%ebx
	pushl	%edi
	pushl	%esi
	pushfl

	/* switch stack */
	movl	%esp, TASK_threadsp(%eax)
	movl	TASK_threadsp(%edx), %esp

#ifdef CONFIG_CC_STACKPROTECTOR
	movl	TASK_stack_canary(%edx), %ebx
	movl	%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
#endif

#ifdef CONFIG_RETPOLINE
	/*
	 * When switching from a shallower to a deeper call stack
	 * the RSB may either underflow or use entries populated
	 * with userspace addresses. On CPUs where those concerns
	 * exist, overwrite the RSB with entries which capture
	 * speculative execution to prevent attack.
	 */
	FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
#endif

	/* restore callee-saved registers */
	popfl
	popl	%esi
	popl	%edi
	popl	%ebx
	popl	%ebp

	jmp	__switch_to
END(__switch_to_asm)
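
/*
 * For reference, the pushes above build (low to high addresses) the frame
 * described by struct inactive_task_frame in asm/switch_to.h -- roughly:
 *
 *	struct inactive_task_frame {
 *		unsigned long flags;
 *		unsigned long si;
 *		unsigned long di;
 *		unsigned long bx;
 *		unsigned long bp;
 *		unsigned long ret_addr;	// pushed by the call into
 *					// __switch_to_asm
 *	};
 *
 * TASK_threadsp then points at this frame for a task that is switched out.
 */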

/*
 * The unwinder expects the last frame on the stack to always be at the same
 * offset from the end of the page, which allows it to validate the stack.
 * Calling schedule_tail() directly would break that convention because it's an
 * asmlinkage function so its argument has to be pushed on the stack.  This
 * wrapper creates a proper "end of stack" frame header before the call.
 */
ENTRY(schedule_tail_wrapper)
	FRAME_BEGIN

	pushl	%eax
	call	schedule_tail
	popl	%eax

	FRAME_END
	ret
ENDPROC(schedule_tail_wrapper)

/*
 * A newly forked process directly context switches into this address.
 *
 * eax: prev task we switched from
 * ebx: kernel thread func (NULL for user thread)
 * edi: kernel thread arg
 */
ENTRY(ret_from_fork)
	call	schedule_tail_wrapper

	testl	%ebx, %ebx
	jnz	1f		/* kernel threads are uncommon */

2:
	/* When we fork, we trace the syscall return in the child, too. */
	movl	%esp, %eax
	call	syscall_return_slowpath
	jmp	restore_all

	/* kernel thread */
1:	movl	%edi, %eax
	CALL_NOSPEC %ebx
	/*
	 * A kernel thread is allowed to return here after successfully
	 * calling do_execve().  Exit to userspace to complete the execve()
	 * syscall.
	 */
	movl	$0, PT_EAX(%esp)
	jmp	2b
END(ret_from_fork)

/*
 * Return to user mode is not as complex as all this looks,
 * but we want the default path for a system call return to
 * go as quickly as possible which is why some of this is
 * less clear than it otherwise should be.
 */

	# userspace resumption stub bypassing syscall exit tracing
	ALIGN
ret_from_exception:
	preempt_stop(CLBR_ANY)
ret_from_intr:
#ifdef CONFIG_VM86
	movl	PT_EFLAGS(%esp), %eax		# mix EFLAGS and CS
	movb	PT_CS(%esp), %al
	andl	$(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
#else
	/*
	 * We can be coming here from child spawned by kernel_thread().
	 */
	movl	PT_CS(%esp), %eax
	andl	$SEGMENT_RPL_MASK, %eax
#endif
	cmpl	$USER_RPL, %eax
	jb	resume_kernel			# not returning to v8086 or userspace

ENTRY(resume_userspace)
	DISABLE_INTERRUPTS(CLBR_ANY)
	TRACE_IRQS_OFF
	movl	%esp, %eax
	call	prepare_exit_to_usermode
	jmp	restore_all
END(ret_from_exception)

#ifdef CONFIG_PREEMPT
ENTRY(resume_kernel)
	DISABLE_INTERRUPTS(CLBR_ANY)
.Lneed_resched:
	cmpl	$0, PER_CPU_VAR(__preempt_count)
	jnz	restore_all
	testl	$X86_EFLAGS_IF, PT_EFLAGS(%esp)	# interrupts off (exception path) ?
	jz	restore_all
	call	preempt_schedule_irq
	jmp	.Lneed_resched
END(resume_kernel)
#endif
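
/*
 * The loop above is roughly the following C (a sketch; x86 folds the
 * need-resched flag into the per-CPU __preempt_count, so a value of zero
 * means "preemption enabled and a reschedule is requested"):
 *
 *	while (!raw_cpu_read(__preempt_count) &&
 *	       (regs->flags & X86_EFLAGS_IF))
 *		preempt_schedule_irq();
 *
 * i.e. never preempt when preemption is disabled or when the interrupted
 * context had interrupts off (we got here via an exception path).
 */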

GLOBAL(__begin_SYSENTER_singlestep_region)
/*
 * All code from here through __end_SYSENTER_singlestep_region is subject
 * to being single-stepped if a user program sets TF and executes SYSENTER.
 * There is absolutely nothing that we can do to prevent this from happening
 * (thanks Intel!).  To keep our handling of this situation as simple as
 * possible, we handle TF just like AC and NT, except that our #DB handler
 * will ignore all of the single-step traps generated in this range.
 */

#ifdef CONFIG_XEN
/*
 * Xen doesn't set %esp to be precisely what the normal SYSENTER
 * entry point expects, so fix it up before using the normal path.
 */
ENTRY(xen_sysenter_target)
	addl	$5*4, %esp			/* remove xen-provided frame */
	jmp	.Lsysenter_past_esp
#endif

/*
 * 32-bit SYSENTER entry.
 *
 * 32-bit system calls through the vDSO's __kernel_vsyscall enter here
 * if X86_FEATURE_SEP is available.  This is the preferred system call
 * entry on 32-bit systems.
 *
 * The SYSENTER instruction, in principle, should *only* occur in the
 * vDSO.  In practice, a small number of Android devices were shipped
 * with a copy of Bionic that inlined a SYSENTER instruction.  This
 * never happened in any of Google's Bionic versions -- it only happened
 * in a narrow range of Intel-provided versions.
 *
 * SYSENTER loads SS, ESP, CS, and EIP from previously programmed MSRs.
 * IF and VM in RFLAGS are cleared (IOW: interrupts are off).
 * SYSENTER does not save anything on the stack,
 * and does not save old EIP (!!!), ESP, or EFLAGS.
 *
 * To avoid losing track of EFLAGS.VM (and thus potentially corrupting
 * user and/or vm86 state), we explicitly disable the SYSENTER
 * instruction in vm86 mode by reprogramming the MSRs.
 *
 * Arguments:
 * eax	system call number
 * ebx	arg1
 * ecx	arg2
 * edx	arg3
 * esi	arg4
 * edi	arg5
 * ebp	user stack
 * 0(%ebp) arg6
 */
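
/*
 * For context (not part of this file, so treat as a rough description):
 * the vDSO's __kernel_vsyscall saves %ecx, %edx and %ebp on the user
 * stack and copies %esp into %ebp before executing SYSENTER.  That is why
 * pt_regs->sp is recovered from %ebp here, why arg6 lives at 0(%ebp), and
 * why the SYSEXIT path below can leave it to the vDSO to pop %ecx and %edx.
 */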
ENTRY(entry_SYSENTER_32)
	movl	TSS_sysenter_sp0(%esp), %esp
.Lsysenter_past_esp:
	pushl	$__USER_DS		/* pt_regs->ss */
	pushl	%ebp			/* pt_regs->sp (stashed in bp) */
	pushfl				/* pt_regs->flags (except IF = 0) */
	orl	$X86_EFLAGS_IF, (%esp)	/* Fix IF */
	pushl	$__USER_CS		/* pt_regs->cs */
	pushl	$0			/* pt_regs->ip = 0 (placeholder) */
	pushl	%eax			/* pt_regs->orig_ax */
	SAVE_ALL pt_regs_ax=$-ENOSYS	/* save rest */

	/*
	 * SYSENTER doesn't filter flags, so we need to clear NT, AC
	 * and TF ourselves.  To save a few cycles, we can check whether
	 * either was set instead of doing an unconditional popfl.
	 * This needs to happen before enabling interrupts so that
	 * we don't get preempted with NT set.
	 *
	 * If TF is set, we will single-step all the way to here -- do_debug
	 * will ignore all the traps.  (Yes, this is slow, but so is
	 * single-stepping in general.  This allows us to avoid having
	 * more complicated code to handle the case where a user program
	 * forces us to single-step through the SYSENTER entry code.)
	 *
	 * NB.: .Lsysenter_fix_flags is a label with the code under it moved
	 * out-of-line as an optimization: NT is unlikely to be set in the
	 * majority of the cases and instead of polluting the I$ unnecessarily,
	 * we're keeping that code behind a branch which will predict as
	 * not-taken and therefore its instructions won't be fetched.
	 */
	testl	$X86_EFLAGS_NT|X86_EFLAGS_AC|X86_EFLAGS_TF, PT_EFLAGS(%esp)
	jnz	.Lsysenter_fix_flags
.Lsysenter_flags_fixed:

	/*
	 * User mode is traced as though IRQs are on, and SYSENTER
	 * turned them off.
	 */
	TRACE_IRQS_OFF

	movl	%esp, %eax
	call	do_fast_syscall_32
	/* XEN PV guests always use IRET path */
	ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \
		    "jmp .Lsyscall_32_done", X86_FEATURE_XENPV

/* Opportunistic SYSEXIT */
	TRACE_IRQS_ON			/* User mode traces as IRQs on. */
	movl	PT_EIP(%esp), %edx	/* pt_regs->ip */
	movl	PT_OLDESP(%esp), %ecx	/* pt_regs->sp */
1:	mov	PT_FS(%esp), %fs
	PTGS_TO_GS
	popl	%ebx			/* pt_regs->bx */
	addl	$2*4, %esp		/* skip pt_regs->cx and pt_regs->dx */
	popl	%esi			/* pt_regs->si */
	popl	%edi			/* pt_regs->di */
	popl	%ebp			/* pt_regs->bp */
	popl	%eax			/* pt_regs->ax */

	/*
	 * Restore all flags except IF. (We restore IF separately because
	 * STI gives a one-instruction window in which we won't be interrupted,
	 * whereas POPF does not.)
	 */
	addl	$PT_EFLAGS-PT_DS, %esp	/* point esp at pt_regs->flags */
	btr	$X86_EFLAGS_IF_BIT, (%esp)
	popfl

	/*
	 * Return back to the vDSO, which will pop ecx and edx.
	 * Don't bother with DS and ES (they already contain __USER_DS).
	 */
	sti
	sysexit

.pushsection .fixup, "ax"
2:	movl	$0, PT_FS(%esp)
	jmp	1b
.popsection
	_ASM_EXTABLE(1b, 2b)
	PTGS_TO_GS_EX
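
/*
 * X86_EFLAGS_FIXED is just the always-one reserved bit 1, so the
 * push/popfl below reloads EFLAGS with everything else -- including
 * NT, AC and TF -- cleared before rejoining the fast path above.
 */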
.Lsysenter_fix_flags:
	pushl	$X86_EFLAGS_FIXED
	popfl
	jmp	.Lsysenter_flags_fixed
GLOBAL(__end_SYSENTER_singlestep_region)
ENDPROC(entry_SYSENTER_32)

/*
 * 32-bit legacy system call entry.
 *
 * 32-bit x86 Linux system calls traditionally used the INT $0x80
 * instruction.  INT $0x80 lands here.
 *
 * This entry point can be used by any 32-bit program to perform
 * 32-bit system calls.  Instances of INT $0x80 can be found inline in
 * various programs and libraries.  It is also used by the vDSO's
 * __kernel_vsyscall fallback for hardware that doesn't support a
 * faster entry method.  Restarted 32-bit system calls also fall back
 * to INT $0x80 regardless of what instruction was originally used to
 * do the system call.  (64-bit programs can use INT $0x80 as well, but
 * they can only run on 64-bit kernels and therefore land in
 * entry_INT80_compat.)
 *
 * This is considered a slow path.  It is not used by most libc
 * implementations on modern hardware except during process startup.
 *
 * Arguments:
 * eax	system call number
 * ebx	arg1
 * ecx	arg2
 * edx	arg3
 * esi	arg4
 * edi	arg5
 * ebp	arg6
 */
ENTRY(entry_INT80_32)
	ASM_CLAC
	pushl	%eax			/* pt_regs->orig_ax */
	SAVE_ALL pt_regs_ax=$-ENOSYS	/* save rest */

	/*
	 * User mode is traced as though IRQs are on, and the interrupt gate
	 * turned them off.
	 */
	TRACE_IRQS_OFF

	movl	%esp, %eax
	call	do_int80_syscall_32
.Lsyscall_32_done:

restore_all:
	TRACE_IRQS_IRET
.Lrestore_all_notrace:
#ifdef CONFIG_X86_ESPFIX32
	ALTERNATIVE "jmp .Lrestore_nocheck", "", X86_BUG_ESPFIX

	movl	PT_EFLAGS(%esp), %eax		# mix EFLAGS, SS and CS
	/*
	 * Warning: PT_OLDSS(%esp) contains the wrong/random values if we
	 * are returning to the kernel.
	 * See comments in process.c:copy_thread() for details.
	 */
	movb	PT_OLDSS(%esp), %ah
	movb	PT_CS(%esp), %al
	andl	$(X86_EFLAGS_VM | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
	cmpl	$((SEGMENT_LDT << 8) | USER_RPL), %eax
	je	.Lldt_ss			# returning to user-space with LDT SS
#endif
.Lrestore_nocheck:
	RESTORE_REGS 4				# skip orig_eax/error_code
.Lirq_return:
	INTERRUPT_RETURN

.section .fixup, "ax"
ENTRY(iret_exc)
	pushl	$0				# no error code
	pushl	$do_iret_error
	jmp	common_exception
.previous
	_ASM_EXTABLE(.Lirq_return, iret_exc)

#ifdef CONFIG_X86_ESPFIX32
.Lldt_ss:
/*
 * Setup and switch to ESPFIX stack
 *
 * We're returning to userspace with a 16 bit stack. The CPU will not
 * restore the high word of ESP for us on executing iret... This is an
 * "official" bug of all the x86-compatible CPUs, which we can work
 * around to make dosemu and wine happy. We do this by preloading the
 * high word of ESP with the high word of the userspace ESP while
 * compensating for the offset by changing to the ESPFIX segment with
 * a base address that matches for the difference.
 */
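/*
 * Worked example (made-up numbers): with kernel %esp = 0xc1a3f0b0 and
 * user %esp = 0xbfff1234, the code below builds %eax = 0xbffff0b0 (user
 * high word, kernel low word) and programs the __ESPFIX_SS base to
 * 0xc1a3f0b0 - 0xbffff0b0 = 0x01a40000, so base + %eax still addresses
 * the kernel stack while the high word of %esp reads as the user's.
 * If the 16-bit-SS iret then leaves the high word of %esp unchanged,
 * what remains there is the user's own high word, not a kernel address.
 */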
#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
	mov	%esp, %edx			/* load kernel esp */
	mov	PT_OLDESP(%esp), %eax		/* load userspace esp */
	mov	%dx, %ax			/* eax: new kernel esp */
	sub	%eax, %edx			/* offset (low word is 0) */
	shr	$16, %edx
	mov	%dl, GDT_ESPFIX_SS + 4		/* bits 16..23 */
	mov	%dh, GDT_ESPFIX_SS + 7		/* bits 24..31 */
	pushl	$__ESPFIX_SS
	pushl	%eax				/* new kernel esp */
	/*
	 * Disable interrupts, but do not irqtrace this section: we
	 * will soon execute iret and the tracer was already set to
	 * the irqstate after the IRET:
	 */
	DISABLE_INTERRUPTS(CLBR_ANY)
	lss	(%esp), %esp			/* switch to espfix segment */
	jmp	.Lrestore_nocheck
#endif
ENDPROC(entry_INT80_32)

.macro FIXUP_ESPFIX_STACK
/*
 * Switch back from the ESPFIX stack to the normal zero-based stack
 *
 * We can't call C functions using the ESPFIX stack. This code reads
 * the high word of the segment base from the GDT and switches to the
 * normal stack and adjusts ESP with the matching offset.
 */
#ifdef CONFIG_X86_ESPFIX32
	/* fixup the stack */
	mov	GDT_ESPFIX_SS + 4, %al		/* bits 16..23 */
	mov	GDT_ESPFIX_SS + 7, %ah		/* bits 24..31 */
	shl	$16, %eax
	addl	%esp, %eax			/* the adjusted stack pointer */
	pushl	$__KERNEL_DS
	pushl	%eax
	lss	(%esp), %esp			/* switch to the normal stack segment */
#endif
.endm

.macro UNWIND_ESPFIX_STACK
#ifdef CONFIG_X86_ESPFIX32
	movl	%ss, %eax
	/* see if on espfix stack */
	cmpw	$__ESPFIX_SS, %ax
	jne	27f
	movl	$__KERNEL_DS, %eax
	movl	%eax, %ds
	movl	%eax, %es
	/* switch to normal stack */
	FIXUP_ESPFIX_STACK
27:
#endif
.endm

/*
 * Build the entry stubs with some assembler magic.
 * We pack 1 stub into every 8-byte block.
 */
	.align 8
ENTRY(irq_entries_start)
	vector=FIRST_EXTERNAL_VECTOR
	.rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
	pushl	$(~vector+0x80)			/* Note: always in signed byte range */
	vector=vector+1
	jmp	common_interrupt
	.align	8
	.endr
END(irq_entries_start)
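
/*
 * Worked example of the vector encoding (illustrative numbers): for
 * vector 0x20 the stub pushes ~0x20 + 0x80 = 0x5f, which fits in a
 * signed byte so the push plus jmp stays within the 8-byte slot.
 * common_interrupt then adds -0x80, leaving 0xffffffdf = ~0x20 in
 * orig_eax, and the C interrupt code recovers the vector by
 * complementing that value again.
 */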

/*
 * the CPU automatically disables interrupts when executing an IRQ vector,
 * so IRQ-flags tracing has to follow that:
 */
	.p2align CONFIG_X86_L1_CACHE_SHIFT
common_interrupt:
	ASM_CLAC
	addl	$-0x80, (%esp)			/* Adjust vector into the [-256, -1] range */
	SAVE_ALL
	ENCODE_FRAME_POINTER
	TRACE_IRQS_OFF
	movl	%esp, %eax
	call	do_IRQ
	jmp	ret_from_intr
ENDPROC(common_interrupt)

#define BUILD_INTERRUPT3(name, nr, fn)			\
ENTRY(name)						\
	ASM_CLAC;					\
	pushl	$~(nr);					\
	SAVE_ALL;					\
	ENCODE_FRAME_POINTER;				\
	TRACE_IRQS_OFF					\
	movl	%esp, %eax;				\
	call	fn;					\
	jmp	ret_from_intr;				\
ENDPROC(name)

#define BUILD_INTERRUPT(name, nr)		\
	BUILD_INTERRUPT3(name, nr, smp_##name);	\

/* The include is where all of the SMP etc. interrupts come from */
#include <asm/entry_arch.h>

ENTRY(coprocessor_error)
	ASM_CLAC
	pushl	$0
	pushl	$do_coprocessor_error
	jmp	common_exception
END(coprocessor_error)

ENTRY(simd_coprocessor_error)
	ASM_CLAC
	pushl	$0
#ifdef CONFIG_X86_INVD_BUG
	/* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
	ALTERNATIVE "pushl	$do_general_protection",	\
		    "pushl	$do_simd_coprocessor_error",	\
		    X86_FEATURE_XMM
#else
	pushl	$do_simd_coprocessor_error
#endif
	jmp	common_exception
END(simd_coprocessor_error)

ENTRY(device_not_available)
	ASM_CLAC
	pushl	$-1				# mark this as an int
	pushl	$do_device_not_available
	jmp	common_exception
END(device_not_available)

#ifdef CONFIG_PARAVIRT
ENTRY(native_iret)
	iret
	_ASM_EXTABLE(native_iret, iret_exc)
END(native_iret)
#endif

ENTRY(overflow)
	ASM_CLAC
	pushl	$0
	pushl	$do_overflow
	jmp	common_exception
END(overflow)

ENTRY(bounds)
	ASM_CLAC
	pushl	$0
	pushl	$do_bounds
	jmp	common_exception
END(bounds)

ENTRY(invalid_op)
	ASM_CLAC
	pushl	$0
	pushl	$do_invalid_op
	jmp	common_exception
END(invalid_op)

ENTRY(coprocessor_segment_overrun)
	ASM_CLAC
	pushl	$0
	pushl	$do_coprocessor_segment_overrun
	jmp	common_exception
END(coprocessor_segment_overrun)

ENTRY(invalid_TSS)
	ASM_CLAC
	pushl	$do_invalid_TSS
	jmp	common_exception
END(invalid_TSS)

ENTRY(segment_not_present)
	ASM_CLAC
	pushl	$do_segment_not_present
	jmp	common_exception
END(segment_not_present)

ENTRY(stack_segment)
	ASM_CLAC
	pushl	$do_stack_segment
	jmp	common_exception
END(stack_segment)

ENTRY(alignment_check)
	ASM_CLAC
	pushl	$do_alignment_check
	jmp	common_exception
END(alignment_check)

ENTRY(divide_error)
	ASM_CLAC
	pushl	$0				# no error code
	pushl	$do_divide_error
	jmp	common_exception
END(divide_error)

#ifdef CONFIG_X86_MCE
ENTRY(machine_check)
	ASM_CLAC
	pushl	$0
	pushl	machine_check_vector
	jmp	common_exception
END(machine_check)
#endif

ENTRY(spurious_interrupt_bug)
	ASM_CLAC
	pushl	$0
	pushl	$do_spurious_interrupt_bug
	jmp	common_exception
END(spurious_interrupt_bug)

#ifdef CONFIG_XEN
ENTRY(xen_hypervisor_callback)
	pushl	$-1				/* orig_ax = -1 => not a system call */
	SAVE_ALL
	ENCODE_FRAME_POINTER
	TRACE_IRQS_OFF

	/*
	 * Check to see if we got the event in the critical
	 * region in xen_iret_direct, after we've reenabled
	 * events and checked for pending events. This simulates
	 * iret instruction's behaviour where it delivers a
	 * pending interrupt when enabling interrupts:
	 */
	movl	PT_EIP(%esp), %eax
	cmpl	$xen_iret_start_crit, %eax
	jb	1f
	cmpl	$xen_iret_end_crit, %eax
	jae	1f

	jmp	xen_iret_crit_fixup

ENTRY(xen_do_upcall)
1:	mov	%esp, %eax
	call	xen_evtchn_do_upcall
#ifndef CONFIG_PREEMPT
	call	xen_maybe_preempt_hcall
#endif
	jmp	ret_from_intr
ENDPROC(xen_hypervisor_callback)

/*
 * Hypervisor uses this for application faults while it executes.
 * We get here for two reasons:
 *  1. Fault while reloading DS, ES, FS or GS
 *  2. Fault while executing IRET
 * Category 1 we fix up by reattempting the load, and zeroing the segment
 * register if the load fails.
 * Category 2 we fix up by jumping to do_iret_error. We cannot use the
 * normal Linux return path in this case because if we use the IRET hypercall
 * to pop the stack frame we end up in an infinite loop of failsafe callbacks.
 * We distinguish between categories by maintaining a status value in EAX.
 */
ENTRY(xen_failsafe_callback)
	pushl	%eax
	movl	$1, %eax
1:	mov	4(%esp), %ds
2:	mov	8(%esp), %es
3:	mov	12(%esp), %fs
4:	mov	16(%esp), %gs
	/* EAX == 0 => Category 1 (Bad segment)
	   EAX != 0 => Category 2 (Bad IRET) */
	testl	%eax, %eax
	popl	%eax
	lea	16(%esp), %esp
	jz	5f
	jmp	iret_exc
5:	pushl	$-1				/* orig_ax = -1 => not a system call */
	SAVE_ALL
	ENCODE_FRAME_POINTER
	jmp	ret_from_exception

.section .fixup, "ax"
6:	xorl	%eax, %eax
	movl	%eax, 4(%esp)
	jmp	1b
7:	xorl	%eax, %eax
	movl	%eax, 8(%esp)
	jmp	2b
8:	xorl	%eax, %eax
	movl	%eax, 12(%esp)
	jmp	3b
9:	xorl	%eax, %eax
	movl	%eax, 16(%esp)
	jmp	4b
.previous
	_ASM_EXTABLE(1b, 6b)
	_ASM_EXTABLE(2b, 7b)
	_ASM_EXTABLE(3b, 8b)
	_ASM_EXTABLE(4b, 9b)
ENDPROC(xen_failsafe_callback)

BUILD_INTERRUPT3(xen_hvm_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
		 xen_evtchn_do_upcall)

#endif /* CONFIG_XEN */

#if IS_ENABLED(CONFIG_HYPERV)
BUILD_INTERRUPT3(hyperv_callback_vector, HYPERVISOR_CALLBACK_VECTOR,
		 hyperv_vector_handler)
#endif /* CONFIG_HYPERV */

ENTRY(page_fault)
	ASM_CLAC
	pushl	$do_page_fault
	ALIGN
	jmp	common_exception
END(page_fault)

common_exception:
	/* the function address is in %gs's slot on the stack */
	pushl	%fs
	pushl	%es
	pushl	%ds
	pushl	%eax
	pushl	%ebp
	pushl	%edi
	pushl	%esi
	pushl	%edx
	pushl	%ecx
	pushl	%ebx
	ENCODE_FRAME_POINTER
	cld
	movl	$(__KERNEL_PERCPU), %ecx
	movl	%ecx, %fs
	UNWIND_ESPFIX_STACK
	GS_TO_REG %ecx
	movl	PT_GS(%esp), %edi		# get the function address
	movl	PT_ORIG_EAX(%esp), %edx		# get the error code
	movl	$-1, PT_ORIG_EAX(%esp)		# no syscall to restart
	REG_TO_PTGS %ecx
	SET_KERNEL_GS %ecx
	movl	$(__USER_DS), %ecx
	movl	%ecx, %ds
	movl	%ecx, %es
	TRACE_IRQS_OFF
	movl	%esp, %eax			# pt_regs pointer
	CALL_NOSPEC %edi
	jmp	ret_from_exception
END(common_exception)

ENTRY(debug)
	/*
	 * #DB can happen at the first instruction of
	 * entry_SYSENTER_32 or in Xen's SYSENTER prologue.  If this
	 * happens, then we will be running on a very small stack.  We
	 * need to detect this condition and switch to the thread
	 * stack before calling any C code at all.
	 *
	 * If you edit this code, keep in mind that NMIs can happen in here.
	 */
	ASM_CLAC
	pushl	$-1				# mark this as an int
	SAVE_ALL
	ENCODE_FRAME_POINTER
	xorl	%edx, %edx			# error code 0
	movl	%esp, %eax			# pt_regs pointer

	/* Are we currently on the SYSENTER stack? */
	movl	PER_CPU_VAR(cpu_entry_area), %ecx
	addl	$CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
	subl	%eax, %ecx			/* ecx = (end of entry_stack) - esp */
	cmpl	$SIZEOF_entry_stack, %ecx
	jb	.Ldebug_from_sysenter_stack
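	/*
	 * The three instructions above compute, roughly:
	 *	on_entry_stack = (end_of_entry_stack - %esp) < SIZEOF_entry_stack
	 * with an unsigned compare, so anything not within the entry stack
	 * (including addresses above its end) takes the normal path.  The
	 * NMI handler below repeats the same test.
	 */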
	TRACE_IRQS_OFF
	call	do_debug
	jmp	ret_from_exception

.Ldebug_from_sysenter_stack:
	/* We're on the SYSENTER stack.  Switch off. */
	movl	%esp, %ebx
	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esp
	TRACE_IRQS_OFF
	call	do_debug
	movl	%ebx, %esp
	jmp	ret_from_exception
END(debug)

/*
 * NMI is doubly nasty.  It can happen on the first instruction of
 * entry_SYSENTER_32 (just like #DB), but it can also interrupt the beginning
 * of the #DB handler even if that #DB in turn hit before entry_SYSENTER_32
 * switched stacks.  We handle both conditions by simply checking whether we
 * interrupted kernel code running on the SYSENTER stack.
 */
ENTRY(nmi)
	ASM_CLAC
#ifdef CONFIG_X86_ESPFIX32
	pushl	%eax
	movl	%ss, %eax
	cmpw	$__ESPFIX_SS, %ax
	popl	%eax
	je	.Lnmi_espfix_stack
#endif

	pushl	%eax				# pt_regs->orig_ax
	SAVE_ALL
	ENCODE_FRAME_POINTER
	xorl	%edx, %edx			# zero error code
	movl	%esp, %eax			# pt_regs pointer

	/* Are we currently on the SYSENTER stack? */
	movl	PER_CPU_VAR(cpu_entry_area), %ecx
	addl	$CPU_ENTRY_AREA_entry_stack + SIZEOF_entry_stack, %ecx
	subl	%eax, %ecx			/* ecx = (end of entry_stack) - esp */
	cmpl	$SIZEOF_entry_stack, %ecx
	jb	.Lnmi_from_sysenter_stack

	/* Not on SYSENTER stack. */
	call	do_nmi
	jmp	.Lrestore_all_notrace

.Lnmi_from_sysenter_stack:
	/*
	 * We're on the SYSENTER stack.  Switch off.  No one (not even debug)
	 * is using the thread stack right now, so it's safe for us to use it.
	 */
	movl	%esp, %ebx
	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esp
	call	do_nmi
	movl	%ebx, %esp
	jmp	.Lrestore_all_notrace

#ifdef CONFIG_X86_ESPFIX32
.Lnmi_espfix_stack:
	/*
	 * create the pointer to lss back
	 */
	pushl	%ss
	pushl	%esp
	addl	$4, (%esp)
	/* copy the iret frame of 12 bytes */
	.rept 3
	pushl	16(%esp)
	.endr
	pushl	%eax
	SAVE_ALL
	ENCODE_FRAME_POINTER
	FIXUP_ESPFIX_STACK			# %eax == %esp
	xorl	%edx, %edx			# zero error code
	call	do_nmi
	RESTORE_REGS
	lss	12+4(%esp), %esp		# back to espfix stack
	jmp	.Lirq_return
#endif
END(nmi)

ENTRY(int3)
	ASM_CLAC
	pushl	$-1				# mark this as an int
	SAVE_ALL
	ENCODE_FRAME_POINTER
	TRACE_IRQS_OFF
	xorl	%edx, %edx			# zero error code
	movl	%esp, %eax			# pt_regs pointer
	call	do_int3
	jmp	ret_from_exception
END(int3)

ENTRY(general_protection)
	ASM_CLAC
	pushl	$do_general_protection
	jmp	common_exception
END(general_protection)

#ifdef CONFIG_KVM_GUEST
ENTRY(async_page_fault)
	ASM_CLAC
	pushl	$do_async_page_fault
	jmp	common_exception
END(async_page_fault)
#endif

ENTRY(rewind_stack_do_exit)
	/* Prevent any naive code from trying to unwind to our caller. */
	xorl	%ebp, %ebp

	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esi
	leal	-TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp

	call	do_exit
1:	jmp	1b
END(rewind_stack_do_exit)