sandbox-seccomp-filter.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466
  1. /*
  2. * Copyright (c) 2012 Will Drewry <wad@dataspill.org>
  3. *
  4. * Permission to use, copy, modify, and distribute this software for any
  5. * purpose with or without fee is hereby granted, provided that the above
  6. * copyright notice and this permission notice appear in all copies.
  7. *
  8. * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9. * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  10. * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  11. * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  12. * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  13. * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  14. * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  15. */
  16. /*
  17. * Uncomment the SANDBOX_SECCOMP_FILTER_DEBUG macro below to help diagnose
  18. * filter breakage during development. *Do not* use this in production,
  19. * as it relies on making library calls that are unsafe in signal context.
  20. *
  21. * Instead, on live systems auditctl(8) may be used to monitor failures.
  22. * E.g.
  23. * auditctl -a task,always -F uid=<privsep uid>
  24. */
  25. /* #define SANDBOX_SECCOMP_FILTER_DEBUG 1 */
  26. /* XXX it should be possible to do logging via the log socket safely */
  27. #ifdef SANDBOX_SECCOMP_FILTER_DEBUG
  28. /* Use the kernel headers in case of an older toolchain. */
  29. # include <asm/siginfo.h>
  30. # define __have_siginfo_t 1
  31. # define __have_sigval_t 1
  32. # define __have_sigevent_t 1
  33. #endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
  34. #include "includes.h"
  35. #ifdef SANDBOX_SECCOMP_FILTER
  36. #include <sys/types.h>
  37. #include <sys/resource.h>
  38. #include <sys/prctl.h>
  39. #include <sys/mman.h>
  40. #include <sys/syscall.h>
  41. #include <linux/net.h>
  42. #include <linux/audit.h>
  43. #include <linux/filter.h>
  44. #include <linux/seccomp.h>
  45. #include <elf.h>
  46. #include <asm/unistd.h>
  47. #ifdef __s390__
  48. #include <asm/zcrypt.h>
  49. #endif
  50. #include <errno.h>
  51. #include <signal.h>
  52. #include <stdarg.h>
  53. #include <stddef.h> /* for offsetof */
  54. #include <stdio.h>
  55. #include <stdlib.h>
  56. #include <string.h>
  57. #include <unistd.h>
  58. #include "log.h"
  59. #include "ssh-sandbox.h"
  60. #include "xmalloc.h"
  61. /* Linux seccomp_filter sandbox */
  /*
   * Filter action for a syscall that is not permitted.  Normal builds kill
   * the offender; debug builds raise a trap instead so that a signal handler
   * can emit the violation.
   */
  #ifdef SANDBOX_SECCOMP_FILTER_DEBUG
  # define SECCOMP_FILTER_FAIL SECCOMP_RET_TRAP
  #else
  # define SECCOMP_FILTER_FAIL SECCOMP_RET_KILL
  #endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
  /*
   * Byte offsets, relative to the start of a syscall argument slot, of the
   * 32-bit low and high words that the argument-matching macros load.
   * Which word comes first depends on the host byte order.
   */
  #if __BYTE_ORDER != __LITTLE_ENDIAN && __BYTE_ORDER != __BIG_ENDIAN
  # error "Unknown endianness"
  #endif
  #if __BYTE_ORDER == __LITTLE_ENDIAN
  # define ARG_LO_OFFSET 0
  # define ARG_HI_OFFSET sizeof(uint32_t)
  #else /* __BIG_ENDIAN */
  # define ARG_LO_OFFSET sizeof(uint32_t)
  # define ARG_HI_OFFSET 0
  #endif
  77. /* Simple helpers to avoid manual errors (but larger BPF programs). */
  /*
   * SC_DENY(sysno, err): two-instruction rule.  When the accumulator (which
   * holds the syscall number) equals `sysno`, return SECCOMP_RET_ERRNO with
   * `err`; otherwise skip the return and continue with the next rule.
   */
  #define SC_DENY(sysno, err)                                     \
      BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (sysno), 0, 1),             \
      BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ERRNO|(err))
  /*
   * SC_ALLOW(sysno): two-instruction rule.  When the accumulator (which
   * holds the syscall number) equals `sysno`, return SECCOMP_RET_ALLOW;
   * otherwise skip the return and continue with the next rule.
   */
  #define SC_ALLOW(sysno)                                         \
      BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (sysno), 0, 1),             \
      BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
  /*
   * SC_ALLOW_ARG(sysno, argno, want): seven-instruction rule that allows
   * syscall `sysno` only when argument number `argno` equals `want`.  The
   * argument is compared as two 32-bit words (low, then high).  If the
   * syscall number does not match, the whole rule is skipped; if either
   * word differs, control jumps to the final instruction, which puts the
   * syscall number back in the accumulator for the rules that follow.
   */
  #define SC_ALLOW_ARG(sysno, argno, want)                                \
      /* not our syscall: skip the remaining six instructions */          \
      BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (sysno), 0, 6),                     \
      /* low 32 bits of the argument */                                   \
      BPF_STMT(BPF_LD+BPF_W+BPF_ABS,                                      \
          offsetof(struct seccomp_data, args[(argno)]) + ARG_LO_OFFSET),  \
      BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K,                                     \
          ((want) & 0xFFFFFFFF), 0, 3),                                   \
      /* high 32 bits of the argument */                                  \
      BPF_STMT(BPF_LD+BPF_W+BPF_ABS,                                      \
          offsetof(struct seccomp_data, args[(argno)]) + ARG_HI_OFFSET),  \
      BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K,                                     \
          (uint32_t)((uint64_t)(want) >> 32), 0, 1),                      \
      BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),                         \
      /* mismatch: restore the syscall number for subsequent rules */     \
      BPF_STMT(BPF_LD+BPF_W+BPF_ABS,                                      \
          offsetof(struct seccomp_data, nr))
  /*
   * SC_ALLOW_ARG_MASK(sysno, argno, allowed): nine-instruction rule that
   * allows syscall `sysno` only when argument number `argno` has no bits
   * set outside `allowed`.  Each 32-bit half of the argument is ANDed with
   * the complement of the corresponding half of the mask and must come out
   * zero.  If the syscall number does not match, the whole rule is skipped;
   * if a stray bit is found, control jumps to the final instruction, which
   * puts the syscall number back in the accumulator for the rules that
   * follow.
   */
  #define SC_ALLOW_ARG_MASK(sysno, argno, allowed)                        \
      /* not our syscall: skip the remaining eight instructions */        \
      BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, (sysno), 0, 8),                     \
      /* low 32 bits: keep only the bits outside the mask */              \
      BPF_STMT(BPF_LD+BPF_W+BPF_ABS,                                      \
          offsetof(struct seccomp_data, args[(argno)]) + ARG_LO_OFFSET),  \
      BPF_STMT(BPF_ALU+BPF_AND+BPF_K, ~((allowed) & 0xFFFFFFFF)),         \
      BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0, 0, 4),                           \
      /* high 32 bits: keep only the bits outside the mask */             \
      BPF_STMT(BPF_LD+BPF_W+BPF_ABS,                                      \
          offsetof(struct seccomp_data, args[(argno)]) + ARG_HI_OFFSET),  \
      BPF_STMT(BPF_ALU+BPF_AND+BPF_K,                                     \
          ~(uint32_t)((uint64_t)(allowed) >> 32)),                        \
      BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, 0, 0, 1),                           \
      BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),                         \
      /* stray bits: restore the syscall number for subsequent rules */   \
      BPF_STMT(BPF_LD+BPF_W+BPF_ABS,                                      \
          offsetof(struct seccomp_data, nr))
  118. /* Syscall filtering set for preauth. */
  119. static const struct sock_filter preauth_insns[] = {
  120. /* Ensure the syscall arch convention is as expected. */
  121. BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
  122. offsetof(struct seccomp_data, arch)),
  123. BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SECCOMP_AUDIT_ARCH, 1, 0),
  124. BPF_STMT(BPF_RET+BPF_K, SECCOMP_FILTER_FAIL),
  125. /* Load the syscall number for checking. */
  126. BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
  127. offsetof(struct seccomp_data, nr)),
  128. /* Syscalls to non-fatally deny */
  129. #ifdef __NR_lstat
  130. SC_DENY(__NR_lstat, EACCES),
  131. #endif
  132. #ifdef __NR_lstat64
  133. SC_DENY(__NR_lstat64, EACCES),
  134. #endif
  135. #ifdef __NR_fstat
  136. SC_DENY(__NR_fstat, EACCES),
  137. #endif
  138. #ifdef __NR_fstat64
  139. SC_DENY(__NR_fstat64, EACCES),
  140. #endif
  141. #ifdef __NR_open
  142. SC_DENY(__NR_open, EACCES),
  143. #endif
  144. #ifdef __NR_openat
  145. SC_DENY(__NR_openat, EACCES),
  146. #endif
  147. #ifdef __NR_newfstatat
  148. SC_DENY(__NR_newfstatat, EACCES),
  149. #endif
  150. #ifdef __NR_stat
  151. SC_DENY(__NR_stat, EACCES),
  152. #endif
  153. #ifdef __NR_stat64
  154. SC_DENY(__NR_stat64, EACCES),
  155. #endif
  156. #ifdef __NR_shmget
  157. SC_DENY(__NR_shmget, EACCES),
  158. #endif
  159. #ifdef __NR_shmat
  160. SC_DENY(__NR_shmat, EACCES),
  161. #endif
  162. #ifdef __NR_shmdt
  163. SC_DENY(__NR_shmdt, EACCES),
  164. #endif
  165. #ifdef __NR_ipc
  166. SC_DENY(__NR_ipc, EACCES),
  167. #endif
  168. #ifdef __NR_statx
  169. SC_DENY(__NR_statx, EACCES),
  170. #endif
  171. /* Syscalls to permit */
  172. #ifdef __NR_brk
  173. SC_ALLOW(__NR_brk),
  174. #endif
  175. #ifdef __NR_clock_gettime
  176. SC_ALLOW(__NR_clock_gettime),
  177. #endif
  178. #ifdef __NR_clock_gettime64
  179. SC_ALLOW(__NR_clock_gettime64),
  180. #endif
  181. #ifdef __NR_close
  182. SC_ALLOW(__NR_close),
  183. #endif
  184. #ifdef __NR_exit
  185. SC_ALLOW(__NR_exit),
  186. #endif
  187. #ifdef __NR_exit_group
  188. SC_ALLOW(__NR_exit_group),
  189. #endif
  190. #if defined(__NR_flock) && defined(__s390__)
  191. SC_ALLOW(__NR_flock),
  192. #endif
  193. #ifdef __NR_futex
  194. SC_ALLOW(__NR_futex),
  195. #endif
  196. #ifdef __NR_futex_time64
  197. SC_ALLOW(__NR_futex_time64),
  198. #endif
  199. #ifdef __NR_geteuid
  200. SC_ALLOW(__NR_geteuid),
  201. #endif
  202. #ifdef __NR_geteuid32
  203. SC_ALLOW(__NR_geteuid32),
  204. #endif
  205. #ifdef __NR_getpeername /* not defined on archs that go via socketcall(2) */
  206. SC_ALLOW(__NR_getpeername),
  207. #endif
  208. #ifdef __NR_getpgid
  209. SC_ALLOW(__NR_getpgid),
  210. #endif
  211. #ifdef __NR_getpid
  212. SC_ALLOW(__NR_getpid),
  213. #endif
  214. #ifdef __NR_getuid
  215. SC_ALLOW(__NR_getuid),
  216. #endif
  217. #ifdef __NR_getuid32
  218. SC_ALLOW(__NR_getuid32),
  219. #endif
  220. #ifdef __NR_geteuid
  221. SC_ALLOW(__NR_geteuid),
  222. #endif
  223. #ifdef __NR_geteuid32
  224. SC_ALLOW(__NR_geteuid32),
  225. #endif
  226. #ifdef __NR_gettid
  227. SC_ALLOW(__NR_gettid),
  228. #endif
  229. #ifdef __NR_getrandom
  230. SC_ALLOW(__NR_getrandom),
  231. #endif
  232. #ifdef __NR_gettimeofday
  233. SC_ALLOW(__NR_gettimeofday),
  234. #endif
  235. #if defined(__NR_ipc) && defined(__s390__)
  236. SC_ALLOW(__NR_ipc),
  237. #endif
  238. #ifdef __NR_getuid
  239. SC_ALLOW(__NR_getuid),
  240. #endif
  241. #ifdef __NR_getuid32
  242. SC_ALLOW(__NR_getuid32),
  243. #endif
  244. #ifdef __NR_madvise
  245. SC_ALLOW(__NR_madvise),
  246. #endif
  247. #ifdef __NR_mmap
  248. SC_ALLOW_ARG_MASK(__NR_mmap, 2, PROT_READ|PROT_WRITE|PROT_NONE),
  249. #endif
  250. #ifdef __NR_mmap2
  251. SC_ALLOW_ARG_MASK(__NR_mmap2, 2, PROT_READ|PROT_WRITE|PROT_NONE),
  252. #endif
  253. #ifdef __NR_mprotect
  254. SC_ALLOW_ARG_MASK(__NR_mprotect, 2, PROT_READ|PROT_WRITE|PROT_NONE),
  255. #endif
  256. #ifdef __NR_mremap
  257. SC_ALLOW(__NR_mremap),
  258. #endif
  259. #ifdef __NR_munmap
  260. SC_ALLOW(__NR_munmap),
  261. #endif
  262. #ifdef __NR_nanosleep
  263. SC_ALLOW(__NR_nanosleep),
  264. #endif
  265. #ifdef __NR_clock_nanosleep
  266. SC_ALLOW(__NR_clock_nanosleep),
  267. #endif
  268. #ifdef __NR_clock_nanosleep_time64
  269. SC_ALLOW(__NR_clock_nanosleep_time64),
  270. #endif
  271. #ifdef __NR_clock_gettime64
  272. SC_ALLOW(__NR_clock_gettime64),
  273. #endif
  274. #ifdef __NR__newselect
  275. SC_ALLOW(__NR__newselect),
  276. #endif
  277. #ifdef __NR_poll
  278. SC_ALLOW(__NR_poll),
  279. #endif
  280. #ifdef __NR_pselect6
  281. SC_ALLOW(__NR_pselect6),
  282. #endif
  283. #ifdef __NR_pselect6_time64
  284. SC_ALLOW(__NR_pselect6_time64),
  285. #endif
  286. #ifdef __NR_read
  287. SC_ALLOW(__NR_read),
  288. #endif
  289. #ifdef __NR_rt_sigprocmask
  290. SC_ALLOW(__NR_rt_sigprocmask),
  291. #endif
  292. #ifdef __NR_select
  293. SC_ALLOW(__NR_select),
  294. #endif
  295. #ifdef __NR_shutdown
  296. SC_ALLOW(__NR_shutdown),
  297. #endif
  298. #ifdef __NR_sigprocmask
  299. SC_ALLOW(__NR_sigprocmask),
  300. #endif
  301. #ifdef __NR_socketcall
  302. SC_ALLOW(__NR_socketcall),
  303. #endif
  304. #ifdef __NR_time
  305. SC_ALLOW(__NR_time),
  306. #endif
  307. #ifdef __NR_write
  308. SC_ALLOW(__NR_write),
  309. #endif
  310. #ifdef __NR_socketcall
  311. SC_ALLOW_ARG(__NR_socketcall, 0, SYS_SHUTDOWN),
  312. SC_DENY(__NR_socketcall, EACCES),
  313. #endif
  314. #if defined(__NR_ioctl) && defined(__s390__)
  315. /* Allow ioctls for ICA crypto card on s390 */
  316. SC_ALLOW_ARG(__NR_ioctl, 1, Z90STAT_STATUS_MASK),
  317. SC_ALLOW_ARG(__NR_ioctl, 1, ICARSAMODEXPO),
  318. SC_ALLOW_ARG(__NR_ioctl, 1, ICARSACRT),
  319. SC_ALLOW_ARG(__NR_ioctl, 1, ZSECSENDCPRB),
  320. /* Allow ioctls for EP11 crypto card on s390 */
  321. SC_ALLOW_ARG(__NR_ioctl, 1, ZSENDEP11CPRB),
  322. #endif
  323. #if defined(__x86_64__) && defined(__ILP32__) && defined(__X32_SYSCALL_BIT)
  324. /*
  325. * On Linux x32, the clock_gettime VDSO falls back to the
  326. * x86-64 syscall under some circumstances, e.g.
  327. * https://bugs.debian.org/849923
  328. */
  329. SC_ALLOW(__NR_clock_gettime & ~__X32_SYSCALL_BIT),
  330. #endif
  331. /* Default deny */
  332. BPF_STMT(BPF_RET+BPF_K, SECCOMP_FILTER_FAIL),
  333. };
  334. static const struct sock_fprog preauth_program = {
  335. .len = (unsigned short)(sizeof(preauth_insns)/sizeof(preauth_insns[0])),
  336. .filter = (struct sock_filter *)preauth_insns,
  337. };
  /* Sandbox state handed back and forth through the ssh_sandbox_* API. */
  struct ssh_sandbox {
      pid_t child_pid;    /* recorded by ssh_sandbox_parent_preauth() */
  };
  341. struct ssh_sandbox *
  342. ssh_sandbox_init(struct monitor *monitor)
  343. {
  344. struct ssh_sandbox *box;
  345. /*
  346. * Strictly, we don't need to maintain any state here but we need
  347. * to return non-NULL to satisfy the API.
  348. */
  349. debug3("%s: preparing seccomp filter sandbox", __func__);
  350. box = xcalloc(1, sizeof(*box));
  351. box->child_pid = 0;
  352. return box;
  353. }
  354. #ifdef SANDBOX_SECCOMP_FILTER_DEBUG
  355. extern struct monitor *pmonitor;
  356. void mm_log_handler(LogLevel level, const char *msg, void *ctx);
  357. static void
  358. ssh_sandbox_violation(int signum, siginfo_t *info, void *void_context)
  359. {
  360. char msg[256];
  361. snprintf(msg, sizeof(msg),
  362. "%s: unexpected system call (arch:0x%x,syscall:%d @ %p)",
  363. __func__, info->si_arch, info->si_syscall, info->si_call_addr);
  364. mm_log_handler(SYSLOG_LEVEL_FATAL, msg, pmonitor);
  365. _exit(1);
  366. }
  367. static void
  368. ssh_sandbox_child_debugging(void)
  369. {
  370. struct sigaction act;
  371. sigset_t mask;
  372. debug3("%s: installing SIGSYS handler", __func__);
  373. memset(&act, 0, sizeof(act));
  374. sigemptyset(&mask);
  375. sigaddset(&mask, SIGSYS);
  376. act.sa_sigaction = &ssh_sandbox_violation;
  377. act.sa_flags = SA_SIGINFO;
  378. if (sigaction(SIGSYS, &act, NULL) == -1)
  379. fatal("%s: sigaction(SIGSYS): %s", __func__, strerror(errno));
  380. if (sigprocmask(SIG_UNBLOCK, &mask, NULL) == -1)
  381. fatal("%s: sigprocmask(SIGSYS): %s",
  382. __func__, strerror(errno));
  383. }
  384. #endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
  385. void
  386. ssh_sandbox_child(struct ssh_sandbox *box)
  387. {
  388. struct rlimit rl_zero;
  389. int nnp_failed = 0;
  390. /* Set rlimits for completeness if possible. */
  391. rl_zero.rlim_cur = rl_zero.rlim_max = 0;
  392. if (setrlimit(RLIMIT_FSIZE, &rl_zero) == -1)
  393. fatal("%s: setrlimit(RLIMIT_FSIZE, { 0, 0 }): %s",
  394. __func__, strerror(errno));
  395. if (setrlimit(RLIMIT_NOFILE, &rl_zero) == -1)
  396. fatal("%s: setrlimit(RLIMIT_NOFILE, { 0, 0 }): %s",
  397. __func__, strerror(errno));
  398. if (setrlimit(RLIMIT_NPROC, &rl_zero) == -1)
  399. fatal("%s: setrlimit(RLIMIT_NPROC, { 0, 0 }): %s",
  400. __func__, strerror(errno));
  401. #ifdef SANDBOX_SECCOMP_FILTER_DEBUG
  402. ssh_sandbox_child_debugging();
  403. #endif /* SANDBOX_SECCOMP_FILTER_DEBUG */
  404. debug3("%s: setting PR_SET_NO_NEW_PRIVS", __func__);
  405. if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1) {
  406. debug("%s: prctl(PR_SET_NO_NEW_PRIVS): %s",
  407. __func__, strerror(errno));
  408. nnp_failed = 1;
  409. }
  410. debug3("%s: attaching seccomp filter program", __func__);
  411. if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &preauth_program) == -1)
  412. debug("%s: prctl(PR_SET_SECCOMP): %s",
  413. __func__, strerror(errno));
  414. else if (nnp_failed)
  415. fatal("%s: SECCOMP_MODE_FILTER activated but "
  416. "PR_SET_NO_NEW_PRIVS failed", __func__);
  417. }
  /*
   * Parent-side teardown: release the sandbox state object and log
   * completion.  `box` must not be used after this call.
   */
  void
  ssh_sandbox_parent_finish(struct ssh_sandbox *box)
  {
      free(box);

      debug3("%s: finished", __func__);
  }
  424. void
  425. ssh_sandbox_parent_preauth(struct ssh_sandbox *box, pid_t child_pid)
  426. {
  427. box->child_pid = child_pid;
  428. }
  429. #endif /* SANDBOX_SECCOMP_FILTER */