bpf_verifier.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435
  1. /* SPDX-License-Identifier: GPL-2.0-only */
  2. /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
  3. */
  4. #ifndef _LINUX_BPF_VERIFIER_H
  5. #define _LINUX_BPF_VERIFIER_H 1
  6. #include <linux/bpf.h> /* for enum bpf_reg_type */
  7. #include <linux/filter.h> /* for MAX_BPF_STACK */
  8. #include <linux/tnum.h>
  /*
   * Upper bound on the umax_value of a variable offset when resolving memory
   * accesses.  This is far larger than any realistic pointer offset; the cap
   * exists so that umax_value + (int)off + (int)size cannot overflow a u64.
   */
  #define BPF_MAX_VAR_OFF (1 << 29)

  /*
   * Upper bound on a variable size for ARG_CONST_SIZE[_OR_ZERO].  The cap
   * ensures that converting umax_value to int cannot overflow.
   */
  #define BPF_MAX_VAR_SIZ (1 << 29)
  /*
   * Liveness marks for registers and for registers spilled into stack slots.
   *
   * Read marks travel upwards until they meet a write mark.  A read mark means
   * "one of this state's descendants read this reg", so the reg is relevant
   * for states_equal() checks.
   *
   * Write marks collect downwards and never propagate.  A write mark means
   * "the straight-line code that reached this state (from its parent) wrote
   * this reg", so reads propagated from this state or its descendants should
   * not propagate to its parent.
   *
   * A state carrying a write mark can still receive read marks; it simply does
   * not pass them on to its parent, because the write mark is a property of the
   * link between the state and its parent rather than of the state itself.
   * See mark_reg_read() and mark_stack_slot_read() in kernel/bpf/verifier.c.
   */
  enum bpf_reg_liveness {
      /* reg hasn't been read or written this branch */
      REG_LIVE_NONE    = 0,
      /* reg was read, so we're sensitive to initial value */
      REG_LIVE_READ32  = 1 << 0,
      /* likewise, but full 64-bit content matters */
      REG_LIVE_READ64  = 1 << 1,
      /* either kind of read */
      REG_LIVE_READ    = REG_LIVE_READ32 | REG_LIVE_READ64,
      /* reg was written first, screening off later reads */
      REG_LIVE_WRITTEN = 1 << 2,
      /* liveness won't be updating this register anymore */
      REG_LIVE_DONE    = 1 << 3,
  };
  39. struct bpf_reg_state {
  40. /* Ordering of fields matters. See states_equal() */
  41. enum bpf_reg_type type;
  42. union {
  43. /* valid when type == PTR_TO_PACKET */
  44. u16 range;
  45. /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE |
  46. * PTR_TO_MAP_VALUE_OR_NULL
  47. */
  48. struct bpf_map *map_ptr;
  49. /* Max size from any of the above. */
  50. unsigned long raw;
  51. };
  52. /* Fixed part of pointer offset, pointer types only */
  53. s32 off;
  54. /* For PTR_TO_PACKET, used to find other pointers with the same variable
  55. * offset, so they can share range knowledge.
  56. * For PTR_TO_MAP_VALUE_OR_NULL this is used to share which map value we
  57. * came from, when one is tested for != NULL.
  58. * For PTR_TO_SOCKET this is used to share which pointers retain the
  59. * same reference to the socket, to determine proper reference freeing.
  60. */
  61. u32 id;
  62. /* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned
  63. * from a pointer-cast helper, bpf_sk_fullsock() and
  64. * bpf_tcp_sock().
  65. *
  66. * Consider the following where "sk" is a reference counted
  67. * pointer returned from "sk = bpf_sk_lookup_tcp();":
  68. *
  69. * 1: sk = bpf_sk_lookup_tcp();
  70. * 2: if (!sk) { return 0; }
  71. * 3: fullsock = bpf_sk_fullsock(sk);
  72. * 4: if (!fullsock) { bpf_sk_release(sk); return 0; }
  73. * 5: tp = bpf_tcp_sock(fullsock);
  74. * 6: if (!tp) { bpf_sk_release(sk); return 0; }
  75. * 7: bpf_sk_release(sk);
  76. * 8: snd_cwnd = tp->snd_cwnd; // verifier will complain
  77. *
  78. * After bpf_sk_release(sk) at line 7, both "fullsock" ptr and
  79. * "tp" ptr should be invalidated also. In order to do that,
  80. * the reg holding "fullsock" and "sk" need to remember
  81. * the original refcounted ptr id (i.e. sk_reg->id) in ref_obj_id
  82. * such that the verifier can reset all regs which have
  83. * ref_obj_id matching the sk_reg->id.
  84. *
  85. * sk_reg->ref_obj_id is set to sk_reg->id at line 1.
  86. * sk_reg->id will stay as NULL-marking purpose only.
  87. * After NULL-marking is done, sk_reg->id can be reset to 0.
  88. *
  89. * After "fullsock = bpf_sk_fullsock(sk);" at line 3,
  90. * fullsock_reg->ref_obj_id is set to sk_reg->ref_obj_id.
  91. *
  92. * After "tp = bpf_tcp_sock(fullsock);" at line 5,
  93. * tp_reg->ref_obj_id is set to fullsock_reg->ref_obj_id
  94. * which is the same as sk_reg->ref_obj_id.
  95. *
  96. * From the verifier perspective, if sk, fullsock and tp
  97. * are not NULL, they are the same ptr with different
  98. * reg->type. In particular, bpf_sk_release(tp) is also
  99. * allowed and has the same effect as bpf_sk_release(sk).
  100. */
  101. u32 ref_obj_id;
  102. /* For scalar types (SCALAR_VALUE), this represents our knowledge of
  103. * the actual value.
  104. * For pointer types, this represents the variable part of the offset
  105. * from the pointed-to object, and is shared with all bpf_reg_states
  106. * with the same id as us.
  107. */
  108. struct tnum var_off;
  109. /* Used to determine if any memory access using this register will
  110. * result in a bad access.
  111. * These refer to the same value as var_off, not necessarily the actual
  112. * contents of the register.
  113. */
  114. s64 smin_value; /* minimum possible (s64)value */
  115. s64 smax_value; /* maximum possible (s64)value */
  116. u64 umin_value; /* minimum possible (u64)value */
  117. u64 umax_value; /* maximum possible (u64)value */
  118. /* parentage chain for liveness checking */
  119. struct bpf_reg_state *parent;
  120. /* Inside the callee two registers can be both PTR_TO_STACK like
  121. * R1=fp-8 and R2=fp-8, but one of them points to this function stack
  122. * while another to the caller's stack. To differentiate them 'frameno'
  123. * is used which is an index in bpf_verifier_state->frame[] array
  124. * pointing to bpf_func_state.
  125. */
  126. u32 frameno;
  127. /* Tracks subreg definition. The stored value is the insn_idx of the
  128. * writing insn. This is safe because subreg_def is used before any insn
  129. * patching which only happens after main verification finished.
  130. */
  131. s32 subreg_def;
  132. enum bpf_reg_liveness live;
  133. /* if (!precise && SCALAR_VALUE) min/max/tnum don't affect safety */
  134. bool precise;
  135. };
  /* Classification of what a stack slot byte currently holds. */
  enum bpf_stack_slot_type {
      /* nothing was stored in this stack slot */
      STACK_INVALID,
      /* register spilled into stack */
      STACK_SPILL,
      /* BPF program wrote some data into this slot */
      STACK_MISC,
      /* BPF program wrote constant zero */
      STACK_ZERO,
  };
  /* size of eBPF register in bytes */
  #define BPF_REG_SIZE 8
  143. struct bpf_stack_state {
  144. struct bpf_reg_state spilled_ptr;
  145. u8 slot_type[BPF_REG_SIZE];
  146. };
  /* Bookkeeping for one acquired reference. */
  struct bpf_reference_state {
      /* Every reference created gets its own unique id, even when the same
       * instruction creates the reference multiple times (eg, via CALL).
       */
      int id;

      /* Instruction at which this reference was allocated.  Used purely to
       * inform the user of a reference leak.
       */
      int insn_idx;
  };
  157. /* state of the program:
  158. * type of all registers and stack info
  159. */
  160. struct bpf_func_state {
  161. struct bpf_reg_state regs[MAX_BPF_REG];
  162. /* index of call instruction that called into this func */
  163. int callsite;
  164. /* stack frame number of this function state from pov of
  165. * enclosing bpf_verifier_state.
  166. * 0 = main function, 1 = first callee.
  167. */
  168. u32 frameno;
  169. /* subprog number == index within subprog_stack_depth
  170. * zero == main subprog
  171. */
  172. u32 subprogno;
  173. /* The following fields should be last. See copy_func_state() */
  174. int acquired_refs;
  175. struct bpf_reference_state *refs;
  176. int allocated_stack;
  177. struct bpf_stack_state *stack;
  178. };
  179. struct bpf_idx_pair {
  180. u32 prev_idx;
  181. u32 idx;
  182. };
  183. struct bpf_id_pair {
  184. u32 old;
  185. u32 cur;
  186. };
  /*
   * Maximum number of register states that can exist at once: every register
   * plus one per BPF_REG_SIZE-sized stack slot.
   */
  #define BPF_ID_MAP_SIZE (MAX_BPF_REG + MAX_BPF_STACK / BPF_REG_SIZE)

  /* Size of the bpf_verifier_state::frame[] call stack array. */
  #define MAX_CALL_FRAMES 8
  190. struct bpf_verifier_state {
  191. /* call stack tracking */
  192. struct bpf_func_state *frame[MAX_CALL_FRAMES];
  193. struct bpf_verifier_state *parent;
  194. /*
  195. * 'branches' field is the number of branches left to explore:
  196. * 0 - all possible paths from this state reached bpf_exit or
  197. * were safely pruned
  198. * 1 - at least one path is being explored.
  199. * This state hasn't reached bpf_exit
  200. * 2 - at least two paths are being explored.
  201. * This state is an immediate parent of two children.
  202. * One is fallthrough branch with branches==1 and another
  203. * state is pushed into stack (to be explored later) also with
  204. * branches==1. The parent of this state has branches==1.
  205. * The verifier state tree connected via 'parent' pointer looks like:
  206. * 1
  207. * 1
  208. * 2 -> 1 (first 'if' pushed into stack)
  209. * 1
  210. * 2 -> 1 (second 'if' pushed into stack)
  211. * 1
  212. * 1
  213. * 1 bpf_exit.
  214. *
  215. * Once do_check() reaches bpf_exit, it calls update_branch_counts()
  216. * and the verifier state tree will look:
  217. * 1
  218. * 1
  219. * 2 -> 1 (first 'if' pushed into stack)
  220. * 1
  221. * 1 -> 1 (second 'if' pushed into stack)
  222. * 0
  223. * 0
  224. * 0 bpf_exit.
  225. * After pop_stack() the do_check() will resume at second 'if'.
  226. *
  227. * If is_state_visited() sees a state with branches > 0 it means
  228. * there is a loop. If such state is exactly equal to the current state
  229. * it's an infinite loop. Note states_equal() checks for states
  230. * equvalency, so two states being 'states_equal' does not mean
  231. * infinite loop. The exact comparison is provided by
  232. * states_maybe_looping() function. It's a stronger pre-check and
  233. * much faster than states_equal().
  234. *
  235. * This algorithm may not find all possible infinite loops or
  236. * loop iteration count may be too high.
  237. * In such cases BPF_COMPLEXITY_LIMIT_INSNS limit kicks in.
  238. */
  239. u32 branches;
  240. u32 insn_idx;
  241. u32 curframe;
  242. u32 active_spin_lock;
  243. bool speculative;
  244. /* first and last insn idx of this verifier state */
  245. u32 first_insn_idx;
  246. u32 last_insn_idx;
  247. /* jmp history recorded from first to last.
  248. * backtracking is using it to go from last to first.
  249. * For most states jmp_history_cnt is [0-3].
  250. * For loops can go up to ~40.
  251. */
  252. struct bpf_idx_pair *jmp_history;
  253. u32 jmp_history_cnt;
  254. };
  /*
   * Yield a pointer to the register spilled into stack slot 'slot' of 'frame',
   * or NULL when 'slot' lies at or beyond frame->allocated_stack / BPF_REG_SIZE
   * or when the slot's first type byte is not STACK_SPILL.
   *
   * Every macro argument is parenthesized so that arbitrary expressions (for
   * example '&state' or 'i + 1') expand with the intended precedence.  Both
   * arguments are still evaluated more than once, so do not pass expressions
   * with side effects.
   */
  #define bpf_get_spilled_reg(slot, frame)                                  \
      ((((slot) < (frame)->allocated_stack / BPF_REG_SIZE) &&               \
        ((frame)->stack[(slot)].slot_type[0] == STACK_SPILL))               \
       ? &(frame)->stack[(slot)].spilled_ptr : NULL)

  /*
   * Iterate over every stack slot of 'frame', setting 'reg' to either NULL or
   * the spilled register of the slot whose index is held in 'iter'.
   * 'iter' and 'reg' must be assignable lvalues.
   */
  #define bpf_for_each_spilled_reg(iter, frame, reg)                        \
      for ((iter) = 0, (reg) = bpf_get_spilled_reg((iter), (frame));        \
           (iter) < (frame)->allocated_stack / BPF_REG_SIZE;                \
           (iter)++, (reg) = bpf_get_spilled_reg((iter), (frame)))
  264. /* linked list of verifier states used to prune search */
  265. struct bpf_verifier_state_list {
  266. struct bpf_verifier_state state;
  267. struct bpf_verifier_state_list *next;
  268. int miss_cnt, hit_cnt;
  269. };
  /* Bit flags stored in the alu_state member of struct bpf_insn_aux_data. */
  #define BPF_ALU_SANITIZE_SRC (1U << 0)
  #define BPF_ALU_SANITIZE_DST (1U << 1)
  #define BPF_ALU_NEG_VALUE    (1U << 2)
  #define BPF_ALU_NON_POINTER  (1U << 3)
  #define BPF_ALU_IMMEDIATE    (1U << 4)
  /* Either sanitize flag. */
  #define BPF_ALU_SANITIZE     (BPF_ALU_SANITIZE_SRC | BPF_ALU_SANITIZE_DST)
  278. struct bpf_insn_aux_data {
  279. union {
  280. enum bpf_reg_type ptr_type; /* pointer type for load/store insns */
  281. unsigned long map_state; /* pointer/poison value for maps */
  282. s32 call_imm; /* saved imm field of call insn */
  283. u32 alu_limit; /* limit for add/sub register with pointer */
  284. struct {
  285. u32 map_index; /* index into used_maps[] */
  286. u32 map_off; /* offset from value base address */
  287. };
  288. };
  289. int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
  290. bool seen; /* this insn was processed by the verifier */
  291. bool sanitize_stack_spill; /* subject to Spectre v4 sanitation */
  292. bool zext_dst; /* this insn zero extends dst reg */
  293. u8 alu_state; /* used in combination with alu_limit */
  294. bool prune_point;
  295. unsigned int orig_idx; /* original instruction index */
  296. };
  /* max number of maps accessed by one eBPF program */
  #define MAX_USED_MAPS 64

  /* Size in bytes of the kbuf scratch buffer in struct bpf_verifier_log. */
  #define BPF_VERIFIER_TMP_LOG_SIZE 1024
  299. struct bpf_verifier_log {
  300. u32 level;
  301. char kbuf[BPF_VERIFIER_TMP_LOG_SIZE];
  302. char __user *ubuf;
  303. u32 len_used;
  304. u32 len_total;
  305. };
  306. static inline bool bpf_verifier_log_full(const struct bpf_verifier_log *log)
  307. {
  308. return log->len_used >= log->len_total - 1;
  309. }
  /* Bit values for the 'level' member of struct bpf_verifier_log. */
  #define BPF_LOG_LEVEL1 1
  #define BPF_LOG_LEVEL2 2
  #define BPF_LOG_STATS  4
  /* Both verbosity levels combined. */
  #define BPF_LOG_LEVEL  (BPF_LOG_LEVEL1 | BPF_LOG_LEVEL2)
  /* Every bit defined above. */
  #define BPF_LOG_MASK   (BPF_LOG_LEVEL | BPF_LOG_STATS)
  315. static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
  316. {
  317. return log->level && log->ubuf && !bpf_verifier_log_full(log);
  318. }
  /* bpf_verifier_env::subprog_info[] holds BPF_MAX_SUBPROGS + 1 entries. */
  #define BPF_MAX_SUBPROGS 256
  320. struct bpf_subprog_info {
  321. u32 start; /* insn idx of function entry point */
  322. u32 linfo_idx; /* The idx to the main_prog->aux->linfo */
  323. u16 stack_depth; /* max. stack depth used by this function */
  324. bool has_tail_call;
  325. };
  326. /* single container for all structs
  327. * one verifier_env per bpf_check() call
  328. */
  329. struct bpf_verifier_env {
  330. u32 insn_idx;
  331. u32 prev_insn_idx;
  332. struct bpf_prog *prog; /* eBPF program being verified */
  333. const struct bpf_verifier_ops *ops;
  334. struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */
  335. int stack_size; /* number of states to be processed */
  336. bool strict_alignment; /* perform strict pointer alignment checks */
  337. bool test_state_freq; /* test verifier with different pruning frequency */
  338. struct bpf_verifier_state *cur_state; /* current verifier state */
  339. struct bpf_verifier_state_list **explored_states; /* search pruning optimization */
  340. struct bpf_verifier_state_list *free_list;
  341. struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
  342. u32 used_map_cnt; /* number of used maps */
  343. u32 id_gen; /* used to generate unique reg IDs */
  344. bool explore_alu_limits;
  345. bool allow_ptr_leaks;
  346. bool seen_direct_write;
  347. struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
  348. const struct bpf_line_info *prev_linfo;
  349. struct bpf_verifier_log log;
  350. struct bpf_subprog_info subprog_info[BPF_MAX_SUBPROGS + 1];
  351. struct bpf_id_pair idmap_scratch[BPF_ID_MAP_SIZE];
  352. struct {
  353. int *insn_state;
  354. int *insn_stack;
  355. int cur_stack;
  356. } cfg;
  357. u32 subprog_cnt;
  358. /* number of instructions analyzed by the verifier */
  359. u32 prev_insn_processed, insn_processed;
  360. /* number of jmps, calls, exits analyzed so far */
  361. u32 prev_jmps_processed, jmps_processed;
  362. /* total verification time */
  363. u64 verification_time;
  364. /* maximum number of verifier states kept in 'branching' instructions */
  365. u32 max_states_per_insn;
  366. /* total number of allocated verifier states */
  367. u32 total_states;
  368. /* some states are freed during program analysis.
  369. * this is peak number of states. this number dominates kernel
  370. * memory consumption during verification
  371. */
  372. u32 peak_states;
  373. /* longest register parentage chain walked for liveness marking */
  374. u32 longest_mark_read_walk;
  375. };
  376. __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log,
  377. const char *fmt, va_list args);
  378. __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env,
  379. const char *fmt, ...);
  380. static inline struct bpf_func_state *cur_func(struct bpf_verifier_env *env)
  381. {
  382. struct bpf_verifier_state *cur = env->cur_state;
  383. return cur->frame[cur->curframe];
  384. }
  385. static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env)
  386. {
  387. return cur_func(env)->regs;
  388. }
  389. int bpf_prog_offload_verifier_prep(struct bpf_prog *prog);
  390. int bpf_prog_offload_verify_insn(struct bpf_verifier_env *env,
  391. int insn_idx, int prev_insn_idx);
  392. int bpf_prog_offload_finalize(struct bpf_verifier_env *env);
  393. void
  394. bpf_prog_offload_replace_insn(struct bpf_verifier_env *env, u32 off,
  395. struct bpf_insn *insn);
  396. void
  397. bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt);
  398. #endif /* _LINUX_BPF_VERIFIER_H */