sockex3_kern.c 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291
  1. /* Copyright (c) 2015 PLUMgrid, http://plumgrid.com
  2. *
  3. * This program is free software; you can redistribute it and/or
  4. * modify it under the terms of version 2 of the GNU General Public
  5. * License as published by the Free Software Foundation.
  6. */
  7. #include <uapi/linux/bpf.h>
  8. #include "bpf_helpers.h"
  9. #include <uapi/linux/in.h>
  10. #include <uapi/linux/if.h>
  11. #include <uapi/linux/if_ether.h>
  12. #include <uapi/linux/ip.h>
  13. #include <uapi/linux/ipv6.h>
  14. #include <uapi/linux/if_tunnel.h>
  15. #include <uapi/linux/mpls.h>
  16. #define IP_MF 0x2000
  17. #define IP_OFFSET 0x1FFF
  18. #define PROG(F) SEC("socket/"__stringify(F)) int bpf_func_##F
  19. struct bpf_map_def SEC("maps") jmp_table = {
  20. .type = BPF_MAP_TYPE_PROG_ARRAY,
  21. .key_size = sizeof(u32),
  22. .value_size = sizeof(u32),
  23. .max_entries = 8,
  24. };
  25. #define PARSE_VLAN 1
  26. #define PARSE_MPLS 2
  27. #define PARSE_IP 3
  28. #define PARSE_IPV6 4
  29. /* protocol dispatch routine.
  30. * It tail-calls next BPF program depending on eth proto
  31. * Note, we could have used:
  32. * bpf_tail_call(skb, &jmp_table, proto);
  33. * but it would need large prog_array
  34. */
  35. static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto)
  36. {
  37. switch (proto) {
  38. case ETH_P_8021Q:
  39. case ETH_P_8021AD:
  40. bpf_tail_call(skb, &jmp_table, PARSE_VLAN);
  41. break;
  42. case ETH_P_MPLS_UC:
  43. case ETH_P_MPLS_MC:
  44. bpf_tail_call(skb, &jmp_table, PARSE_MPLS);
  45. break;
  46. case ETH_P_IP:
  47. bpf_tail_call(skb, &jmp_table, PARSE_IP);
  48. break;
  49. case ETH_P_IPV6:
  50. bpf_tail_call(skb, &jmp_table, PARSE_IPV6);
  51. break;
  52. }
  53. }
  54. struct vlan_hdr {
  55. __be16 h_vlan_TCI;
  56. __be16 h_vlan_encapsulated_proto;
  57. };
  58. struct flow_keys {
  59. __be32 src;
  60. __be32 dst;
  61. union {
  62. __be32 ports;
  63. __be16 port16[2];
  64. };
  65. __u32 ip_proto;
  66. };
  67. static inline int ip_is_fragment(struct __sk_buff *ctx, __u64 nhoff)
  68. {
  69. return load_half(ctx, nhoff + offsetof(struct iphdr, frag_off))
  70. & (IP_MF | IP_OFFSET);
  71. }
  72. static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off)
  73. {
  74. __u64 w0 = load_word(ctx, off);
  75. __u64 w1 = load_word(ctx, off + 4);
  76. __u64 w2 = load_word(ctx, off + 8);
  77. __u64 w3 = load_word(ctx, off + 12);
  78. return (__u32)(w0 ^ w1 ^ w2 ^ w3);
  79. }
  80. struct globals {
  81. struct flow_keys flow;
  82. };
  83. struct bpf_map_def SEC("maps") percpu_map = {
  84. .type = BPF_MAP_TYPE_ARRAY,
  85. .key_size = sizeof(__u32),
  86. .value_size = sizeof(struct globals),
  87. .max_entries = 32,
  88. };
  89. /* user poor man's per_cpu until native support is ready */
  90. static struct globals *this_cpu_globals(void)
  91. {
  92. u32 key = bpf_get_smp_processor_id();
  93. return bpf_map_lookup_elem(&percpu_map, &key);
  94. }
  95. /* some simple stats for user space consumption */
  96. struct pair {
  97. __u64 packets;
  98. __u64 bytes;
  99. };
  100. struct bpf_map_def SEC("maps") hash_map = {
  101. .type = BPF_MAP_TYPE_HASH,
  102. .key_size = sizeof(struct flow_keys),
  103. .value_size = sizeof(struct pair),
  104. .max_entries = 1024,
  105. };
  106. static void update_stats(struct __sk_buff *skb, struct globals *g)
  107. {
  108. struct flow_keys key = g->flow;
  109. struct pair *value;
  110. value = bpf_map_lookup_elem(&hash_map, &key);
  111. if (value) {
  112. __sync_fetch_and_add(&value->packets, 1);
  113. __sync_fetch_and_add(&value->bytes, skb->len);
  114. } else {
  115. struct pair val = {1, skb->len};
  116. bpf_map_update_elem(&hash_map, &key, &val, BPF_ANY);
  117. }
  118. }
  119. static __always_inline void parse_ip_proto(struct __sk_buff *skb,
  120. struct globals *g, __u32 ip_proto)
  121. {
  122. __u32 nhoff = skb->cb[0];
  123. int poff;
  124. switch (ip_proto) {
  125. case IPPROTO_GRE: {
  126. struct gre_hdr {
  127. __be16 flags;
  128. __be16 proto;
  129. };
  130. __u32 gre_flags = load_half(skb,
  131. nhoff + offsetof(struct gre_hdr, flags));
  132. __u32 gre_proto = load_half(skb,
  133. nhoff + offsetof(struct gre_hdr, proto));
  134. if (gre_flags & (GRE_VERSION|GRE_ROUTING))
  135. break;
  136. nhoff += 4;
  137. if (gre_flags & GRE_CSUM)
  138. nhoff += 4;
  139. if (gre_flags & GRE_KEY)
  140. nhoff += 4;
  141. if (gre_flags & GRE_SEQ)
  142. nhoff += 4;
  143. skb->cb[0] = nhoff;
  144. parse_eth_proto(skb, gre_proto);
  145. break;
  146. }
  147. case IPPROTO_IPIP:
  148. parse_eth_proto(skb, ETH_P_IP);
  149. break;
  150. case IPPROTO_IPV6:
  151. parse_eth_proto(skb, ETH_P_IPV6);
  152. break;
  153. case IPPROTO_TCP:
  154. case IPPROTO_UDP:
  155. g->flow.ports = load_word(skb, nhoff);
  156. case IPPROTO_ICMP:
  157. g->flow.ip_proto = ip_proto;
  158. update_stats(skb, g);
  159. break;
  160. default:
  161. break;
  162. }
  163. }
  164. PROG(PARSE_IP)(struct __sk_buff *skb)
  165. {
  166. struct globals *g = this_cpu_globals();
  167. __u32 nhoff, verlen, ip_proto;
  168. if (!g)
  169. return 0;
  170. nhoff = skb->cb[0];
  171. if (unlikely(ip_is_fragment(skb, nhoff)))
  172. return 0;
  173. ip_proto = load_byte(skb, nhoff + offsetof(struct iphdr, protocol));
  174. if (ip_proto != IPPROTO_GRE) {
  175. g->flow.src = load_word(skb, nhoff + offsetof(struct iphdr, saddr));
  176. g->flow.dst = load_word(skb, nhoff + offsetof(struct iphdr, daddr));
  177. }
  178. verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/);
  179. nhoff += (verlen & 0xF) << 2;
  180. skb->cb[0] = nhoff;
  181. parse_ip_proto(skb, g, ip_proto);
  182. return 0;
  183. }
  184. PROG(PARSE_IPV6)(struct __sk_buff *skb)
  185. {
  186. struct globals *g = this_cpu_globals();
  187. __u32 nhoff, ip_proto;
  188. if (!g)
  189. return 0;
  190. nhoff = skb->cb[0];
  191. ip_proto = load_byte(skb,
  192. nhoff + offsetof(struct ipv6hdr, nexthdr));
  193. g->flow.src = ipv6_addr_hash(skb,
  194. nhoff + offsetof(struct ipv6hdr, saddr));
  195. g->flow.dst = ipv6_addr_hash(skb,
  196. nhoff + offsetof(struct ipv6hdr, daddr));
  197. nhoff += sizeof(struct ipv6hdr);
  198. skb->cb[0] = nhoff;
  199. parse_ip_proto(skb, g, ip_proto);
  200. return 0;
  201. }
  202. PROG(PARSE_VLAN)(struct __sk_buff *skb)
  203. {
  204. __u32 nhoff, proto;
  205. nhoff = skb->cb[0];
  206. proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,
  207. h_vlan_encapsulated_proto));
  208. nhoff += sizeof(struct vlan_hdr);
  209. skb->cb[0] = nhoff;
  210. parse_eth_proto(skb, proto);
  211. return 0;
  212. }
  213. PROG(PARSE_MPLS)(struct __sk_buff *skb)
  214. {
  215. __u32 nhoff, label;
  216. nhoff = skb->cb[0];
  217. label = load_word(skb, nhoff);
  218. nhoff += sizeof(struct mpls_label);
  219. skb->cb[0] = nhoff;
  220. if (label & MPLS_LS_S_MASK) {
  221. __u8 verlen = load_byte(skb, nhoff);
  222. if ((verlen & 0xF0) == 4)
  223. parse_eth_proto(skb, ETH_P_IP);
  224. else
  225. parse_eth_proto(skb, ETH_P_IPV6);
  226. } else {
  227. parse_eth_proto(skb, ETH_P_MPLS_UC);
  228. }
  229. return 0;
  230. }
  231. SEC("socket/0")
  232. int main_prog(struct __sk_buff *skb)
  233. {
  234. __u32 nhoff = ETH_HLEN;
  235. __u32 proto = load_half(skb, 12);
  236. skb->cb[0] = nhoff;
  237. parse_eth_proto(skb, proto);
  238. return 0;
  239. }
  240. char _license[] SEC("license") = "GPL";