local_storage.c

//SPDX-License-Identifier: GPL-2.0
#include <linux/bpf-cgroup.h>
#include <linux/bpf.h>
#include <linux/bug.h>
#include <linux/filter.h>
#include <linux/mm.h>
#include <linux/rbtree.h>
#include <linux/slab.h>

DEFINE_PER_CPU(void*, bpf_cgroup_storage);

#ifdef CONFIG_CGROUP_BPF

#define LOCAL_STORAGE_CREATE_FLAG_MASK \
	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
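
/*
 * A cgroup local storage map. Storage elements (one per cgroup and
 * attach type) are kept both in an rbtree, for lookup by key, and on a
 * list, for get_next_key iteration; both are protected by 'lock'.
 * 'prog' is the single BPF program this map is bound to (see
 * bpf_cgroup_storage_assign()).
 */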
struct bpf_cgroup_storage_map {
	struct bpf_map map;

	spinlock_t lock;
	struct bpf_prog *prog;
	struct rb_root root;
	struct list_head list;
};

static struct bpf_cgroup_storage_map *map_to_storage(struct bpf_map *map)
{
	return container_of(map, struct bpf_cgroup_storage_map, map);
}

static int bpf_cgroup_storage_key_cmp(
	const struct bpf_cgroup_storage_key *key1,
	const struct bpf_cgroup_storage_key *key2)
{
	if (key1->cgroup_inode_id < key2->cgroup_inode_id)
		return -1;
	else if (key1->cgroup_inode_id > key2->cgroup_inode_id)
		return 1;
	else if (key1->attach_type < key2->attach_type)
		return -1;
	else if (key1->attach_type > key2->attach_type)
		return 1;
	return 0;
}
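
/*
 * Find the storage element for 'key' in the map's rbtree. map->lock is
 * taken (and released) here unless the caller already holds it and
 * passes locked == true.
 */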
static struct bpf_cgroup_storage *cgroup_storage_lookup(
	struct bpf_cgroup_storage_map *map, struct bpf_cgroup_storage_key *key,
	bool locked)
{
	struct rb_root *root = &map->root;
	struct rb_node *node;

	if (!locked)
		spin_lock_bh(&map->lock);

	node = root->rb_node;
	while (node) {
		struct bpf_cgroup_storage *storage;

		storage = container_of(node, struct bpf_cgroup_storage, node);

		switch (bpf_cgroup_storage_key_cmp(key, &storage->key)) {
		case -1:
			node = node->rb_left;
			break;
		case 1:
			node = node->rb_right;
			break;
		default:
			if (!locked)
				spin_unlock_bh(&map->lock);
			return storage;
		}
	}

	if (!locked)
		spin_unlock_bh(&map->lock);

	return NULL;
}
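
/*
 * Insert 'storage' into the map's rbtree, keyed by (cgroup_inode_id,
 * attach_type). Returns -EEXIST if an element with the same key is
 * already present. The caller is expected to hold map->lock.
 */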
static int cgroup_storage_insert(struct bpf_cgroup_storage_map *map,
				 struct bpf_cgroup_storage *storage)
{
	struct rb_root *root = &map->root;
	struct rb_node **new = &(root->rb_node), *parent = NULL;

	while (*new) {
		struct bpf_cgroup_storage *this;

		this = container_of(*new, struct bpf_cgroup_storage, node);

		parent = *new;
		switch (bpf_cgroup_storage_key_cmp(&storage->key, &this->key)) {
		case -1:
			new = &((*new)->rb_left);
			break;
		case 1:
			new = &((*new)->rb_right);
			break;
		default:
			return -EEXIST;
		}
	}

	rb_link_node(&storage->node, parent, new);
	rb_insert_color(&storage->node, root);

	return 0;
}
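
/* Lookup callback: return a pointer to the element's current value buffer. */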
static void *cgroup_storage_lookup_elem(struct bpf_map *_map, void *_key)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;

	storage = cgroup_storage_lookup(map, key, false);
	if (!storage)
		return NULL;

	return &READ_ONCE(storage->buf)->data[0];
}
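
/*
 * Update callback: replace the value buffer of an existing element.
 * New elements can not be created this way (BPF_NOEXIST is rejected and
 * a missing element yields -ENOENT); the old buffer is freed after an
 * RCU grace period via kfree_rcu().
 */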
static int cgroup_storage_update_elem(struct bpf_map *map, void *_key,
				      void *value, u64 flags)
{
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage *storage;
	struct bpf_storage_buffer *new;

	if (flags != BPF_ANY && flags != BPF_EXIST)
		return -EINVAL;

	storage = cgroup_storage_lookup((struct bpf_cgroup_storage_map *)map,
					key, false);
	if (!storage)
		return -ENOENT;

	new = kmalloc_node(sizeof(struct bpf_storage_buffer) +
			   map->value_size,
			   __GFP_ZERO | GFP_ATOMIC | __GFP_NOWARN,
			   map->numa_node);
	if (!new)
		return -ENOMEM;

	memcpy(&new->data[0], value, map->value_size);

	new = xchg(&storage->buf, new);
	kfree_rcu(new, rcu);

	return 0;
}
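
/*
 * get_next_key callback: with a NULL key, return the first element's
 * key; otherwise return the key of the element following 'key' on the
 * map's list, or -ENOENT if 'key' is not found or is the last element.
 */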
static int cgroup_storage_get_next_key(struct bpf_map *_map, void *_key,
				       void *_next_key)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	struct bpf_cgroup_storage_key *key = _key;
	struct bpf_cgroup_storage_key *next = _next_key;
	struct bpf_cgroup_storage *storage;

	spin_lock_bh(&map->lock);

	if (list_empty(&map->list))
		goto enoent;

	if (key) {
		storage = cgroup_storage_lookup(map, key, true);
		if (!storage)
			goto enoent;

		/* list_next_entry() never returns NULL, so check for the
		 * end of the list explicitly before advancing.
		 */
		if (list_is_last(&storage->list, &map->list))
			goto enoent;
		storage = list_next_entry(storage, list);
	} else {
		storage = list_first_entry(&map->list,
					   struct bpf_cgroup_storage, list);
	}

	spin_unlock_bh(&map->lock);
	next->attach_type = storage->key.attach_type;
	next->cgroup_inode_id = storage->key.cgroup_inode_id;
	return 0;

enoent:
	spin_unlock_bh(&map->lock);
	return -ENOENT;
}
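
/*
 * Allocate the map structure itself. Keys must be
 * struct bpf_cgroup_storage_key, values are limited to one page, and
 * max_entries must be 0: elements are not created through the map API
 * but allocated separately via bpf_cgroup_storage_alloc().
 */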
static struct bpf_map *cgroup_storage_map_alloc(union bpf_attr *attr)
{
	int numa_node = bpf_map_attr_numa_node(attr);
	struct bpf_cgroup_storage_map *map;

	if (attr->key_size != sizeof(struct bpf_cgroup_storage_key))
		return ERR_PTR(-EINVAL);

	if (attr->value_size == 0)
		return ERR_PTR(-EINVAL);

	if (attr->value_size > PAGE_SIZE)
		return ERR_PTR(-E2BIG);

	if (attr->map_flags & ~LOCAL_STORAGE_CREATE_FLAG_MASK)
		/* reserved bits should not be used */
		return ERR_PTR(-EINVAL);

	if (attr->max_entries)
		/* max_entries is not used and enforced to be 0 */
		return ERR_PTR(-EINVAL);

	map = kmalloc_node(sizeof(struct bpf_cgroup_storage_map),
			   __GFP_ZERO | GFP_USER, numa_node);
	if (!map)
		return ERR_PTR(-ENOMEM);

	map->map.pages = round_up(sizeof(struct bpf_cgroup_storage_map),
				  PAGE_SIZE) >> PAGE_SHIFT;

	/* copy mandatory map attributes */
	bpf_map_init_from_attr(&map->map, attr);

	spin_lock_init(&map->lock);
	map->root = RB_ROOT;
	INIT_LIST_HEAD(&map->list);

	return &map->map;
}
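
/*
 * The rbtree and the list are expected to be empty by the time the map
 * is freed; any remaining element would indicate a leak, hence the
 * WARN_ONs.
 */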
static void cgroup_storage_map_free(struct bpf_map *_map)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);

	WARN_ON(!RB_EMPTY_ROOT(&map->root));
	WARN_ON(!list_empty(&map->list));

	kfree(map);
}
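
/* Deleting individual elements through the map API is not supported. */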
static int cgroup_storage_delete_elem(struct bpf_map *map, void *key)
{
	return -EINVAL;
}

const struct bpf_map_ops cgroup_storage_map_ops = {
	.map_alloc = cgroup_storage_map_alloc,
	.map_free = cgroup_storage_map_free,
	.map_get_next_key = cgroup_storage_get_next_key,
	.map_lookup_elem = cgroup_storage_lookup_elem,
	.map_update_elem = cgroup_storage_update_elem,
	.map_delete_elem = cgroup_storage_delete_elem,
	.map_check_btf = map_check_no_btf,
};
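
/*
 * Bind a cgroup storage map to a BPF program. A map can be used by only
 * one program, and a program can use only one such map; -EBUSY is
 * returned if either side is already bound elsewhere.
 */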
int bpf_cgroup_storage_assign(struct bpf_prog *prog, struct bpf_map *_map)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);
	int ret = -EBUSY;

	spin_lock_bh(&map->lock);

	if (map->prog && map->prog != prog)
		goto unlock;
	if (prog->aux->cgroup_storage && prog->aux->cgroup_storage != _map)
		goto unlock;

	map->prog = prog;
	prog->aux->cgroup_storage = _map;
	ret = 0;
unlock:
	spin_unlock_bh(&map->lock);

	return ret;
}
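
/* Drop the program <-> map binding set up by bpf_cgroup_storage_assign(). */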
void bpf_cgroup_storage_release(struct bpf_prog *prog, struct bpf_map *_map)
{
	struct bpf_cgroup_storage_map *map = map_to_storage(_map);

	spin_lock_bh(&map->lock);

	if (map->prog == prog) {
		WARN_ON(prog->aux->cgroup_storage != _map);
		map->prog = NULL;
		prog->aux->cgroup_storage = NULL;
	}

	spin_unlock_bh(&map->lock);
}
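
/*
 * Allocate a storage element for the map used by 'prog' and charge its
 * memory to the map's memlock limit. The element is not yet tied to a
 * cgroup; that happens later in bpf_cgroup_storage_link().
 */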
struct bpf_cgroup_storage *bpf_cgroup_storage_alloc(struct bpf_prog *prog)
{
	struct bpf_cgroup_storage *storage;
	struct bpf_map *map;
	u32 pages;

	map = prog->aux->cgroup_storage;
	if (!map)
		return NULL;

	pages = round_up(sizeof(struct bpf_cgroup_storage) +
			 sizeof(struct bpf_storage_buffer) +
			 map->value_size, PAGE_SIZE) >> PAGE_SHIFT;
	if (bpf_map_charge_memlock(map, pages))
		return ERR_PTR(-EPERM);

	storage = kmalloc_node(sizeof(struct bpf_cgroup_storage),
			       __GFP_ZERO | GFP_USER, map->numa_node);
	if (!storage) {
		bpf_map_uncharge_memlock(map, pages);
		return ERR_PTR(-ENOMEM);
	}

	storage->buf = kmalloc_node(sizeof(struct bpf_storage_buffer) +
				    map->value_size, __GFP_ZERO | GFP_USER,
				    map->numa_node);
	if (!storage->buf) {
		bpf_map_uncharge_memlock(map, pages);
		kfree(storage);
		return ERR_PTR(-ENOMEM);
	}

	storage->map = (struct bpf_cgroup_storage_map *)map;

	return storage;
}
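
/*
 * Uncharge the memlock pages and free a storage element. The element and
 * its value buffer are released via kfree_rcu(), so concurrent RCU
 * readers can keep dereferencing them until the grace period ends.
 */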
void bpf_cgroup_storage_free(struct bpf_cgroup_storage *storage)
{
	u32 pages;
	struct bpf_map *map;

	if (!storage)
		return;

	map = &storage->map->map;
	pages = round_up(sizeof(struct bpf_cgroup_storage) +
			 sizeof(struct bpf_storage_buffer) +
			 map->value_size, PAGE_SIZE) >> PAGE_SHIFT;
	bpf_map_uncharge_memlock(map, pages);

	kfree_rcu(storage->buf, rcu);
	kfree_rcu(storage, rcu);
}
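
/*
 * Attach a previously allocated storage element to a specific cgroup and
 * attach type and insert it into the map's rbtree and list.
 */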
void bpf_cgroup_storage_link(struct bpf_cgroup_storage *storage,
			     struct cgroup *cgroup,
			     enum bpf_attach_type type)
{
	struct bpf_cgroup_storage_map *map;

	if (!storage)
		return;

	storage->key.attach_type = type;
	storage->key.cgroup_inode_id = cgroup->kn->id.id;

	map = storage->map;

	spin_lock_bh(&map->lock);
	WARN_ON(cgroup_storage_insert(map, storage));
	list_add(&storage->list, &map->list);
	spin_unlock_bh(&map->lock);
}
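
/* Remove a storage element from its map's rbtree and list; the counterpart of _link(). */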
void bpf_cgroup_storage_unlink(struct bpf_cgroup_storage *storage)
{
	struct bpf_cgroup_storage_map *map;
	struct rb_root *root;

	if (!storage)
		return;

	map = storage->map;

	spin_lock_bh(&map->lock);
	root = &map->root;
	rb_erase(&storage->node, root);
	list_del(&storage->list);
	spin_unlock_bh(&map->lock);
}

#endif