nfnetlink_acct.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513
  1. /*
  2. * (C) 2011 Pablo Neira Ayuso <pablo@netfilter.org>
  3. * (C) 2011 Intra2net AG <http://www.intra2net.com>
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License version 2 as
  7. * published by the Free Software Foundation (or any later at your option).
  8. */
  9. #include <linux/init.h>
  10. #include <linux/module.h>
  11. #include <linux/kernel.h>
  12. #include <linux/skbuff.h>
  13. #include <linux/atomic.h>
  14. #include <linux/netlink.h>
  15. #include <linux/rculist.h>
  16. #include <linux/slab.h>
  17. #include <linux/types.h>
  18. #include <linux/errno.h>
  19. #include <net/netlink.h>
  20. #include <net/sock.h>
  21. #include <linux/netfilter.h>
  22. #include <linux/netfilter/nfnetlink.h>
  23. #include <linux/netfilter/nfnetlink_acct.h>
  24. MODULE_LICENSE("GPL");
  25. MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
  26. MODULE_DESCRIPTION("nfacct: Extended Netfilter accounting infrastructure");
  27. static LIST_HEAD(nfnl_acct_list);
  28. struct nf_acct {
  29. atomic64_t pkts;
  30. atomic64_t bytes;
  31. unsigned long flags;
  32. struct list_head head;
  33. atomic_t refcnt;
  34. char name[NFACCT_NAME_MAX];
  35. struct rcu_head rcu_head;
  36. char data[0];
  37. };
  38. struct nfacct_filter {
  39. u32 value;
  40. u32 mask;
  41. };
  42. #define NFACCT_F_QUOTA (NFACCT_F_QUOTA_PKTS | NFACCT_F_QUOTA_BYTES)
  43. #define NFACCT_OVERQUOTA_BIT 2 /* NFACCT_F_OVERQUOTA */
  44. static int
  45. nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
  46. const struct nlmsghdr *nlh, const struct nlattr * const tb[])
  47. {
  48. struct nf_acct *nfacct, *matching = NULL;
  49. char *acct_name;
  50. unsigned int size = 0;
  51. u32 flags = 0;
  52. if (!tb[NFACCT_NAME])
  53. return -EINVAL;
  54. acct_name = nla_data(tb[NFACCT_NAME]);
  55. if (strlen(acct_name) == 0)
  56. return -EINVAL;
  57. list_for_each_entry(nfacct, &nfnl_acct_list, head) {
  58. if (strncmp(nfacct->name, acct_name, NFACCT_NAME_MAX) != 0)
  59. continue;
  60. if (nlh->nlmsg_flags & NLM_F_EXCL)
  61. return -EEXIST;
  62. matching = nfacct;
  63. break;
  64. }
  65. if (matching) {
  66. if (nlh->nlmsg_flags & NLM_F_REPLACE) {
  67. /* reset counters if you request a replacement. */
  68. atomic64_set(&matching->pkts, 0);
  69. atomic64_set(&matching->bytes, 0);
  70. smp_mb__before_atomic();
  71. /* reset overquota flag if quota is enabled. */
  72. if ((matching->flags & NFACCT_F_QUOTA))
  73. clear_bit(NFACCT_OVERQUOTA_BIT,
  74. &matching->flags);
  75. return 0;
  76. }
  77. return -EBUSY;
  78. }
  79. if (tb[NFACCT_FLAGS]) {
  80. flags = ntohl(nla_get_be32(tb[NFACCT_FLAGS]));
  81. if (flags & ~NFACCT_F_QUOTA)
  82. return -EOPNOTSUPP;
  83. if ((flags & NFACCT_F_QUOTA) == NFACCT_F_QUOTA)
  84. return -EINVAL;
  85. if (flags & NFACCT_F_OVERQUOTA)
  86. return -EINVAL;
  87. size += sizeof(u64);
  88. }
  89. nfacct = kzalloc(sizeof(struct nf_acct) + size, GFP_KERNEL);
  90. if (nfacct == NULL)
  91. return -ENOMEM;
  92. if (flags & NFACCT_F_QUOTA) {
  93. u64 *quota = (u64 *)nfacct->data;
  94. *quota = be64_to_cpu(nla_get_be64(tb[NFACCT_QUOTA]));
  95. nfacct->flags = flags;
  96. }
  97. strncpy(nfacct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX);
  98. if (tb[NFACCT_BYTES]) {
  99. atomic64_set(&nfacct->bytes,
  100. be64_to_cpu(nla_get_be64(tb[NFACCT_BYTES])));
  101. }
  102. if (tb[NFACCT_PKTS]) {
  103. atomic64_set(&nfacct->pkts,
  104. be64_to_cpu(nla_get_be64(tb[NFACCT_PKTS])));
  105. }
  106. atomic_set(&nfacct->refcnt, 1);
  107. list_add_tail_rcu(&nfacct->head, &nfnl_acct_list);
  108. return 0;
  109. }
  110. static int
  111. nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
  112. int event, struct nf_acct *acct)
  113. {
  114. struct nlmsghdr *nlh;
  115. struct nfgenmsg *nfmsg;
  116. unsigned int flags = portid ? NLM_F_MULTI : 0;
  117. u64 pkts, bytes;
  118. u32 old_flags;
  119. event |= NFNL_SUBSYS_ACCT << 8;
  120. nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
  121. if (nlh == NULL)
  122. goto nlmsg_failure;
  123. nfmsg = nlmsg_data(nlh);
  124. nfmsg->nfgen_family = AF_UNSPEC;
  125. nfmsg->version = NFNETLINK_V0;
  126. nfmsg->res_id = 0;
  127. if (nla_put_string(skb, NFACCT_NAME, acct->name))
  128. goto nla_put_failure;
  129. old_flags = acct->flags;
  130. if (type == NFNL_MSG_ACCT_GET_CTRZERO) {
  131. pkts = atomic64_xchg(&acct->pkts, 0);
  132. bytes = atomic64_xchg(&acct->bytes, 0);
  133. smp_mb__before_atomic();
  134. if (acct->flags & NFACCT_F_QUOTA)
  135. clear_bit(NFACCT_OVERQUOTA_BIT, &acct->flags);
  136. } else {
  137. pkts = atomic64_read(&acct->pkts);
  138. bytes = atomic64_read(&acct->bytes);
  139. }
  140. if (nla_put_be64(skb, NFACCT_PKTS, cpu_to_be64(pkts)) ||
  141. nla_put_be64(skb, NFACCT_BYTES, cpu_to_be64(bytes)) ||
  142. nla_put_be32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt))))
  143. goto nla_put_failure;
  144. if (acct->flags & NFACCT_F_QUOTA) {
  145. u64 *quota = (u64 *)acct->data;
  146. if (nla_put_be32(skb, NFACCT_FLAGS, htonl(old_flags)) ||
  147. nla_put_be64(skb, NFACCT_QUOTA, cpu_to_be64(*quota)))
  148. goto nla_put_failure;
  149. }
  150. nlmsg_end(skb, nlh);
  151. return skb->len;
  152. nlmsg_failure:
  153. nla_put_failure:
  154. nlmsg_cancel(skb, nlh);
  155. return -1;
  156. }
  157. static int
  158. nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
  159. {
  160. struct nf_acct *cur, *last;
  161. const struct nfacct_filter *filter = cb->data;
  162. if (cb->args[2])
  163. return 0;
  164. last = (struct nf_acct *)cb->args[1];
  165. if (cb->args[1])
  166. cb->args[1] = 0;
  167. rcu_read_lock();
  168. list_for_each_entry_rcu(cur, &nfnl_acct_list, head) {
  169. if (last) {
  170. if (cur != last)
  171. continue;
  172. last = NULL;
  173. }
  174. if (filter && (cur->flags & filter->mask) != filter->value)
  175. continue;
  176. if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).portid,
  177. cb->nlh->nlmsg_seq,
  178. NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
  179. NFNL_MSG_ACCT_NEW, cur) < 0) {
  180. cb->args[1] = (unsigned long)cur;
  181. break;
  182. }
  183. }
  184. if (!cb->args[1])
  185. cb->args[2] = 1;
  186. rcu_read_unlock();
  187. return skb->len;
  188. }
  189. static int nfnl_acct_done(struct netlink_callback *cb)
  190. {
  191. kfree(cb->data);
  192. return 0;
  193. }
  194. static const struct nla_policy filter_policy[NFACCT_FILTER_MAX + 1] = {
  195. [NFACCT_FILTER_MASK] = { .type = NLA_U32 },
  196. [NFACCT_FILTER_VALUE] = { .type = NLA_U32 },
  197. };
  198. static struct nfacct_filter *
  199. nfacct_filter_alloc(const struct nlattr * const attr)
  200. {
  201. struct nfacct_filter *filter;
  202. struct nlattr *tb[NFACCT_FILTER_MAX + 1];
  203. int err;
  204. err = nla_parse_nested(tb, NFACCT_FILTER_MAX, attr, filter_policy);
  205. if (err < 0)
  206. return ERR_PTR(err);
  207. filter = kzalloc(sizeof(struct nfacct_filter), GFP_KERNEL);
  208. if (!filter)
  209. return ERR_PTR(-ENOMEM);
  210. filter->mask = ntohl(nla_get_be32(tb[NFACCT_FILTER_MASK]));
  211. filter->value = ntohl(nla_get_be32(tb[NFACCT_FILTER_VALUE]));
  212. return filter;
  213. }
  214. static int
  215. nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb,
  216. const struct nlmsghdr *nlh, const struct nlattr * const tb[])
  217. {
  218. int ret = -ENOENT;
  219. struct nf_acct *cur;
  220. char *acct_name;
  221. if (nlh->nlmsg_flags & NLM_F_DUMP) {
  222. struct netlink_dump_control c = {
  223. .dump = nfnl_acct_dump,
  224. .done = nfnl_acct_done,
  225. };
  226. if (tb[NFACCT_FILTER]) {
  227. struct nfacct_filter *filter;
  228. filter = nfacct_filter_alloc(tb[NFACCT_FILTER]);
  229. if (IS_ERR(filter))
  230. return PTR_ERR(filter);
  231. c.data = filter;
  232. }
  233. return netlink_dump_start(nfnl, skb, nlh, &c);
  234. }
  235. if (!tb[NFACCT_NAME])
  236. return -EINVAL;
  237. acct_name = nla_data(tb[NFACCT_NAME]);
  238. list_for_each_entry(cur, &nfnl_acct_list, head) {
  239. struct sk_buff *skb2;
  240. if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0)
  241. continue;
  242. skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
  243. if (skb2 == NULL) {
  244. ret = -ENOMEM;
  245. break;
  246. }
  247. ret = nfnl_acct_fill_info(skb2, NETLINK_CB(skb).portid,
  248. nlh->nlmsg_seq,
  249. NFNL_MSG_TYPE(nlh->nlmsg_type),
  250. NFNL_MSG_ACCT_NEW, cur);
  251. if (ret <= 0) {
  252. kfree_skb(skb2);
  253. break;
  254. }
  255. ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid,
  256. MSG_DONTWAIT);
  257. if (ret > 0)
  258. ret = 0;
  259. /* this avoids a loop in nfnetlink. */
  260. return ret == -EAGAIN ? -ENOBUFS : ret;
  261. }
  262. return ret;
  263. }
  264. /* try to delete object, fail if it is still in use. */
  265. static int nfnl_acct_try_del(struct nf_acct *cur)
  266. {
  267. int ret = 0;
  268. /* we want to avoid races with nfnl_acct_find_get. */
  269. if (atomic_dec_and_test(&cur->refcnt)) {
  270. /* We are protected by nfnl mutex. */
  271. list_del_rcu(&cur->head);
  272. kfree_rcu(cur, rcu_head);
  273. } else {
  274. /* still in use, restore reference counter. */
  275. atomic_inc(&cur->refcnt);
  276. ret = -EBUSY;
  277. }
  278. return ret;
  279. }
  280. static int
  281. nfnl_acct_del(struct sock *nfnl, struct sk_buff *skb,
  282. const struct nlmsghdr *nlh, const struct nlattr * const tb[])
  283. {
  284. char *acct_name;
  285. struct nf_acct *cur;
  286. int ret = -ENOENT;
  287. if (!tb[NFACCT_NAME]) {
  288. list_for_each_entry(cur, &nfnl_acct_list, head)
  289. nfnl_acct_try_del(cur);
  290. return 0;
  291. }
  292. acct_name = nla_data(tb[NFACCT_NAME]);
  293. list_for_each_entry(cur, &nfnl_acct_list, head) {
  294. if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX) != 0)
  295. continue;
  296. ret = nfnl_acct_try_del(cur);
  297. if (ret < 0)
  298. return ret;
  299. break;
  300. }
  301. return ret;
  302. }
  303. static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = {
  304. [NFACCT_NAME] = { .type = NLA_NUL_STRING, .len = NFACCT_NAME_MAX-1 },
  305. [NFACCT_BYTES] = { .type = NLA_U64 },
  306. [NFACCT_PKTS] = { .type = NLA_U64 },
  307. [NFACCT_FLAGS] = { .type = NLA_U32 },
  308. [NFACCT_QUOTA] = { .type = NLA_U64 },
  309. [NFACCT_FILTER] = {.type = NLA_NESTED },
  310. };
  311. static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = {
  312. [NFNL_MSG_ACCT_NEW] = { .call = nfnl_acct_new,
  313. .attr_count = NFACCT_MAX,
  314. .policy = nfnl_acct_policy },
  315. [NFNL_MSG_ACCT_GET] = { .call = nfnl_acct_get,
  316. .attr_count = NFACCT_MAX,
  317. .policy = nfnl_acct_policy },
  318. [NFNL_MSG_ACCT_GET_CTRZERO] = { .call = nfnl_acct_get,
  319. .attr_count = NFACCT_MAX,
  320. .policy = nfnl_acct_policy },
  321. [NFNL_MSG_ACCT_DEL] = { .call = nfnl_acct_del,
  322. .attr_count = NFACCT_MAX,
  323. .policy = nfnl_acct_policy },
  324. };
  325. static const struct nfnetlink_subsystem nfnl_acct_subsys = {
  326. .name = "acct",
  327. .subsys_id = NFNL_SUBSYS_ACCT,
  328. .cb_count = NFNL_MSG_ACCT_MAX,
  329. .cb = nfnl_acct_cb,
  330. };
  331. MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ACCT);
  332. struct nf_acct *nfnl_acct_find_get(const char *acct_name)
  333. {
  334. struct nf_acct *cur, *acct = NULL;
  335. rcu_read_lock();
  336. list_for_each_entry_rcu(cur, &nfnl_acct_list, head) {
  337. if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0)
  338. continue;
  339. if (!try_module_get(THIS_MODULE))
  340. goto err;
  341. if (!atomic_inc_not_zero(&cur->refcnt)) {
  342. module_put(THIS_MODULE);
  343. goto err;
  344. }
  345. acct = cur;
  346. break;
  347. }
  348. err:
  349. rcu_read_unlock();
  350. return acct;
  351. }
  352. EXPORT_SYMBOL_GPL(nfnl_acct_find_get);
  353. void nfnl_acct_put(struct nf_acct *acct)
  354. {
  355. atomic_dec(&acct->refcnt);
  356. module_put(THIS_MODULE);
  357. }
  358. EXPORT_SYMBOL_GPL(nfnl_acct_put);
  359. void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct)
  360. {
  361. atomic64_inc(&nfacct->pkts);
  362. atomic64_add(skb->len, &nfacct->bytes);
  363. }
  364. EXPORT_SYMBOL_GPL(nfnl_acct_update);
  365. static void nfnl_overquota_report(struct nf_acct *nfacct)
  366. {
  367. int ret;
  368. struct sk_buff *skb;
  369. skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
  370. if (skb == NULL)
  371. return;
  372. ret = nfnl_acct_fill_info(skb, 0, 0, NFNL_MSG_ACCT_OVERQUOTA, 0,
  373. nfacct);
  374. if (ret <= 0) {
  375. kfree_skb(skb);
  376. return;
  377. }
  378. netlink_broadcast(init_net.nfnl, skb, 0, NFNLGRP_ACCT_QUOTA,
  379. GFP_ATOMIC);
  380. }
  381. int nfnl_acct_overquota(const struct sk_buff *skb, struct nf_acct *nfacct)
  382. {
  383. u64 now;
  384. u64 *quota;
  385. int ret = NFACCT_UNDERQUOTA;
  386. /* no place here if we don't have a quota */
  387. if (!(nfacct->flags & NFACCT_F_QUOTA))
  388. return NFACCT_NO_QUOTA;
  389. quota = (u64 *)nfacct->data;
  390. now = (nfacct->flags & NFACCT_F_QUOTA_PKTS) ?
  391. atomic64_read(&nfacct->pkts) : atomic64_read(&nfacct->bytes);
  392. ret = now > *quota;
  393. if (now >= *quota &&
  394. !test_and_set_bit(NFACCT_OVERQUOTA_BIT, &nfacct->flags)) {
  395. nfnl_overquota_report(nfacct);
  396. }
  397. return ret;
  398. }
  399. EXPORT_SYMBOL_GPL(nfnl_acct_overquota);
  400. static int __init nfnl_acct_init(void)
  401. {
  402. int ret;
  403. pr_info("nfnl_acct: registering with nfnetlink.\n");
  404. ret = nfnetlink_subsys_register(&nfnl_acct_subsys);
  405. if (ret < 0) {
  406. pr_err("nfnl_acct_init: cannot register with nfnetlink.\n");
  407. goto err_out;
  408. }
  409. return 0;
  410. err_out:
  411. return ret;
  412. }
  413. static void __exit nfnl_acct_exit(void)
  414. {
  415. struct nf_acct *cur, *tmp;
  416. pr_info("nfnl_acct: unregistering from nfnetlink.\n");
  417. nfnetlink_subsys_unregister(&nfnl_acct_subsys);
  418. list_for_each_entry_safe(cur, tmp, &nfnl_acct_list, head) {
  419. list_del_rcu(&cur->head);
  420. /* We are sure that our objects have no clients at this point,
  421. * it's safe to release them all without checking refcnt. */
  422. kfree_rcu(cur, rcu_head);
  423. }
  424. }
  425. module_init(nfnl_acct_init);
  426. module_exit(nfnl_acct_exit);