nfnetlink_acct.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548
  1. /*
  2. * (C) 2011 Pablo Neira Ayuso <pablo@netfilter.org>
  3. * (C) 2011 Intra2net AG <http://www.intra2net.com>
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License version 2 as
  7. * published by the Free Software Foundation (or any later at your option).
  8. */
  9. #include <linux/init.h>
  10. #include <linux/module.h>
  11. #include <linux/kernel.h>
  12. #include <linux/skbuff.h>
  13. #include <linux/atomic.h>
  14. #include <linux/netlink.h>
  15. #include <linux/rculist.h>
  16. #include <linux/slab.h>
  17. #include <linux/types.h>
  18. #include <linux/errno.h>
  19. #include <net/netlink.h>
  20. #include <net/sock.h>
  21. #include <linux/netfilter.h>
  22. #include <linux/netfilter/nfnetlink.h>
  23. #include <linux/netfilter/nfnetlink_acct.h>
  24. MODULE_LICENSE("GPL");
  25. MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
  26. MODULE_DESCRIPTION("nfacct: Extended Netfilter accounting infrastructure");
  27. struct nf_acct {
  28. atomic64_t pkts;
  29. atomic64_t bytes;
  30. unsigned long flags;
  31. struct list_head head;
  32. atomic_t refcnt;
  33. char name[NFACCT_NAME_MAX];
  34. struct rcu_head rcu_head;
  35. char data[0];
  36. };
  37. struct nfacct_filter {
  38. u32 value;
  39. u32 mask;
  40. };
  41. #define NFACCT_F_QUOTA (NFACCT_F_QUOTA_PKTS | NFACCT_F_QUOTA_BYTES)
  42. #define NFACCT_OVERQUOTA_BIT 2 /* NFACCT_F_OVERQUOTA */
  43. static int nfnl_acct_new(struct net *net, struct sock *nfnl,
  44. struct sk_buff *skb, const struct nlmsghdr *nlh,
  45. const struct nlattr * const tb[])
  46. {
  47. struct nf_acct *nfacct, *matching = NULL;
  48. char *acct_name;
  49. unsigned int size = 0;
  50. u32 flags = 0;
  51. if (!tb[NFACCT_NAME])
  52. return -EINVAL;
  53. acct_name = nla_data(tb[NFACCT_NAME]);
  54. if (strlen(acct_name) == 0)
  55. return -EINVAL;
  56. list_for_each_entry(nfacct, &net->nfnl_acct_list, head) {
  57. if (strncmp(nfacct->name, acct_name, NFACCT_NAME_MAX) != 0)
  58. continue;
  59. if (nlh->nlmsg_flags & NLM_F_EXCL)
  60. return -EEXIST;
  61. matching = nfacct;
  62. break;
  63. }
  64. if (matching) {
  65. if (nlh->nlmsg_flags & NLM_F_REPLACE) {
  66. /* reset counters if you request a replacement. */
  67. atomic64_set(&matching->pkts, 0);
  68. atomic64_set(&matching->bytes, 0);
  69. smp_mb__before_atomic();
  70. /* reset overquota flag if quota is enabled. */
  71. if ((matching->flags & NFACCT_F_QUOTA))
  72. clear_bit(NFACCT_OVERQUOTA_BIT,
  73. &matching->flags);
  74. return 0;
  75. }
  76. return -EBUSY;
  77. }
  78. if (tb[NFACCT_FLAGS]) {
  79. flags = ntohl(nla_get_be32(tb[NFACCT_FLAGS]));
  80. if (flags & ~NFACCT_F_QUOTA)
  81. return -EOPNOTSUPP;
  82. if ((flags & NFACCT_F_QUOTA) == NFACCT_F_QUOTA)
  83. return -EINVAL;
  84. if (flags & NFACCT_F_OVERQUOTA)
  85. return -EINVAL;
  86. if ((flags & NFACCT_F_QUOTA) && !tb[NFACCT_QUOTA])
  87. return -EINVAL;
  88. size += sizeof(u64);
  89. }
  90. nfacct = kzalloc(sizeof(struct nf_acct) + size, GFP_KERNEL);
  91. if (nfacct == NULL)
  92. return -ENOMEM;
  93. if (flags & NFACCT_F_QUOTA) {
  94. u64 *quota = (u64 *)nfacct->data;
  95. *quota = be64_to_cpu(nla_get_be64(tb[NFACCT_QUOTA]));
  96. nfacct->flags = flags;
  97. }
  98. strncpy(nfacct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX);
  99. if (tb[NFACCT_BYTES]) {
  100. atomic64_set(&nfacct->bytes,
  101. be64_to_cpu(nla_get_be64(tb[NFACCT_BYTES])));
  102. }
  103. if (tb[NFACCT_PKTS]) {
  104. atomic64_set(&nfacct->pkts,
  105. be64_to_cpu(nla_get_be64(tb[NFACCT_PKTS])));
  106. }
  107. atomic_set(&nfacct->refcnt, 1);
  108. list_add_tail_rcu(&nfacct->head, &net->nfnl_acct_list);
  109. return 0;
  110. }
  111. static int
  112. nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
  113. int event, struct nf_acct *acct)
  114. {
  115. struct nlmsghdr *nlh;
  116. struct nfgenmsg *nfmsg;
  117. unsigned int flags = portid ? NLM_F_MULTI : 0;
  118. u64 pkts, bytes;
  119. u32 old_flags;
  120. event |= NFNL_SUBSYS_ACCT << 8;
  121. nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
  122. if (nlh == NULL)
  123. goto nlmsg_failure;
  124. nfmsg = nlmsg_data(nlh);
  125. nfmsg->nfgen_family = AF_UNSPEC;
  126. nfmsg->version = NFNETLINK_V0;
  127. nfmsg->res_id = 0;
  128. if (nla_put_string(skb, NFACCT_NAME, acct->name))
  129. goto nla_put_failure;
  130. old_flags = acct->flags;
  131. if (type == NFNL_MSG_ACCT_GET_CTRZERO) {
  132. pkts = atomic64_xchg(&acct->pkts, 0);
  133. bytes = atomic64_xchg(&acct->bytes, 0);
  134. smp_mb__before_atomic();
  135. if (acct->flags & NFACCT_F_QUOTA)
  136. clear_bit(NFACCT_OVERQUOTA_BIT, &acct->flags);
  137. } else {
  138. pkts = atomic64_read(&acct->pkts);
  139. bytes = atomic64_read(&acct->bytes);
  140. }
  141. if (nla_put_be64(skb, NFACCT_PKTS, cpu_to_be64(pkts),
  142. NFACCT_PAD) ||
  143. nla_put_be64(skb, NFACCT_BYTES, cpu_to_be64(bytes),
  144. NFACCT_PAD) ||
  145. nla_put_be32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt))))
  146. goto nla_put_failure;
  147. if (acct->flags & NFACCT_F_QUOTA) {
  148. u64 *quota = (u64 *)acct->data;
  149. if (nla_put_be32(skb, NFACCT_FLAGS, htonl(old_flags)) ||
  150. nla_put_be64(skb, NFACCT_QUOTA, cpu_to_be64(*quota),
  151. NFACCT_PAD))
  152. goto nla_put_failure;
  153. }
  154. nlmsg_end(skb, nlh);
  155. return skb->len;
  156. nlmsg_failure:
  157. nla_put_failure:
  158. nlmsg_cancel(skb, nlh);
  159. return -1;
  160. }
  161. static int
  162. nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
  163. {
  164. struct net *net = sock_net(skb->sk);
  165. struct nf_acct *cur, *last;
  166. const struct nfacct_filter *filter = cb->data;
  167. if (cb->args[2])
  168. return 0;
  169. last = (struct nf_acct *)cb->args[1];
  170. if (cb->args[1])
  171. cb->args[1] = 0;
  172. rcu_read_lock();
  173. list_for_each_entry_rcu(cur, &net->nfnl_acct_list, head) {
  174. if (last) {
  175. if (cur != last)
  176. continue;
  177. last = NULL;
  178. }
  179. if (filter && (cur->flags & filter->mask) != filter->value)
  180. continue;
  181. if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).portid,
  182. cb->nlh->nlmsg_seq,
  183. NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
  184. NFNL_MSG_ACCT_NEW, cur) < 0) {
  185. cb->args[1] = (unsigned long)cur;
  186. break;
  187. }
  188. }
  189. if (!cb->args[1])
  190. cb->args[2] = 1;
  191. rcu_read_unlock();
  192. return skb->len;
  193. }
  194. static int nfnl_acct_done(struct netlink_callback *cb)
  195. {
  196. kfree(cb->data);
  197. return 0;
  198. }
  199. static const struct nla_policy filter_policy[NFACCT_FILTER_MAX + 1] = {
  200. [NFACCT_FILTER_MASK] = { .type = NLA_U32 },
  201. [NFACCT_FILTER_VALUE] = { .type = NLA_U32 },
  202. };
  203. static struct nfacct_filter *
  204. nfacct_filter_alloc(const struct nlattr * const attr)
  205. {
  206. struct nfacct_filter *filter;
  207. struct nlattr *tb[NFACCT_FILTER_MAX + 1];
  208. int err;
  209. err = nla_parse_nested(tb, NFACCT_FILTER_MAX, attr, filter_policy);
  210. if (err < 0)
  211. return ERR_PTR(err);
  212. if (!tb[NFACCT_FILTER_MASK] || !tb[NFACCT_FILTER_VALUE])
  213. return ERR_PTR(-EINVAL);
  214. filter = kzalloc(sizeof(struct nfacct_filter), GFP_KERNEL);
  215. if (!filter)
  216. return ERR_PTR(-ENOMEM);
  217. filter->mask = ntohl(nla_get_be32(tb[NFACCT_FILTER_MASK]));
  218. filter->value = ntohl(nla_get_be32(tb[NFACCT_FILTER_VALUE]));
  219. return filter;
  220. }
  221. static int nfnl_acct_get(struct net *net, struct sock *nfnl,
  222. struct sk_buff *skb, const struct nlmsghdr *nlh,
  223. const struct nlattr * const tb[])
  224. {
  225. int ret = -ENOENT;
  226. struct nf_acct *cur;
  227. char *acct_name;
  228. if (nlh->nlmsg_flags & NLM_F_DUMP) {
  229. struct netlink_dump_control c = {
  230. .dump = nfnl_acct_dump,
  231. .done = nfnl_acct_done,
  232. };
  233. if (tb[NFACCT_FILTER]) {
  234. struct nfacct_filter *filter;
  235. filter = nfacct_filter_alloc(tb[NFACCT_FILTER]);
  236. if (IS_ERR(filter))
  237. return PTR_ERR(filter);
  238. c.data = filter;
  239. }
  240. return netlink_dump_start(nfnl, skb, nlh, &c);
  241. }
  242. if (!tb[NFACCT_NAME])
  243. return -EINVAL;
  244. acct_name = nla_data(tb[NFACCT_NAME]);
  245. list_for_each_entry(cur, &net->nfnl_acct_list, head) {
  246. struct sk_buff *skb2;
  247. if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0)
  248. continue;
  249. skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
  250. if (skb2 == NULL) {
  251. ret = -ENOMEM;
  252. break;
  253. }
  254. ret = nfnl_acct_fill_info(skb2, NETLINK_CB(skb).portid,
  255. nlh->nlmsg_seq,
  256. NFNL_MSG_TYPE(nlh->nlmsg_type),
  257. NFNL_MSG_ACCT_NEW, cur);
  258. if (ret <= 0) {
  259. kfree_skb(skb2);
  260. break;
  261. }
  262. ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid,
  263. MSG_DONTWAIT);
  264. if (ret > 0)
  265. ret = 0;
  266. /* this avoids a loop in nfnetlink. */
  267. return ret == -EAGAIN ? -ENOBUFS : ret;
  268. }
  269. return ret;
  270. }
  271. /* try to delete object, fail if it is still in use. */
  272. static int nfnl_acct_try_del(struct nf_acct *cur)
  273. {
  274. int ret = 0;
  275. /* We want to avoid races with nfnl_acct_put. So only when the current
  276. * refcnt is 1, we decrease it to 0.
  277. */
  278. if (atomic_cmpxchg(&cur->refcnt, 1, 0) == 1) {
  279. /* We are protected by nfnl mutex. */
  280. list_del_rcu(&cur->head);
  281. kfree_rcu(cur, rcu_head);
  282. } else {
  283. ret = -EBUSY;
  284. }
  285. return ret;
  286. }
  287. static int nfnl_acct_del(struct net *net, struct sock *nfnl,
  288. struct sk_buff *skb, const struct nlmsghdr *nlh,
  289. const struct nlattr * const tb[])
  290. {
  291. struct nf_acct *cur, *tmp;
  292. int ret = -ENOENT;
  293. char *acct_name;
  294. if (!tb[NFACCT_NAME]) {
  295. list_for_each_entry_safe(cur, tmp, &net->nfnl_acct_list, head)
  296. nfnl_acct_try_del(cur);
  297. return 0;
  298. }
  299. acct_name = nla_data(tb[NFACCT_NAME]);
  300. list_for_each_entry(cur, &net->nfnl_acct_list, head) {
  301. if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX) != 0)
  302. continue;
  303. ret = nfnl_acct_try_del(cur);
  304. if (ret < 0)
  305. return ret;
  306. break;
  307. }
  308. return ret;
  309. }
  310. static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = {
  311. [NFACCT_NAME] = { .type = NLA_NUL_STRING, .len = NFACCT_NAME_MAX-1 },
  312. [NFACCT_BYTES] = { .type = NLA_U64 },
  313. [NFACCT_PKTS] = { .type = NLA_U64 },
  314. [NFACCT_FLAGS] = { .type = NLA_U32 },
  315. [NFACCT_QUOTA] = { .type = NLA_U64 },
  316. [NFACCT_FILTER] = {.type = NLA_NESTED },
  317. };
  318. static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = {
  319. [NFNL_MSG_ACCT_NEW] = { .call = nfnl_acct_new,
  320. .attr_count = NFACCT_MAX,
  321. .policy = nfnl_acct_policy },
  322. [NFNL_MSG_ACCT_GET] = { .call = nfnl_acct_get,
  323. .attr_count = NFACCT_MAX,
  324. .policy = nfnl_acct_policy },
  325. [NFNL_MSG_ACCT_GET_CTRZERO] = { .call = nfnl_acct_get,
  326. .attr_count = NFACCT_MAX,
  327. .policy = nfnl_acct_policy },
  328. [NFNL_MSG_ACCT_DEL] = { .call = nfnl_acct_del,
  329. .attr_count = NFACCT_MAX,
  330. .policy = nfnl_acct_policy },
  331. };
  332. static const struct nfnetlink_subsystem nfnl_acct_subsys = {
  333. .name = "acct",
  334. .subsys_id = NFNL_SUBSYS_ACCT,
  335. .cb_count = NFNL_MSG_ACCT_MAX,
  336. .cb = nfnl_acct_cb,
  337. };
  338. MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ACCT);
  339. struct nf_acct *nfnl_acct_find_get(struct net *net, const char *acct_name)
  340. {
  341. struct nf_acct *cur, *acct = NULL;
  342. rcu_read_lock();
  343. list_for_each_entry_rcu(cur, &net->nfnl_acct_list, head) {
  344. if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0)
  345. continue;
  346. if (!try_module_get(THIS_MODULE))
  347. goto err;
  348. if (!atomic_inc_not_zero(&cur->refcnt)) {
  349. module_put(THIS_MODULE);
  350. goto err;
  351. }
  352. acct = cur;
  353. break;
  354. }
  355. err:
  356. rcu_read_unlock();
  357. return acct;
  358. }
  359. EXPORT_SYMBOL_GPL(nfnl_acct_find_get);
  360. void nfnl_acct_put(struct nf_acct *acct)
  361. {
  362. if (atomic_dec_and_test(&acct->refcnt))
  363. kfree_rcu(acct, rcu_head);
  364. module_put(THIS_MODULE);
  365. }
  366. EXPORT_SYMBOL_GPL(nfnl_acct_put);
  367. void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct)
  368. {
  369. atomic64_inc(&nfacct->pkts);
  370. atomic64_add(skb->len, &nfacct->bytes);
  371. }
  372. EXPORT_SYMBOL_GPL(nfnl_acct_update);
  373. static void nfnl_overquota_report(struct net *net, struct nf_acct *nfacct)
  374. {
  375. int ret;
  376. struct sk_buff *skb;
  377. skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
  378. if (skb == NULL)
  379. return;
  380. ret = nfnl_acct_fill_info(skb, 0, 0, NFNL_MSG_ACCT_OVERQUOTA, 0,
  381. nfacct);
  382. if (ret <= 0) {
  383. kfree_skb(skb);
  384. return;
  385. }
  386. netlink_broadcast(net->nfnl, skb, 0, NFNLGRP_ACCT_QUOTA,
  387. GFP_ATOMIC);
  388. }
  389. int nfnl_acct_overquota(struct net *net, const struct sk_buff *skb,
  390. struct nf_acct *nfacct)
  391. {
  392. u64 now;
  393. u64 *quota;
  394. int ret = NFACCT_UNDERQUOTA;
  395. /* no place here if we don't have a quota */
  396. if (!(nfacct->flags & NFACCT_F_QUOTA))
  397. return NFACCT_NO_QUOTA;
  398. quota = (u64 *)nfacct->data;
  399. now = (nfacct->flags & NFACCT_F_QUOTA_PKTS) ?
  400. atomic64_read(&nfacct->pkts) : atomic64_read(&nfacct->bytes);
  401. ret = now > *quota;
  402. if (now >= *quota &&
  403. !test_and_set_bit(NFACCT_OVERQUOTA_BIT, &nfacct->flags)) {
  404. nfnl_overquota_report(net, nfacct);
  405. }
  406. return ret;
  407. }
  408. EXPORT_SYMBOL_GPL(nfnl_acct_overquota);
  409. static int __net_init nfnl_acct_net_init(struct net *net)
  410. {
  411. INIT_LIST_HEAD(&net->nfnl_acct_list);
  412. return 0;
  413. }
  414. static void __net_exit nfnl_acct_net_exit(struct net *net)
  415. {
  416. struct nf_acct *cur, *tmp;
  417. list_for_each_entry_safe(cur, tmp, &net->nfnl_acct_list, head) {
  418. list_del_rcu(&cur->head);
  419. if (atomic_dec_and_test(&cur->refcnt))
  420. kfree_rcu(cur, rcu_head);
  421. }
  422. }
  423. static struct pernet_operations nfnl_acct_ops = {
  424. .init = nfnl_acct_net_init,
  425. .exit = nfnl_acct_net_exit,
  426. };
  427. static int __init nfnl_acct_init(void)
  428. {
  429. int ret;
  430. ret = register_pernet_subsys(&nfnl_acct_ops);
  431. if (ret < 0) {
  432. pr_err("nfnl_acct_init: failed to register pernet ops\n");
  433. goto err_out;
  434. }
  435. pr_info("nfnl_acct: registering with nfnetlink.\n");
  436. ret = nfnetlink_subsys_register(&nfnl_acct_subsys);
  437. if (ret < 0) {
  438. pr_err("nfnl_acct_init: cannot register with nfnetlink.\n");
  439. goto cleanup_pernet;
  440. }
  441. return 0;
  442. cleanup_pernet:
  443. unregister_pernet_subsys(&nfnl_acct_ops);
  444. err_out:
  445. return ret;
  446. }
  447. static void __exit nfnl_acct_exit(void)
  448. {
  449. pr_info("nfnl_acct: unregistering from nfnetlink.\n");
  450. nfnetlink_subsys_unregister(&nfnl_acct_subsys);
  451. unregister_pernet_subsys(&nfnl_acct_ops);
  452. }
  453. module_init(nfnl_acct_init);
  454. module_exit(nfnl_acct_exit);