sch_prio.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485
  1. /*
  2. * net/sched/sch_prio.c Simple 3-band priority "scheduler".
  3. *
  4. * This program is free software; you can redistribute it and/or
  5. * modify it under the terms of the GNU General Public License
  6. * as published by the Free Software Foundation; either version
  7. * 2 of the License, or (at your option) any later version.
  8. *
  9. * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  10. * Fixes: 19990609: J Hadi Salim <hadi@nortelnetworks.com>:
  11. * Init -- EINVAL when opt undefined
  12. */
  13. #include <linux/module.h>
  14. #include <linux/slab.h>
  15. #include <linux/types.h>
  16. #include <linux/kernel.h>
  17. #include <linux/string.h>
  18. #include <linux/errno.h>
  19. #include <linux/skbuff.h>
  20. #include <net/netlink.h>
  21. #include <net/pkt_sched.h>
  22. #include <net/pkt_cls.h>
  23. struct prio_sched_data {
  24. int bands;
  25. struct tcf_proto __rcu *filter_list;
  26. struct tcf_block *block;
  27. u8 prio2band[TC_PRIO_MAX+1];
  28. struct Qdisc *queues[TCQ_PRIO_BANDS];
  29. };
  30. static struct Qdisc *
  31. prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
  32. {
  33. struct prio_sched_data *q = qdisc_priv(sch);
  34. u32 band = skb->priority;
  35. struct tcf_result res;
  36. struct tcf_proto *fl;
  37. int err;
  38. *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
  39. if (TC_H_MAJ(skb->priority) != sch->handle) {
  40. fl = rcu_dereference_bh(q->filter_list);
  41. err = tcf_classify(skb, fl, &res, false);
  42. #ifdef CONFIG_NET_CLS_ACT
  43. switch (err) {
  44. case TC_ACT_STOLEN:
  45. case TC_ACT_QUEUED:
  46. case TC_ACT_TRAP:
  47. *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
  48. /* fall through */
  49. case TC_ACT_SHOT:
  50. return NULL;
  51. }
  52. #endif
  53. if (!fl || err < 0) {
  54. if (TC_H_MAJ(band))
  55. band = 0;
  56. return q->queues[q->prio2band[band & TC_PRIO_MAX]];
  57. }
  58. band = res.classid;
  59. }
  60. band = TC_H_MIN(band) - 1;
  61. if (band >= q->bands)
  62. return q->queues[q->prio2band[0]];
  63. return q->queues[band];
  64. }
  65. static int
  66. prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
  67. {
  68. struct Qdisc *qdisc;
  69. int ret;
  70. qdisc = prio_classify(skb, sch, &ret);
  71. #ifdef CONFIG_NET_CLS_ACT
  72. if (qdisc == NULL) {
  73. if (ret & __NET_XMIT_BYPASS)
  74. qdisc_qstats_drop(sch);
  75. __qdisc_drop(skb, to_free);
  76. return ret;
  77. }
  78. #endif
  79. ret = qdisc_enqueue(skb, qdisc, to_free);
  80. if (ret == NET_XMIT_SUCCESS) {
  81. qdisc_qstats_backlog_inc(sch, skb);
  82. sch->q.qlen++;
  83. return NET_XMIT_SUCCESS;
  84. }
  85. if (net_xmit_drop_count(ret))
  86. qdisc_qstats_drop(sch);
  87. return ret;
  88. }
  89. static struct sk_buff *prio_peek(struct Qdisc *sch)
  90. {
  91. struct prio_sched_data *q = qdisc_priv(sch);
  92. int prio;
  93. for (prio = 0; prio < q->bands; prio++) {
  94. struct Qdisc *qdisc = q->queues[prio];
  95. struct sk_buff *skb = qdisc->ops->peek(qdisc);
  96. if (skb)
  97. return skb;
  98. }
  99. return NULL;
  100. }
  101. static struct sk_buff *prio_dequeue(struct Qdisc *sch)
  102. {
  103. struct prio_sched_data *q = qdisc_priv(sch);
  104. int prio;
  105. for (prio = 0; prio < q->bands; prio++) {
  106. struct Qdisc *qdisc = q->queues[prio];
  107. struct sk_buff *skb = qdisc_dequeue_peeked(qdisc);
  108. if (skb) {
  109. qdisc_bstats_update(sch, skb);
  110. qdisc_qstats_backlog_dec(sch, skb);
  111. sch->q.qlen--;
  112. return skb;
  113. }
  114. }
  115. return NULL;
  116. }
  117. static void
  118. prio_reset(struct Qdisc *sch)
  119. {
  120. int prio;
  121. struct prio_sched_data *q = qdisc_priv(sch);
  122. for (prio = 0; prio < q->bands; prio++)
  123. qdisc_reset(q->queues[prio]);
  124. sch->qstats.backlog = 0;
  125. sch->q.qlen = 0;
  126. }
  127. static int prio_offload(struct Qdisc *sch, struct tc_prio_qopt *qopt)
  128. {
  129. struct net_device *dev = qdisc_dev(sch);
  130. struct tc_prio_qopt_offload opt = {
  131. .handle = sch->handle,
  132. .parent = sch->parent,
  133. };
  134. if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
  135. return -EOPNOTSUPP;
  136. if (qopt) {
  137. opt.command = TC_PRIO_REPLACE;
  138. opt.replace_params.bands = qopt->bands;
  139. memcpy(&opt.replace_params.priomap, qopt->priomap,
  140. TC_PRIO_MAX + 1);
  141. opt.replace_params.qstats = &sch->qstats;
  142. } else {
  143. opt.command = TC_PRIO_DESTROY;
  144. }
  145. return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO, &opt);
  146. }
  147. static void
  148. prio_destroy(struct Qdisc *sch)
  149. {
  150. int prio;
  151. struct prio_sched_data *q = qdisc_priv(sch);
  152. tcf_block_put(q->block);
  153. prio_offload(sch, NULL);
  154. for (prio = 0; prio < q->bands; prio++)
  155. qdisc_destroy(q->queues[prio]);
  156. }
  157. static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
  158. struct netlink_ext_ack *extack)
  159. {
  160. struct prio_sched_data *q = qdisc_priv(sch);
  161. struct Qdisc *queues[TCQ_PRIO_BANDS];
  162. int oldbands = q->bands, i;
  163. struct tc_prio_qopt *qopt;
  164. if (nla_len(opt) < sizeof(*qopt))
  165. return -EINVAL;
  166. qopt = nla_data(opt);
  167. if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2)
  168. return -EINVAL;
  169. for (i = 0; i <= TC_PRIO_MAX; i++) {
  170. if (qopt->priomap[i] >= qopt->bands)
  171. return -EINVAL;
  172. }
  173. /* Before commit, make sure we can allocate all new qdiscs */
  174. for (i = oldbands; i < qopt->bands; i++) {
  175. queues[i] = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
  176. TC_H_MAKE(sch->handle, i + 1),
  177. extack);
  178. if (!queues[i]) {
  179. while (i > oldbands)
  180. qdisc_destroy(queues[--i]);
  181. return -ENOMEM;
  182. }
  183. }
  184. prio_offload(sch, qopt);
  185. sch_tree_lock(sch);
  186. q->bands = qopt->bands;
  187. memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
  188. for (i = q->bands; i < oldbands; i++) {
  189. struct Qdisc *child = q->queues[i];
  190. qdisc_tree_reduce_backlog(child, child->q.qlen,
  191. child->qstats.backlog);
  192. qdisc_destroy(child);
  193. }
  194. for (i = oldbands; i < q->bands; i++) {
  195. q->queues[i] = queues[i];
  196. if (q->queues[i] != &noop_qdisc)
  197. qdisc_hash_add(q->queues[i], true);
  198. }
  199. sch_tree_unlock(sch);
  200. return 0;
  201. }
  202. static int prio_init(struct Qdisc *sch, struct nlattr *opt,
  203. struct netlink_ext_ack *extack)
  204. {
  205. struct prio_sched_data *q = qdisc_priv(sch);
  206. int err;
  207. if (!opt)
  208. return -EINVAL;
  209. err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
  210. if (err)
  211. return err;
  212. return prio_tune(sch, opt, extack);
  213. }
  214. static int prio_dump_offload(struct Qdisc *sch)
  215. {
  216. struct net_device *dev = qdisc_dev(sch);
  217. struct tc_prio_qopt_offload hw_stats = {
  218. .command = TC_PRIO_STATS,
  219. .handle = sch->handle,
  220. .parent = sch->parent,
  221. {
  222. .stats = {
  223. .bstats = &sch->bstats,
  224. .qstats = &sch->qstats,
  225. },
  226. },
  227. };
  228. int err;
  229. sch->flags &= ~TCQ_F_OFFLOADED;
  230. if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc)
  231. return 0;
  232. err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO,
  233. &hw_stats);
  234. if (err == -EOPNOTSUPP)
  235. return 0;
  236. if (!err)
  237. sch->flags |= TCQ_F_OFFLOADED;
  238. return err;
  239. }
  240. static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
  241. {
  242. struct prio_sched_data *q = qdisc_priv(sch);
  243. unsigned char *b = skb_tail_pointer(skb);
  244. struct tc_prio_qopt opt;
  245. int err;
  246. opt.bands = q->bands;
  247. memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX + 1);
  248. err = prio_dump_offload(sch);
  249. if (err)
  250. goto nla_put_failure;
  251. if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
  252. goto nla_put_failure;
  253. return skb->len;
  254. nla_put_failure:
  255. nlmsg_trim(skb, b);
  256. return -1;
  257. }
  258. static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
  259. struct Qdisc **old, struct netlink_ext_ack *extack)
  260. {
  261. struct prio_sched_data *q = qdisc_priv(sch);
  262. struct tc_prio_qopt_offload graft_offload;
  263. struct net_device *dev = qdisc_dev(sch);
  264. unsigned long band = arg - 1;
  265. bool any_qdisc_is_offloaded;
  266. int err;
  267. if (!new) {
  268. new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
  269. TC_H_MAKE(sch->handle, arg), extack);
  270. if (!new)
  271. new = &noop_qdisc;
  272. else
  273. qdisc_hash_add(new, true);
  274. }
  275. *old = qdisc_replace(sch, new, &q->queues[band]);
  276. if (!tc_can_offload(dev))
  277. return 0;
  278. graft_offload.handle = sch->handle;
  279. graft_offload.parent = sch->parent;
  280. graft_offload.graft_params.band = band;
  281. graft_offload.graft_params.child_handle = new->handle;
  282. graft_offload.command = TC_PRIO_GRAFT;
  283. err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_PRIO,
  284. &graft_offload);
  285. /* Don't report error if the graft is part of destroy operation. */
  286. if (err && new != &noop_qdisc) {
  287. /* Don't report error if the parent, the old child and the new
  288. * one are not offloaded.
  289. */
  290. any_qdisc_is_offloaded = sch->flags & TCQ_F_OFFLOADED;
  291. any_qdisc_is_offloaded |= new->flags & TCQ_F_OFFLOADED;
  292. if (*old)
  293. any_qdisc_is_offloaded |= (*old)->flags &
  294. TCQ_F_OFFLOADED;
  295. if (any_qdisc_is_offloaded)
  296. NL_SET_ERR_MSG(extack, "Offloading graft operation failed.");
  297. }
  298. return 0;
  299. }
  300. static struct Qdisc *
  301. prio_leaf(struct Qdisc *sch, unsigned long arg)
  302. {
  303. struct prio_sched_data *q = qdisc_priv(sch);
  304. unsigned long band = arg - 1;
  305. return q->queues[band];
  306. }
  307. static unsigned long prio_find(struct Qdisc *sch, u32 classid)
  308. {
  309. struct prio_sched_data *q = qdisc_priv(sch);
  310. unsigned long band = TC_H_MIN(classid);
  311. if (band - 1 >= q->bands)
  312. return 0;
  313. return band;
  314. }
  315. static unsigned long prio_bind(struct Qdisc *sch, unsigned long parent, u32 classid)
  316. {
  317. return prio_find(sch, classid);
  318. }
  319. static void prio_unbind(struct Qdisc *q, unsigned long cl)
  320. {
  321. }
  322. static int prio_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb,
  323. struct tcmsg *tcm)
  324. {
  325. struct prio_sched_data *q = qdisc_priv(sch);
  326. tcm->tcm_handle |= TC_H_MIN(cl);
  327. tcm->tcm_info = q->queues[cl-1]->handle;
  328. return 0;
  329. }
  330. static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
  331. struct gnet_dump *d)
  332. {
  333. struct prio_sched_data *q = qdisc_priv(sch);
  334. struct Qdisc *cl_q;
  335. cl_q = q->queues[cl - 1];
  336. if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
  337. d, cl_q->cpu_bstats, &cl_q->bstats) < 0 ||
  338. gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0)
  339. return -1;
  340. return 0;
  341. }
  342. static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
  343. {
  344. struct prio_sched_data *q = qdisc_priv(sch);
  345. int prio;
  346. if (arg->stop)
  347. return;
  348. for (prio = 0; prio < q->bands; prio++) {
  349. if (arg->count < arg->skip) {
  350. arg->count++;
  351. continue;
  352. }
  353. if (arg->fn(sch, prio + 1, arg) < 0) {
  354. arg->stop = 1;
  355. break;
  356. }
  357. arg->count++;
  358. }
  359. }
  360. static struct tcf_block *prio_tcf_block(struct Qdisc *sch, unsigned long cl,
  361. struct netlink_ext_ack *extack)
  362. {
  363. struct prio_sched_data *q = qdisc_priv(sch);
  364. if (cl)
  365. return NULL;
  366. return q->block;
  367. }
  368. static const struct Qdisc_class_ops prio_class_ops = {
  369. .graft = prio_graft,
  370. .leaf = prio_leaf,
  371. .find = prio_find,
  372. .walk = prio_walk,
  373. .tcf_block = prio_tcf_block,
  374. .bind_tcf = prio_bind,
  375. .unbind_tcf = prio_unbind,
  376. .dump = prio_dump_class,
  377. .dump_stats = prio_dump_class_stats,
  378. };
  379. static struct Qdisc_ops prio_qdisc_ops __read_mostly = {
  380. .next = NULL,
  381. .cl_ops = &prio_class_ops,
  382. .id = "prio",
  383. .priv_size = sizeof(struct prio_sched_data),
  384. .enqueue = prio_enqueue,
  385. .dequeue = prio_dequeue,
  386. .peek = prio_peek,
  387. .init = prio_init,
  388. .reset = prio_reset,
  389. .destroy = prio_destroy,
  390. .change = prio_tune,
  391. .dump = prio_dump,
  392. .owner = THIS_MODULE,
  393. };
  394. static int __init prio_module_init(void)
  395. {
  396. return register_qdisc(&prio_qdisc_ops);
  397. }
  398. static void __exit prio_module_exit(void)
  399. {
  400. unregister_qdisc(&prio_qdisc_ops);
  401. }
  402. module_init(prio_module_init)
  403. module_exit(prio_module_exit)
  404. MODULE_LICENSE("GPL");