hfsc.c 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635
  1. /* $OpenBSD: hfsc.c,v 1.21 2015/04/18 11:12:33 dlg Exp $ */
  2. /*
  3. * Copyright (c) 2012-2013 Henning Brauer <henning@openbsd.org>
  4. * Copyright (c) 1997-1999 Carnegie Mellon University. All Rights Reserved.
  5. *
  6. * Permission to use, copy, modify, and distribute this software and
  7. * its documentation is hereby granted (including for commercial or
  8. * for-profit use), provided that both the copyright notice and this
  9. * permission notice appear in all copies of the software, derivative
  10. * works, or modified versions, and any portions thereof.
  11. *
  12. * THIS SOFTWARE IS EXPERIMENTAL AND IS KNOWN TO HAVE BUGS, SOME OF
  13. * WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON PROVIDES THIS
  14. * SOFTWARE IN ITS ``AS IS'' CONDITION, AND ANY EXPRESS OR IMPLIED
  15. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  16. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  17. * DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY BE LIABLE
  18. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  19. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
  20. * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  21. * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  22. * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  23. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
  24. * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
  25. * DAMAGE.
  26. *
  27. * Carnegie Mellon encourages (but does not require) users of this
  28. * software to return any improvements or extensions that they make,
  29. * and to grant Carnegie Mellon the rights to redistribute these
  30. * changes without encumbrance.
  31. */
  32. /*
  33. * H-FSC is described in Proceedings of SIGCOMM'97,
  34. * "A Hierarchical Fair Service Curve Algorithm for Link-Sharing,
  35. * Real-Time and Priority Service"
  36. * by Ion Stoica, Hui Zhang, and T. S. Eugene Ng.
  37. *
  38. * Oleg Cherevko <olwi@aq.ml.com.ua> added the upperlimit for link-sharing.
  39. * when a class has an upperlimit, the fit-time is computed from the
  40. * upperlimit service curve. the link-sharing scheduler does not schedule
  41. * a class whose fit-time exceeds the current time.
  42. */
  43. #include <sys/param.h>
  44. #include <sys/malloc.h>
  45. #include <sys/pool.h>
  46. #include <sys/mbuf.h>
  47. #include <sys/socket.h>
  48. #include <sys/systm.h>
  49. #include <sys/errno.h>
  50. #include <sys/queue.h>
  51. #include <sys/kernel.h>
  52. #include <sys/timeout.h>
  53. #include <net/if.h>
  54. #include <net/if_var.h>
  55. #include <netinet/in.h>
  56. #include <net/pfvar.h>
  57. #include <net/hfsc.h>
  58. /* need to provide dummies for hfsc-less kernels to reduce the if.h horror */
  59. #include "pf.h"
  60. #if NPF > 0
  61. /*
  62. * kernel internal service curve representation
  63. * coordinates are given by 64 bit unsigned integers.
  64. * x-axis: unit is clock count. for the intel x86 architecture,
  65. * the raw Pentium TSC (Timestamp Counter) value is used.
  66. * virtual time is also calculated in this time scale.
  67. * y-axis: unit is byte.
  68. *
  69. * the service curve parameters are converted to the internal
  70. * representation.
  71. * the slope values are scaled to avoid overflow.
  72. * the inverse slope values as well as the y-projection of the 1st
  73. * segment are kept in order to avoid 64-bit divide operations
  74. * that are expensive on 32-bit architectures.
  75. *
  76. * note: Intel Pentium TSC never wraps around in several thousands of years.
  77. * x-axis doesn't wrap around for 1089 years with 1GHz clock.
  78. * y-axis doesn't wrap around for 4358 years with 1Gbps bandwidth.
  79. */
  80. /*
  * kernel internal representation of a service curve: a two-segment
  * curve held as scaled slopes together with their inverse slopes and
  * the x/y projections of the first segment.
  */
  81. struct hfsc_internal_sc {
  82. u_int64_t sm1; /* scaled slope of the 1st segment */
  83. u_int64_t ism1; /* scaled inverse-slope of the 1st segment */
  84. u_int64_t dx; /* the x-projection of the 1st segment */
  85. u_int64_t dy; /* the y-projection of the 1st segment */
  86. u_int64_t sm2; /* scaled slope of the 2nd segment */
  87. u_int64_t ism2; /* scaled inverse-slope of the 2nd segment */
  88. };
  89. /*
  * runtime service curve: the same two-segment parameters as
  * struct hfsc_internal_sc, plus the current origin (x, y) of the curve.
  */
  90. struct hfsc_runtime_sc {
  91. u_int64_t x; /* current starting position on x-axis */
  92. u_int64_t y; /* current starting position on y-axis */
  93. u_int64_t sm1; /* scaled slope of the 1st segment */
  94. u_int64_t ism1; /* scaled inverse-slope of the 1st segment */
  95. u_int64_t dx; /* the x-projection of the 1st segment */
  96. u_int64_t dy; /* the y-projection of the 1st segment */
  97. u_int64_t sm2; /* scaled slope of the 2nd segment */
  98. u_int64_t ism2; /* scaled inverse-slope of the 2nd segment */
  99. };
  /*
  * per-class packet queue: an mbuf list plus the length limit that
  * hfsc_addq() checks before enqueueing.
  */
  100. struct hfsc_classq {
  101. struct mbuf_list q; /* Queue of packets */
  102. int qlimit; /* Queue limit */
  103. };
  104. /* for TAILQ based ellist and actlist implementation */
  105. struct hfsc_class;
  /* list head and entry types for the eligible list (ellist) */
  106. typedef TAILQ_HEAD(hfsc_eligible, hfsc_class) hfsc_ellist_t;
  107. typedef TAILQ_ENTRY(hfsc_class) hfsc_elentry_t;
  /* list head and entry types for the active children list (actlist) */
  108. typedef TAILQ_HEAD(hfsc_active, hfsc_class) hfsc_actlist_t;
  109. typedef TAILQ_ENTRY(hfsc_class) hfsc_actentry_t;
  /* thin accessors over the TAILQ macros; s is a pointer to the list head */
  110. #define hfsc_ellist_first(s) TAILQ_FIRST(s)
  111. #define hfsc_actlist_first(s) TAILQ_FIRST(s)
  112. #define hfsc_actlist_last(s) TAILQ_LAST(s, hfsc_active)
  /*
  * one node of the class tree. children hang off cl_children and are
  * chained through cl_siblings; cl_parent is NULL for the root class.
  * the cl_rsc/cl_fsc/cl_usc curves are optional and NULL when the class
  * was created without the corresponding service curve.
  */
  113. struct hfsc_class {
  114. u_int cl_id; /* class id (just for debug) */
  115. u_int32_t cl_handle; /* class handle */
  116. struct hfsc_if *cl_hif; /* back pointer to struct hfsc_if */
  117. int cl_flags; /* misc flags */
  118. struct hfsc_class *cl_parent; /* parent class */
  119. struct hfsc_class *cl_siblings; /* sibling classes */
  120. struct hfsc_class *cl_children; /* child classes */
  121. struct hfsc_classq cl_q; /* class queue structure */
  122. /* struct red *cl_red;*/ /* RED state */
  123. struct altq_pktattr *cl_pktattr; /* saved header used by ECN */
  124. u_int64_t cl_total; /* total work in bytes */
  125. u_int64_t cl_cumul; /* cumulative work in bytes
  126. done by real-time criteria */
  127. u_int64_t cl_d; /* deadline */
  128. u_int64_t cl_e; /* eligible time */
  129. u_int64_t cl_vt; /* virtual time */
  130. u_int64_t cl_f; /* time when this class will fit for
  131. link-sharing, max(myf, cfmin) */
  132. u_int64_t cl_myf; /* my fit-time (as calculated from this
  133. class's own upperlimit curve) */
  134. u_int64_t cl_myfadj; /* my fit-time adjustment
  135. (to cancel history dependence) */
  136. u_int64_t cl_cfmin; /* earliest children's fit-time (used
  137. with cl_myf to obtain cl_f) */
  138. u_int64_t cl_cvtmin; /* minimal virtual time among the
  139. children fit for link-sharing
  140. (monotonic within a period) */
  141. u_int64_t cl_vtadj; /* intra-period cumulative vt
  142. adjustment */
  143. u_int64_t cl_vtoff; /* inter-period cumulative vt offset */
  144. u_int64_t cl_cvtmax; /* max child's vt in the last period */
  145. u_int64_t cl_initvt; /* init virtual time (for debugging) */
  146. struct hfsc_internal_sc *cl_rsc; /* internal real-time service curve */
  147. struct hfsc_internal_sc *cl_fsc; /* internal fair service curve */
  148. struct hfsc_internal_sc *cl_usc; /* internal upperlimit service curve */
  149. struct hfsc_runtime_sc cl_deadline; /* deadline curve */
  150. struct hfsc_runtime_sc cl_eligible; /* eligible curve */
  151. struct hfsc_runtime_sc cl_virtual; /* virtual curve */
  152. struct hfsc_runtime_sc cl_ulimit; /* upperlimit curve */
  153. u_int cl_vtperiod; /* vt period sequence no */
  154. u_int cl_parentperiod; /* parent's vt period seqno */
  155. int cl_nactive; /* number of active children */
  156. hfsc_actlist_t *cl_actc; /* active children list */
  157. hfsc_actentry_t cl_actlist; /* active children list entry */
  158. hfsc_elentry_t cl_ellist; /* eligible list entry */
  /* per-class counters: packets sent, packets dropped, activation periods */
  159. struct {
  160. struct hfsc_pktcntr xmit_cnt;
  161. struct hfsc_pktcntr drop_cnt;
  162. u_int period;
  163. } cl_stats;
  164. };
  165. /*
  166. * hfsc interface state
  * one instance is allocated per interface by hfsc_attach() and hung
  * off the interface send queue (ifq_hfsc).
  167. */
  168. struct hfsc_if {
  169. struct hfsc_if *hif_next; /* interface state list */
  170. struct ifqueue *hif_ifq; /* backpointer to ifq */
  171. struct hfsc_class *hif_rootclass; /* root class */
  172. struct hfsc_class *hif_defaultclass; /* default class */
  173. struct hfsc_class **hif_class_tbl; /* class table, hif_allocated slots */
  174. struct hfsc_class *hif_pollcache; /* cache for poll operation */
  175. u_int hif_allocated; /* # of slots in hif_class_tbl */
  176. u_int hif_classes; /* # of classes in the tree */
  177. u_int hif_packets; /* # of packets in the tree */
  178. u_int hif_classid; /* class id sequence number */
  179. hfsc_ellist_t *hif_eligible; /* eligible list */
  180. struct timeout hif_defer; /* for queues that weren't ready */
  181. };
  182. /*
  183. * function prototypes
  184. */
  /* class tree management */
  185. struct hfsc_class *hfsc_class_create(struct hfsc_if *,
  186. struct hfsc_sc *, struct hfsc_sc *,
  187. struct hfsc_sc *, struct hfsc_class *, int,
  188. int, int);
  189. int hfsc_class_destroy(struct hfsc_class *);
  190. struct hfsc_class *hfsc_nextclass(struct hfsc_class *);
  /* per-class packet queue primitives */
  191. int hfsc_addq(struct hfsc_class *, struct mbuf *);
  192. struct mbuf *hfsc_getq(struct hfsc_class *);
  193. struct mbuf *hfsc_pollq(struct hfsc_class *);
  194. void hfsc_purgeq(struct hfsc_class *);
  /* timeout handler armed by hfsc_attach() */
  195. void hfsc_deferred(void *);
  /* scheduler state updates */
  196. void hfsc_update_cfmin(struct hfsc_class *);
  197. void hfsc_set_active(struct hfsc_class *, int);
  198. void hfsc_set_passive(struct hfsc_class *);
  199. void hfsc_init_ed(struct hfsc_class *, int);
  200. void hfsc_update_ed(struct hfsc_class *, int);
  201. void hfsc_update_d(struct hfsc_class *, int);
  202. void hfsc_init_vf(struct hfsc_class *, int);
  203. void hfsc_update_vf(struct hfsc_class *, int, u_int64_t);
  /* eligible list (TAILQ based, see hfsc_ellist_t) */
  204. hfsc_ellist_t *hfsc_ellist_alloc(void);
  205. void hfsc_ellist_destroy(hfsc_ellist_t *);
  206. void hfsc_ellist_insert(struct hfsc_class *);
  207. void hfsc_ellist_remove(struct hfsc_class *);
  208. void hfsc_ellist_update(struct hfsc_class *);
  209. struct hfsc_class *hfsc_ellist_get_mindl(hfsc_ellist_t *, u_int64_t);
  /* active children list (TAILQ based, see hfsc_actlist_t) */
  210. hfsc_actlist_t *hfsc_actlist_alloc(void);
  211. void hfsc_actlist_destroy(hfsc_actlist_t *);
  212. void hfsc_actlist_insert(struct hfsc_class *);
  213. void hfsc_actlist_remove(struct hfsc_class *);
  214. void hfsc_actlist_update(struct hfsc_class *);
  215. struct hfsc_class *hfsc_actlist_firstfit(struct hfsc_class *,
  216. u_int64_t);
  /* service curve helpers (defined later in the file) */
  217. static __inline u_int64_t seg_x2y(u_int64_t, u_int64_t);
  218. static __inline u_int64_t seg_y2x(u_int64_t, u_int64_t);
  219. static __inline u_int64_t m2sm(u_int);
  220. static __inline u_int64_t m2ism(u_int);
  221. static __inline u_int64_t d2dx(u_int);
  222. static __inline u_int sm2m(u_int64_t);
  223. static __inline u_int dx2d(u_int64_t);
  224. void hfsc_sc2isc(struct hfsc_sc *, struct hfsc_internal_sc *);
  225. void hfsc_rtsc_init(struct hfsc_runtime_sc *,
  226. struct hfsc_internal_sc *, u_int64_t, u_int64_t);
  227. u_int64_t hfsc_rtsc_y2x(struct hfsc_runtime_sc *, u_int64_t);
  228. u_int64_t hfsc_rtsc_x2y(struct hfsc_runtime_sc *, u_int64_t);
  229. void hfsc_rtsc_min(struct hfsc_runtime_sc *,
  230. struct hfsc_internal_sc *, u_int64_t, u_int64_t);
  /* statistics export and class handle lookup */
  231. void hfsc_getclstats(struct hfsc_class_stats *, struct hfsc_class *);
  232. struct hfsc_class *hfsc_clh2cph(struct hfsc_if *, u_int32_t);
  /*
  * the hfsc clock counts microseconds shifted left by HFSC_CLK_SHIFT
  * (see hfsc_microuptime()), so HFSC_FREQ is the number of clock units
  * per second and HFSC_CLK_PER_TICK the number per hz tick.
  */
  233. #define HFSC_CLK_SHIFT 8
  234. #define HFSC_FREQ (1000000 << HFSC_CLK_SHIFT)
  235. #define HFSC_CLK_PER_TICK (HFSC_FREQ / hz)
  236. #define HFSC_HT_INFINITY 0xffffffffffffffffLL /* infinite time value */
  /* pools backing struct hfsc_class and struct hfsc_internal_sc allocations */
  237. struct pool hfsc_class_pl, hfsc_internal_sc_pl;
  238. u_int64_t
  239. hfsc_microuptime(void)
  240. {
  241. struct timeval tv;
  242. microuptime(&tv);
  243. return (((u_int64_t)(tv.tv_sec) * 1000000 + tv.tv_usec) <<
  244. HFSC_CLK_SHIFT);
  245. }
  246. static inline u_int
  247. hfsc_more_slots(u_int current)
  248. {
  249. u_int want = current * 2;
  250. return (want > HFSC_MAX_CLASSES ? HFSC_MAX_CLASSES : want);
  251. }
  252. static void
  253. hfsc_grow_class_tbl(struct hfsc_if *hif, u_int howmany)
  254. {
  255. struct hfsc_class **newtbl, **old;
  256. newtbl = mallocarray(howmany, sizeof(void *), M_DEVBUF,
  257. M_WAITOK | M_ZERO);
  258. old = hif->hif_class_tbl;
  259. memcpy(newtbl, old, hif->hif_allocated * sizeof(void *));
  260. hif->hif_class_tbl = newtbl;
  261. hif->hif_allocated = howmany;
  262. free(old, M_DEVBUF, 0);
  263. }
  264. void
  265. hfsc_initialize(void)
  266. {
  267. pool_init(&hfsc_class_pl, sizeof(struct hfsc_class), 0, 0, PR_WAITOK,
  268. "hfscclass", NULL);
  269. pool_init(&hfsc_internal_sc_pl, sizeof(struct hfsc_internal_sc), 0, 0,
  270. PR_WAITOK, "hfscintsc", NULL);
  271. }
  272. int
  273. hfsc_attach(struct ifnet *ifp)
  274. {
  275. struct hfsc_if *hif;
  276. size_t tblsize;
  277. tblsize = HFSC_DEFAULT_CLASSES * sizeof(void *);
  278. if (ifp == NULL || ifp->if_snd.ifq_hfsc != NULL)
  279. return (0);
  280. hif = malloc(sizeof(struct hfsc_if), M_DEVBUF, M_WAITOK | M_ZERO);
  281. hif->hif_eligible = hfsc_ellist_alloc();
  282. hif->hif_class_tbl = malloc(tblsize, M_DEVBUF, M_WAITOK | M_ZERO);
  283. hif->hif_allocated = HFSC_DEFAULT_CLASSES;
  284. hif->hif_ifq = &ifp->if_snd;
  285. ifp->if_snd.ifq_hfsc = hif;
  286. timeout_set(&hif->hif_defer, hfsc_deferred, ifp);
  287. /* XXX HRTIMER don't schedule it yet, only when some packets wait. */
  288. timeout_add(&hif->hif_defer, 1);
  289. return (0);
  290. }
  291. int
  292. hfsc_detach(struct ifnet *ifp)
  293. {
  294. struct hfsc_if *hif;
  295. if (ifp == NULL)
  296. return (0);
  297. hif = ifp->if_snd.ifq_hfsc;
  298. timeout_del(&hif->hif_defer);
  299. ifp->if_snd.ifq_hfsc = NULL;
  300. hfsc_ellist_destroy(hif->hif_eligible);
  301. free(hif->hif_class_tbl, M_DEVBUF, 0);
  302. free(hif, M_DEVBUF, 0);
  303. return (0);
  304. }
  305. int
  306. hfsc_addqueue(struct pf_queuespec *q)
  307. {
  308. struct hfsc_if *hif;
  309. struct hfsc_class *cl, *parent;
  310. struct hfsc_sc rtsc, lssc, ulsc;
  311. if (q->kif->pfik_ifp == NULL)
  312. return (0);
  313. if ((hif = q->kif->pfik_ifp->if_snd.ifq_hfsc) == NULL)
  314. return (EINVAL);
  315. if (q->parent_qid == HFSC_NULLCLASS_HANDLE &&
  316. hif->hif_rootclass == NULL)
  317. parent = NULL;
  318. else if ((parent = hfsc_clh2cph(hif, q->parent_qid)) == NULL)
  319. return (EINVAL);
  320. if (q->qid == 0)
  321. return (EINVAL);
  322. if (hfsc_clh2cph(hif, q->qid) != NULL)
  323. return (EBUSY);
  324. rtsc.m1 = q->realtime.m1.absolute;
  325. rtsc.d = q->realtime.d;
  326. rtsc.m2 = q->realtime.m2.absolute;
  327. lssc.m1 = q->linkshare.m1.absolute;
  328. lssc.d = q->linkshare.d;
  329. lssc.m2 = q->linkshare.m2.absolute;
  330. ulsc.m1 = q->upperlimit.m1.absolute;
  331. ulsc.d = q->upperlimit.d;
  332. ulsc.m2 = q->upperlimit.m2.absolute;
  333. cl = hfsc_class_create(hif, &rtsc, &lssc, &ulsc,
  334. parent, q->qlimit, q->flags, q->qid);
  335. if (cl == NULL)
  336. return (ENOMEM);
  337. return (0);
  338. }
  339. int
  340. hfsc_delqueue(struct pf_queuespec *q)
  341. {
  342. struct hfsc_if *hif;
  343. struct hfsc_class *cl;
  344. if (q->kif->pfik_ifp == NULL)
  345. return (0);
  346. if ((hif = q->kif->pfik_ifp->if_snd.ifq_hfsc) == NULL)
  347. return (EINVAL);
  348. if ((cl = hfsc_clh2cph(hif, q->qid)) == NULL)
  349. return (EINVAL);
  350. return (hfsc_class_destroy(cl));
  351. }
  352. int
  353. hfsc_qstats(struct pf_queuespec *q, void *ubuf, int *nbytes)
  354. {
  355. struct hfsc_if *hif;
  356. struct hfsc_class *cl;
  357. struct hfsc_class_stats stats;
  358. int error = 0;
  359. if (q->kif->pfik_ifp == NULL)
  360. return (EBADF);
  361. if ((hif = q->kif->pfik_ifp->if_snd.ifq_hfsc) == NULL)
  362. return (EBADF);
  363. if ((cl = hfsc_clh2cph(hif, q->qid)) == NULL)
  364. return (EINVAL);
  365. if (*nbytes < sizeof(stats))
  366. return (EINVAL);
  367. hfsc_getclstats(&stats, cl);
  368. if ((error = copyout((caddr_t)&stats, ubuf, sizeof(stats))) != 0)
  369. return (error);
  370. *nbytes = sizeof(stats);
  371. return (0);
  372. }
  373. void
  374. hfsc_purge(struct ifqueue *ifq)
  375. {
  376. struct hfsc_if *hif = ifq->ifq_hfsc;
  377. struct hfsc_class *cl;
  378. for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
  379. if (ml_len(&cl->cl_q.q) > 0)
  380. hfsc_purgeq(cl);
  381. hif->hif_ifq->ifq_len = 0;
  382. }
  383. struct hfsc_class *
  384. hfsc_class_create(struct hfsc_if *hif, struct hfsc_sc *rsc,
  385. struct hfsc_sc *fsc, struct hfsc_sc *usc, struct hfsc_class *parent,
  386. int qlimit, int flags, int qid)
  387. {
  388. struct hfsc_class *cl, *p;
  389. int i, s;
  390. if (qlimit == 0)
  391. qlimit = HFSC_DEFAULT_QLIMIT;
  392. if (hif->hif_classes >= hif->hif_allocated) {
  393. u_int newslots = hfsc_more_slots(hif->hif_allocated);
  394. if (newslots == hif->hif_allocated)
  395. return (NULL);
  396. hfsc_grow_class_tbl(hif, newslots);
  397. }
  398. cl = pool_get(&hfsc_class_pl, PR_WAITOK | PR_ZERO);
  399. cl->cl_actc = hfsc_actlist_alloc();
  400. ml_init(&cl->cl_q.q);
  401. cl->cl_q.qlimit = qlimit;
  402. cl->cl_flags = flags;
  403. if (rsc != NULL && (rsc->m1 != 0 || rsc->m2 != 0)) {
  404. cl->cl_rsc = pool_get(&hfsc_internal_sc_pl, PR_WAITOK);
  405. hfsc_sc2isc(rsc, cl->cl_rsc);
  406. hfsc_rtsc_init(&cl->cl_deadline, cl->cl_rsc, 0, 0);
  407. hfsc_rtsc_init(&cl->cl_eligible, cl->cl_rsc, 0, 0);
  408. }
  409. if (fsc != NULL && (fsc->m1 != 0 || fsc->m2 != 0)) {
  410. cl->cl_fsc = pool_get(&hfsc_internal_sc_pl, PR_WAITOK);
  411. hfsc_sc2isc(fsc, cl->cl_fsc);
  412. hfsc_rtsc_init(&cl->cl_virtual, cl->cl_fsc, 0, 0);
  413. }
  414. if (usc != NULL && (usc->m1 != 0 || usc->m2 != 0)) {
  415. cl->cl_usc = pool_get(&hfsc_internal_sc_pl, PR_WAITOK);
  416. hfsc_sc2isc(usc, cl->cl_usc);
  417. hfsc_rtsc_init(&cl->cl_ulimit, cl->cl_usc, 0, 0);
  418. }
  419. cl->cl_id = hif->hif_classid++;
  420. cl->cl_handle = qid;
  421. cl->cl_hif = hif;
  422. cl->cl_parent = parent;
  423. s = splnet();
  424. hif->hif_classes++;
  425. /*
  426. * find a free slot in the class table. if the slot matching
  427. * the lower bits of qid is free, use this slot. otherwise,
  428. * use the first free slot.
  429. */
  430. i = qid % hif->hif_allocated;
  431. if (hif->hif_class_tbl[i] == NULL)
  432. hif->hif_class_tbl[i] = cl;
  433. else {
  434. for (i = 0; i < hif->hif_allocated; i++)
  435. if (hif->hif_class_tbl[i] == NULL) {
  436. hif->hif_class_tbl[i] = cl;
  437. break;
  438. }
  439. if (i == hif->hif_allocated) {
  440. splx(s);
  441. goto err_ret;
  442. }
  443. }
  444. if (flags & HFSC_DEFAULTCLASS)
  445. hif->hif_defaultclass = cl;
  446. if (parent == NULL)
  447. hif->hif_rootclass = cl;
  448. else {
  449. /* add this class to the children list of the parent */
  450. if ((p = parent->cl_children) == NULL)
  451. parent->cl_children = cl;
  452. else {
  453. while (p->cl_siblings != NULL)
  454. p = p->cl_siblings;
  455. p->cl_siblings = cl;
  456. }
  457. }
  458. splx(s);
  459. return (cl);
  460. err_ret:
  461. if (cl->cl_actc != NULL)
  462. hfsc_actlist_destroy(cl->cl_actc);
  463. if (cl->cl_fsc != NULL)
  464. pool_put(&hfsc_internal_sc_pl, cl->cl_fsc);
  465. if (cl->cl_rsc != NULL)
  466. pool_put(&hfsc_internal_sc_pl, cl->cl_rsc);
  467. if (cl->cl_usc != NULL)
  468. pool_put(&hfsc_internal_sc_pl, cl->cl_usc);
  469. pool_put(&hfsc_class_pl, cl);
  470. return (NULL);
  471. }
  472. int
  473. hfsc_class_destroy(struct hfsc_class *cl)
  474. {
  475. int i, s;
  476. if (cl == NULL)
  477. return (0);
  478. if (cl->cl_children != NULL)
  479. return (EBUSY);
  480. s = splnet();
  481. if (ml_len(&cl->cl_q.q) > 0)
  482. hfsc_purgeq(cl);
  483. if (cl->cl_parent != NULL) {
  484. struct hfsc_class *p = cl->cl_parent->cl_children;
  485. if (p == cl)
  486. cl->cl_parent->cl_children = cl->cl_siblings;
  487. else do {
  488. if (p->cl_siblings == cl) {
  489. p->cl_siblings = cl->cl_siblings;
  490. break;
  491. }
  492. } while ((p = p->cl_siblings) != NULL);
  493. }
  494. for (i = 0; i < cl->cl_hif->hif_allocated; i++)
  495. if (cl->cl_hif->hif_class_tbl[i] == cl) {
  496. cl->cl_hif->hif_class_tbl[i] = NULL;
  497. break;
  498. }
  499. cl->cl_hif->hif_classes--;
  500. splx(s);
  501. hfsc_actlist_destroy(cl->cl_actc);
  502. if (cl == cl->cl_hif->hif_rootclass)
  503. cl->cl_hif->hif_rootclass = NULL;
  504. if (cl == cl->cl_hif->hif_defaultclass)
  505. cl->cl_hif->hif_defaultclass = NULL;
  506. if (cl->cl_usc != NULL)
  507. pool_put(&hfsc_internal_sc_pl, cl->cl_usc);
  508. if (cl->cl_fsc != NULL)
  509. pool_put(&hfsc_internal_sc_pl, cl->cl_fsc);
  510. if (cl->cl_rsc != NULL)
  511. pool_put(&hfsc_internal_sc_pl, cl->cl_rsc);
  512. pool_put(&hfsc_class_pl, cl);
  513. return (0);
  514. }
  515. /*
  516. * hfsc_nextclass returns the next class in the tree.
  517. * usage:
  518. * for (cl = hif->hif_rootclass; cl != NULL; cl = hfsc_nextclass(cl))
  519. * do_something;
  520. */
  521. struct hfsc_class *
  522. hfsc_nextclass(struct hfsc_class *cl)
  523. {
  524. if (cl->cl_children != NULL)
  525. cl = cl->cl_children;
  526. else if (cl->cl_siblings != NULL)
  527. cl = cl->cl_siblings;
  528. else {
  529. while ((cl = cl->cl_parent) != NULL)
  530. if (cl->cl_siblings) {
  531. cl = cl->cl_siblings;
  532. break;
  533. }
  534. }
  535. return (cl);
  536. }
  537. int
  538. hfsc_enqueue(struct ifqueue *ifq, struct mbuf *m)
  539. {
  540. struct hfsc_if *hif = ifq->ifq_hfsc;
  541. struct hfsc_class *cl;
  542. if ((cl = hfsc_clh2cph(hif, m->m_pkthdr.pf.qid)) == NULL ||
  543. cl->cl_children != NULL) {
  544. cl = hif->hif_defaultclass;
  545. if (cl == NULL) {
  546. m_freem(m);
  547. return (ENOBUFS);
  548. }
  549. cl->cl_pktattr = NULL;
  550. }
  551. if (hfsc_addq(cl, m) != 0) {
  552. /* drop occurred. mbuf needs to be freed */
  553. PKTCNTR_INC(&cl->cl_stats.drop_cnt, m->m_pkthdr.len);
  554. m_freem(m);
  555. return (ENOBUFS);
  556. }
  557. ifq->ifq_len++;
  558. cl->cl_hif->hif_packets++;
  559. m->m_pkthdr.pf.prio = IFQ_MAXPRIO;
  560. /* successfully queued. */
  561. if (ml_len(&cl->cl_q.q) == 1)
  562. hfsc_set_active(cl, m->m_pkthdr.len);
  563. return (0);
  564. }
  565. struct mbuf *
  566. hfsc_dequeue(struct ifqueue *ifq, int remove)
  567. {
  568. struct hfsc_if *hif = ifq->ifq_hfsc;
  569. struct hfsc_class *cl, *tcl;
  570. struct mbuf *m;
  571. int next_len, realtime = 0;
  572. u_int64_t cur_time;
  573. if (hif->hif_packets == 0)
  574. return (NULL);
  575. cur_time = hfsc_microuptime();
  576. if (remove && hif->hif_pollcache != NULL) {
  577. cl = hif->hif_pollcache;
  578. hif->hif_pollcache = NULL;
  579. /* check if the class was scheduled by real-time criteria */
  580. if (cl->cl_rsc != NULL)
  581. realtime = (cl->cl_e <= cur_time);
  582. } else {
  583. /*
  584. * if there are eligible classes, use real-time criteria.
  585. * find the class with the minimum deadline among
  586. * the eligible classes.
  587. */
  588. if ((cl = hfsc_ellist_get_mindl(hif->hif_eligible, cur_time)) !=
  589. NULL) {
  590. realtime = 1;
  591. } else {
  592. /*
  593. * use link-sharing criteria
  594. * get the class with the minimum vt in the hierarchy
  595. */
  596. cl = NULL;
  597. tcl = hif->hif_rootclass;
  598. while (tcl != NULL && tcl->cl_children != NULL) {
  599. tcl = hfsc_actlist_firstfit(tcl, cur_time);
  600. if (tcl == NULL)
  601. continue;
  602. /*
  603. * update parent's cl_cvtmin.
  604. * don't update if the new vt is smaller.
  605. */
  606. if (tcl->cl_parent->cl_cvtmin < tcl->cl_vt)
  607. tcl->cl_parent->cl_cvtmin = tcl->cl_vt;
  608. cl = tcl;
  609. }
  610. /* XXX HRTIMER plan hfsc_deferred precisely here. */
  611. if (cl == NULL)
  612. return (NULL);
  613. }
  614. if (!remove) {
  615. hif->hif_pollcache = cl;
  616. m = hfsc_pollq(cl);
  617. return (m);
  618. }
  619. }
  620. if ((m = hfsc_getq(cl)) == NULL)
  621. panic("hfsc_dequeue");
  622. cl->cl_hif->hif_packets--;
  623. ifq->ifq_len--;
  624. PKTCNTR_INC(&cl->cl_stats.xmit_cnt, m->m_pkthdr.len);
  625. hfsc_update_vf(cl, m->m_pkthdr.len, cur_time);
  626. if (realtime)
  627. cl->cl_cumul += m->m_pkthdr.len;
  628. if (ml_len(&cl->cl_q.q) > 0) {
  629. if (cl->cl_rsc != NULL) {
  630. /* update ed */
  631. next_len = cl->cl_q.q.ml_head->m_pkthdr.len;
  632. if (realtime)
  633. hfsc_update_ed(cl, next_len);
  634. else
  635. hfsc_update_d(cl, next_len);
  636. }
  637. } else {
  638. /* the class becomes passive */
  639. hfsc_set_passive(cl);
  640. }
  641. return (m);
  642. }
  643. void
  644. hfsc_deferred(void *arg)
  645. {
  646. struct ifnet *ifp = arg;
  647. int s;
  648. s = splnet();
  649. if (HFSC_ENABLED(&ifp->if_snd) && !IFQ_IS_EMPTY(&ifp->if_snd))
  650. if_start(ifp);
  651. splx(s);
  652. /* XXX HRTIMER nearest virtual/fit time is likely less than 1/HZ. */
  653. timeout_add(&ifp->if_snd.ifq_hfsc->hif_defer, 1);
  654. }
  655. int
  656. hfsc_addq(struct hfsc_class *cl, struct mbuf *m)
  657. {
  658. if (ml_len(&cl->cl_q.q) >= cl->cl_q.qlimit)
  659. return (-1);
  660. ml_enqueue(&cl->cl_q.q, m);
  661. return (0);
  662. }
  663. struct mbuf *
  664. hfsc_getq(struct hfsc_class *cl)
  665. {
  666. return (ml_dequeue(&cl->cl_q.q));
  667. }
  668. struct mbuf *
  669. hfsc_pollq(struct hfsc_class *cl)
  670. {
  671. /* XXX */
  672. return (cl->cl_q.q.ml_head);
  673. }
  674. void
  675. hfsc_purgeq(struct hfsc_class *cl)
  676. {
  677. struct mbuf *m;
  678. if (ml_empty(&cl->cl_q.q))
  679. return;
  680. while ((m = hfsc_getq(cl)) != NULL) {
  681. PKTCNTR_INC(&cl->cl_stats.drop_cnt, m->m_pkthdr.len);
  682. m_freem(m);
  683. cl->cl_hif->hif_packets--;
  684. cl->cl_hif->hif_ifq->ifq_len--;
  685. }
  686. hfsc_update_vf(cl, 0, 0); /* remove cl from the actlist */
  687. hfsc_set_passive(cl);
  688. }
  689. void
  690. hfsc_set_active(struct hfsc_class *cl, int len)
  691. {
  692. if (cl->cl_rsc != NULL)
  693. hfsc_init_ed(cl, len);
  694. if (cl->cl_fsc != NULL)
  695. hfsc_init_vf(cl, len);
  696. cl->cl_stats.period++;
  697. }
  698. void
  699. hfsc_set_passive(struct hfsc_class *cl)
  700. {
  701. if (cl->cl_rsc != NULL)
  702. hfsc_ellist_remove(cl);
  703. /*
  704. * actlist is handled in hfsc_update_vf() so that hfsc_update_vf(cl, 0,
  705. * 0) needs to be called explicitly to remove a class from actlist
  706. */
  707. }
  708. void
  709. hfsc_init_ed(struct hfsc_class *cl, int next_len)
  710. {
  711. u_int64_t cur_time;
  712. cur_time = hfsc_microuptime();
  713. /* update the deadline curve */
  714. hfsc_rtsc_min(&cl->cl_deadline, cl->cl_rsc, cur_time, cl->cl_cumul);
  715. /*
  716. * update the eligible curve.
  717. * for concave, it is equal to the deadline curve.
  718. * for convex, it is a linear curve with slope m2.
  719. */
  720. cl->cl_eligible = cl->cl_deadline;
  721. if (cl->cl_rsc->sm1 <= cl->cl_rsc->sm2) {
  722. cl->cl_eligible.dx = 0;
  723. cl->cl_eligible.dy = 0;
  724. }
  725. /* compute e and d */
  726. cl->cl_e = hfsc_rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
  727. cl->cl_d = hfsc_rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
  728. hfsc_ellist_insert(cl);
  729. }
  730. void
  731. hfsc_update_ed(struct hfsc_class *cl, int next_len)
  732. {
  733. cl->cl_e = hfsc_rtsc_y2x(&cl->cl_eligible, cl->cl_cumul);
  734. cl->cl_d = hfsc_rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
  735. hfsc_ellist_update(cl);
  736. }
  737. void
  738. hfsc_update_d(struct hfsc_class *cl, int next_len)
  739. {
  740. cl->cl_d = hfsc_rtsc_y2x(&cl->cl_deadline, cl->cl_cumul + next_len);
  741. }
  742. void
  743. hfsc_init_vf(struct hfsc_class *cl, int len)
  744. {
  745. struct hfsc_class *max_cl, *p;
  746. u_int64_t vt, f, cur_time;
  747. int go_active;
  748. cur_time = 0;
  749. go_active = 1;
  750. for ( ; cl->cl_parent != NULL; cl = cl->cl_parent) {
  751. if (go_active && cl->cl_nactive++ == 0)
  752. go_active = 1;
  753. else
  754. go_active = 0;
  755. if (go_active) {
  756. max_cl = hfsc_actlist_last(cl->cl_parent->cl_actc);
  757. if (max_cl != NULL) {
  758. /*
  759. * set vt to the average of the min and max
  760. * classes. if the parent's period didn't
  761. * change, don't decrease vt of the class.
  762. */
  763. vt = max_cl->cl_vt;
  764. if (cl->cl_parent->cl_cvtmin != 0)
  765. vt = (cl->cl_parent->cl_cvtmin + vt)/2;
  766. if (cl->cl_parent->cl_vtperiod !=
  767. cl->cl_parentperiod || vt > cl->cl_vt)
  768. cl->cl_vt = vt;
  769. } else {
  770. /*
  771. * first child for a new parent backlog period.
  772. * add parent's cvtmax to vtoff of children
  773. * to make a new vt (vtoff + vt) larger than
  774. * the vt in the last period for all children.
  775. */
  776. vt = cl->cl_parent->cl_cvtmax;
  777. for (p = cl->cl_parent->cl_children; p != NULL;
  778. p = p->cl_siblings)
  779. p->cl_vtoff += vt;
  780. cl->cl_vt = 0;
  781. cl->cl_parent->cl_cvtmax = 0;
  782. cl->cl_parent->cl_cvtmin = 0;
  783. }
  784. cl->cl_initvt = cl->cl_vt;
  785. /* update the virtual curve */
  786. vt = cl->cl_vt + cl->cl_vtoff;
  787. hfsc_rtsc_min(&cl->cl_virtual, cl->cl_fsc, vt,
  788. cl->cl_total);
  789. if (cl->cl_virtual.x == vt) {
  790. cl->cl_virtual.x -= cl->cl_vtoff;
  791. cl->cl_vtoff = 0;
  792. }
  793. cl->cl_vtadj = 0;
  794. cl->cl_vtperiod++; /* increment vt period */
  795. cl->cl_parentperiod = cl->cl_parent->cl_vtperiod;
  796. if (cl->cl_parent->cl_nactive == 0)
  797. cl->cl_parentperiod++;
  798. cl->cl_f = 0;
  799. hfsc_actlist_insert(cl);
  800. if (cl->cl_usc != NULL) {
  801. /* class has upper limit curve */
  802. if (cur_time == 0)
  803. cur_time = hfsc_microuptime();
  804. /* update the ulimit curve */
  805. hfsc_rtsc_min(&cl->cl_ulimit, cl->cl_usc, cur_time,
  806. cl->cl_total);
  807. /* compute myf */
  808. cl->cl_myf = hfsc_rtsc_y2x(&cl->cl_ulimit,
  809. cl->cl_total);
  810. cl->cl_myfadj = 0;
  811. }
  812. }
  813. if (cl->cl_myf > cl->cl_cfmin)
  814. f = cl->cl_myf;
  815. else
  816. f = cl->cl_cfmin;
  817. if (f != cl->cl_f) {
  818. cl->cl_f = f;
  819. hfsc_update_cfmin(cl->cl_parent);
  820. }
  821. }
  822. }
  823. void
  824. hfsc_update_vf(struct hfsc_class *cl, int len, u_int64_t cur_time)
  825. {
  826. u_int64_t f, myf_bound, delta;
  827. int go_passive;
  828. go_passive = ml_empty(&cl->cl_q.q);
  829. for (; cl->cl_parent != NULL; cl = cl->cl_parent) {
  830. cl->cl_total += len;
  831. if (cl->cl_fsc == NULL || cl->cl_nactive == 0)
  832. continue;
  833. if (go_passive && --cl->cl_nactive == 0)
  834. go_passive = 1;
  835. else
  836. go_passive = 0;
  837. if (go_passive) {
  838. /* no more active child, going passive */
  839. /* update cvtmax of the parent class */
  840. if (cl->cl_vt > cl->cl_parent->cl_cvtmax)
  841. cl->cl_parent->cl_cvtmax = cl->cl_vt;
  842. /* remove this class from the vt list */
  843. hfsc_actlist_remove(cl);
  844. hfsc_update_cfmin(cl->cl_parent);
  845. continue;
  846. }
  847. /*
  848. * update vt and f
  849. */
  850. cl->cl_vt = hfsc_rtsc_y2x(&cl->cl_virtual, cl->cl_total)
  851. - cl->cl_vtoff + cl->cl_vtadj;
  852. /*
  853. * if vt of the class is smaller than cvtmin,
  854. * the class was skipped in the past due to non-fit.
  855. * if so, we need to adjust vtadj.
  856. */
  857. if (cl->cl_vt < cl->cl_parent->cl_cvtmin) {
  858. cl->cl_vtadj += cl->cl_parent->cl_cvtmin - cl->cl_vt;
  859. cl->cl_vt = cl->cl_parent->cl_cvtmin;
  860. }
  861. /* update the vt list */
  862. hfsc_actlist_update(cl);
  863. if (cl->cl_usc != NULL) {
  864. cl->cl_myf = cl->cl_myfadj +
  865. hfsc_rtsc_y2x(&cl->cl_ulimit, cl->cl_total);
  866. /*
  867. * if myf lags behind by more than one clock tick
  868. * from the current time, adjust myfadj to prevent
  869. * a rate-limited class from going greedy.
  870. * in a steady state under rate-limiting, myf
  871. * fluctuates within one clock tick.
  872. */
  873. myf_bound = cur_time - HFSC_CLK_PER_TICK;
  874. if (cl->cl_myf < myf_bound) {
  875. delta = cur_time - cl->cl_myf;
  876. cl->cl_myfadj += delta;
  877. cl->cl_myf += delta;
  878. }
  879. }
  880. /* cl_f is max(cl_myf, cl_cfmin) */
  881. if (cl->cl_myf > cl->cl_cfmin)
  882. f = cl->cl_myf;
  883. else
  884. f = cl->cl_cfmin;
  885. if (f != cl->cl_f) {
  886. cl->cl_f = f;
  887. hfsc_update_cfmin(cl->cl_parent);
  888. }
  889. }
  890. }
  891. void
  892. hfsc_update_cfmin(struct hfsc_class *cl)
  893. {
  894. struct hfsc_class *p;
  895. u_int64_t cfmin;
  896. if (TAILQ_EMPTY(cl->cl_actc)) {
  897. cl->cl_cfmin = 0;
  898. return;
  899. }
  900. cfmin = HFSC_HT_INFINITY;
  901. TAILQ_FOREACH(p, cl->cl_actc, cl_actlist) {
  902. if (p->cl_f == 0) {
  903. cl->cl_cfmin = 0;
  904. return;
  905. }
  906. if (p->cl_f < cfmin)
  907. cfmin = p->cl_f;
  908. }
  909. cl->cl_cfmin = cfmin;
  910. }
  911. /*
  912. * eligible list holds backlogged classes being sorted by their eligible times.
  913. * there is one eligible list per interface.
  914. */
  915. hfsc_ellist_t *
  916. hfsc_ellist_alloc(void)
  917. {
  918. hfsc_ellist_t *head;
  919. head = malloc(sizeof(hfsc_ellist_t), M_DEVBUF, M_WAITOK);
  920. TAILQ_INIT(head);
  921. return (head);
  922. }
  923. void
  924. hfsc_ellist_destroy(hfsc_ellist_t *head)
  925. {
  926. free(head, M_DEVBUF, 0);
  927. }
  928. void
  929. hfsc_ellist_insert(struct hfsc_class *cl)
  930. {
  931. struct hfsc_if *hif = cl->cl_hif;
  932. struct hfsc_class *p;
  933. /* check the last entry first */
  934. if ((p = TAILQ_LAST(hif->hif_eligible, hfsc_eligible)) == NULL ||
  935. p->cl_e <= cl->cl_e) {
  936. TAILQ_INSERT_TAIL(hif->hif_eligible, cl, cl_ellist);
  937. return;
  938. }
  939. TAILQ_FOREACH(p, hif->hif_eligible, cl_ellist) {
  940. if (cl->cl_e < p->cl_e) {
  941. TAILQ_INSERT_BEFORE(p, cl, cl_ellist);
  942. return;
  943. }
  944. }
  945. }
  946. void
  947. hfsc_ellist_remove(struct hfsc_class *cl)
  948. {
  949. struct hfsc_if *hif = cl->cl_hif;
  950. TAILQ_REMOVE(hif->hif_eligible, cl, cl_ellist);
  951. }
  952. void
  953. hfsc_ellist_update(struct hfsc_class *cl)
  954. {
  955. struct hfsc_if *hif = cl->cl_hif;
  956. struct hfsc_class *p, *last;
  957. /*
  958. * the eligible time of a class increases monotonically.
  959. * if the next entry has a larger eligible time, nothing to do.
  960. */
  961. p = TAILQ_NEXT(cl, cl_ellist);
  962. if (p == NULL || cl->cl_e <= p->cl_e)
  963. return;
  964. /* check the last entry */
  965. last = TAILQ_LAST(hif->hif_eligible, hfsc_eligible);
  966. if (last->cl_e <= cl->cl_e) {
  967. TAILQ_REMOVE(hif->hif_eligible, cl, cl_ellist);
  968. TAILQ_INSERT_TAIL(hif->hif_eligible, cl, cl_ellist);
  969. return;
  970. }
  971. /*
  972. * the new position must be between the next entry
  973. * and the last entry
  974. */
  975. while ((p = TAILQ_NEXT(p, cl_ellist)) != NULL) {
  976. if (cl->cl_e < p->cl_e) {
  977. TAILQ_REMOVE(hif->hif_eligible, cl, cl_ellist);
  978. TAILQ_INSERT_BEFORE(p, cl, cl_ellist);
  979. return;
  980. }
  981. }
  982. }
  983. /* find the class with the minimum deadline among the eligible classes */
  984. struct hfsc_class *
  985. hfsc_ellist_get_mindl(hfsc_ellist_t *head, u_int64_t cur_time)
  986. {
  987. struct hfsc_class *p, *cl = NULL;
  988. TAILQ_FOREACH(p, head, cl_ellist) {
  989. if (p->cl_e > cur_time)
  990. break;
  991. if (cl == NULL || p->cl_d < cl->cl_d)
  992. cl = p;
  993. }
  994. return (cl);
  995. }
  996. /*
  997. * active children list holds backlogged child classes being sorted
  998. * by their virtual time.
  999. * each intermediate class has one active children list.
  1000. */
  1001. hfsc_actlist_t *
  1002. hfsc_actlist_alloc(void)
  1003. {
  1004. hfsc_actlist_t *head;
  1005. head = malloc(sizeof(hfsc_actlist_t), M_DEVBUF, M_WAITOK);
  1006. TAILQ_INIT(head);
  1007. return (head);
  1008. }
  1009. void
  1010. hfsc_actlist_destroy(hfsc_actlist_t *head)
  1011. {
  1012. free(head, M_DEVBUF, 0);
  1013. }
  1014. void
  1015. hfsc_actlist_insert(struct hfsc_class *cl)
  1016. {
  1017. struct hfsc_class *p;
  1018. /* check the last entry first */
  1019. if ((p = TAILQ_LAST(cl->cl_parent->cl_actc, hfsc_active)) == NULL
  1020. || p->cl_vt <= cl->cl_vt) {
  1021. TAILQ_INSERT_TAIL(cl->cl_parent->cl_actc, cl, cl_actlist);
  1022. return;
  1023. }
  1024. TAILQ_FOREACH(p, cl->cl_parent->cl_actc, cl_actlist) {
  1025. if (cl->cl_vt < p->cl_vt) {
  1026. TAILQ_INSERT_BEFORE(p, cl, cl_actlist);
  1027. return;
  1028. }
  1029. }
  1030. }
  1031. void
  1032. hfsc_actlist_remove(struct hfsc_class *cl)
  1033. {
  1034. TAILQ_REMOVE(cl->cl_parent->cl_actc, cl, cl_actlist);
  1035. }
  1036. void
  1037. hfsc_actlist_update(struct hfsc_class *cl)
  1038. {
  1039. struct hfsc_class *p, *last;
  1040. /*
  1041. * the virtual time of a class increases monotonically during its
  1042. * backlogged period.
  1043. * if the next entry has a larger virtual time, nothing to do.
  1044. */
  1045. p = TAILQ_NEXT(cl, cl_actlist);
  1046. if (p == NULL || cl->cl_vt < p->cl_vt)
  1047. return;
  1048. /* check the last entry */
  1049. last = TAILQ_LAST(cl->cl_parent->cl_actc, hfsc_active);
  1050. if (last->cl_vt <= cl->cl_vt) {
  1051. TAILQ_REMOVE(cl->cl_parent->cl_actc, cl, cl_actlist);
  1052. TAILQ_INSERT_TAIL(cl->cl_parent->cl_actc, cl, cl_actlist);
  1053. return;
  1054. }
  1055. /*
  1056. * the new position must be between the next entry
  1057. * and the last entry
  1058. */
  1059. while ((p = TAILQ_NEXT(p, cl_actlist)) != NULL) {
  1060. if (cl->cl_vt < p->cl_vt) {
  1061. TAILQ_REMOVE(cl->cl_parent->cl_actc, cl, cl_actlist);
  1062. TAILQ_INSERT_BEFORE(p, cl, cl_actlist);
  1063. return;
  1064. }
  1065. }
  1066. }
  1067. struct hfsc_class *
  1068. hfsc_actlist_firstfit(struct hfsc_class *cl, u_int64_t cur_time)
  1069. {
  1070. struct hfsc_class *p;
  1071. TAILQ_FOREACH(p, cl->cl_actc, cl_actlist)
  1072. if (p->cl_f <= cur_time)
  1073. return (p);
  1074. return (NULL);
  1075. }
  1076. /*
  1077. * service curve support functions
  1078. *
  1079. * external service curve parameters
  1080. * m: bits/sec
  1081. * d: msec
  1082. * internal service curve parameters
  1083. * sm: (bytes/tsc_interval) << SM_SHIFT
  1084. * ism: (tsc_count/byte) << ISM_SHIFT
  1085. * dx: tsc_count
  1086. *
  1087. * SM_SHIFT and ISM_SHIFT are scaled in order to keep effective digits.
  1088. * we should be able to handle 100K-1Gbps linkspeed with 200Hz-1GHz CPU
  1089. * speed. SM_SHIFT and ISM_SHIFT are selected to have at least 3 effective
  1090. * digits in decimal using the following table.
  1091. *
  1092. * bits/sec 100Kbps 1Mbps 10Mbps 100Mbps 1Gbps
  1093. * ----------+-------------------------------------------------------
  1094. * bytes/nsec 12.5e-6 125e-6 1250e-6 12500e-6 125000e-6
  1095. * sm(500MHz) 25.0e-6 250e-6 2500e-6 25000e-6 250000e-6
  1096. * sm(200MHz) 62.5e-6 625e-6 6250e-6 62500e-6 625000e-6
  1097. *
  1098. * nsec/byte 80000 8000 800 80 8
  1099. * ism(500MHz) 40000 4000 400 40 4
  1100. * ism(200MHz) 16000 1600 160 16 1.6
  1101. */
  #define SM_SHIFT 24
  #define ISM_SHIFT 10
  #define SM_MASK ((1LL << SM_SHIFT) - 1)
  #define ISM_MASK ((1LL << ISM_SHIFT) - 1)

  /*
   * Compute y = (x * sm) >> SM_SHIFT for one curve segment.
   * x is split into the bits above and below SM_SHIFT and the two
   * partial products are added, to avoid overflowing the full product.
   */
  static __inline u_int64_t
  seg_x2y(u_int64_t x, u_int64_t sm)
  {
      u_int64_t upper, lower;

      upper = x >> SM_SHIFT;
      lower = x & SM_MASK;

      return (upper * sm + ((lower * sm) >> SM_SHIFT));
  }
  1118. static __inline u_int64_t
  1119. seg_y2x(u_int64_t y, u_int64_t ism)
  1120. {
  1121. u_int64_t x;
  1122. if (y == 0)
  1123. x = 0;
  1124. else if (ism == HFSC_HT_INFINITY)
  1125. x = HFSC_HT_INFINITY;
  1126. else {
  1127. x = (y >> ISM_SHIFT) * ism
  1128. + (((y & ISM_MASK) * ism) >> ISM_SHIFT);
  1129. }
  1130. return (x);
  1131. }
  1132. static __inline u_int64_t
  1133. m2sm(u_int m)
  1134. {
  1135. u_int64_t sm;
  1136. sm = ((u_int64_t)m << SM_SHIFT) / 8 / HFSC_FREQ;
  1137. return (sm);
  1138. }
  1139. static __inline u_int64_t
  1140. m2ism(u_int m)
  1141. {
  1142. u_int64_t ism;
  1143. if (m == 0)
  1144. ism = HFSC_HT_INFINITY;
  1145. else
  1146. ism = ((u_int64_t)HFSC_FREQ << ISM_SHIFT) * 8 / m;
  1147. return (ism);
  1148. }
  1149. static __inline u_int64_t
  1150. d2dx(u_int d)
  1151. {
  1152. u_int64_t dx;
  1153. dx = ((u_int64_t)d * HFSC_FREQ) / 1000;
  1154. return (dx);
  1155. }
  1156. static __inline u_int
  1157. sm2m(u_int64_t sm)
  1158. {
  1159. u_int64_t m;
  1160. m = (sm * 8 * HFSC_FREQ) >> SM_SHIFT;
  1161. return ((u_int)m);
  1162. }
  1163. static __inline u_int
  1164. dx2d(u_int64_t dx)
  1165. {
  1166. u_int64_t d;
  1167. d = dx * 1000 / HFSC_FREQ;
  1168. return ((u_int)d);
  1169. }
  1170. void
  1171. hfsc_sc2isc(struct hfsc_sc *sc, struct hfsc_internal_sc *isc)
  1172. {
  1173. isc->sm1 = m2sm(sc->m1);
  1174. isc->ism1 = m2ism(sc->m1);
  1175. isc->dx = d2dx(sc->d);
  1176. isc->dy = seg_x2y(isc->dx, isc->sm1);
  1177. isc->sm2 = m2sm(sc->m2);
  1178. isc->ism2 = m2ism(sc->m2);
  1179. }
  1180. /*
  1181. * initialize the runtime service curve with the given internal
  1182. * service curve starting at (x, y).
  1183. */
  1184. void
  1185. hfsc_rtsc_init(struct hfsc_runtime_sc *rtsc, struct hfsc_internal_sc * isc,
  1186. u_int64_t x, u_int64_t y)
  1187. {
  1188. rtsc->x = x;
  1189. rtsc->y = y;
  1190. rtsc->sm1 = isc->sm1;
  1191. rtsc->ism1 = isc->ism1;
  1192. rtsc->dx = isc->dx;
  1193. rtsc->dy = isc->dy;
  1194. rtsc->sm2 = isc->sm2;
  1195. rtsc->ism2 = isc->ism2;
  1196. }
  1197. /*
  1198. * calculate the y-projection of the runtime service curve by the
  1199. * given x-projection value
  1200. */
  1201. u_int64_t
  1202. hfsc_rtsc_y2x(struct hfsc_runtime_sc *rtsc, u_int64_t y)
  1203. {
  1204. u_int64_t x;
  1205. if (y < rtsc->y)
  1206. x = rtsc->x;
  1207. else if (y <= rtsc->y + rtsc->dy) {
  1208. /* x belongs to the 1st segment */
  1209. if (rtsc->dy == 0)
  1210. x = rtsc->x + rtsc->dx;
  1211. else
  1212. x = rtsc->x + seg_y2x(y - rtsc->y, rtsc->ism1);
  1213. } else {
  1214. /* x belongs to the 2nd segment */
  1215. x = rtsc->x + rtsc->dx
  1216. + seg_y2x(y - rtsc->y - rtsc->dy, rtsc->ism2);
  1217. }
  1218. return (x);
  1219. }
  1220. u_int64_t
  1221. hfsc_rtsc_x2y(struct hfsc_runtime_sc *rtsc, u_int64_t x)
  1222. {
  1223. u_int64_t y;
  1224. if (x <= rtsc->x)
  1225. y = rtsc->y;
  1226. else if (x <= rtsc->x + rtsc->dx)
  1227. /* y belongs to the 1st segment */
  1228. y = rtsc->y + seg_x2y(x - rtsc->x, rtsc->sm1);
  1229. else
  1230. /* y belongs to the 2nd segment */
  1231. y = rtsc->y + rtsc->dy
  1232. + seg_x2y(x - rtsc->x - rtsc->dx, rtsc->sm2);
  1233. return (y);
  1234. }
  1235. /*
  1236. * update the runtime service curve by taking the minimum of the current
  1237. * runtime service curve and the service curve starting at (x, y).
  1238. */
  1239. void
  1240. hfsc_rtsc_min(struct hfsc_runtime_sc *rtsc, struct hfsc_internal_sc *isc,
  1241. u_int64_t x, u_int64_t y)
  1242. {
  1243. u_int64_t y1, y2, dx, dy;
  1244. if (isc->sm1 <= isc->sm2) {
  1245. /* service curve is convex */
  1246. y1 = hfsc_rtsc_x2y(rtsc, x);
  1247. if (y1 < y)
  1248. /* the current rtsc is smaller */
  1249. return;
  1250. rtsc->x = x;
  1251. rtsc->y = y;
  1252. return;
  1253. }
  1254. /*
  1255. * service curve is concave
  1256. * compute the two y values of the current rtsc
  1257. * y1: at x
  1258. * y2: at (x + dx)
  1259. */
  1260. y1 = hfsc_rtsc_x2y(rtsc, x);
  1261. if (y1 <= y) {
  1262. /* rtsc is below isc, no change to rtsc */
  1263. return;
  1264. }
  1265. y2 = hfsc_rtsc_x2y(rtsc, x + isc->dx);
  1266. if (y2 >= y + isc->dy) {
  1267. /* rtsc is above isc, replace rtsc by isc */
  1268. rtsc->x = x;
  1269. rtsc->y = y;
  1270. rtsc->dx = isc->dx;
  1271. rtsc->dy = isc->dy;
  1272. return;
  1273. }
  1274. /*
  1275. * the two curves intersect
  1276. * compute the offsets (dx, dy) using the reverse
  1277. * function of seg_x2y()
  1278. * seg_x2y(dx, sm1) == seg_x2y(dx, sm2) + (y1 - y)
  1279. */
  1280. dx = ((y1 - y) << SM_SHIFT) / (isc->sm1 - isc->sm2);
  1281. /*
  1282. * check if (x, y1) belongs to the 1st segment of rtsc.
  1283. * if so, add the offset.
  1284. */
  1285. if (rtsc->x + rtsc->dx > x)
  1286. dx += rtsc->x + rtsc->dx - x;
  1287. dy = seg_x2y(dx, isc->sm1);
  1288. rtsc->x = x;
  1289. rtsc->y = y;
  1290. rtsc->dx = dx;
  1291. rtsc->dy = dy;
  1292. return;
  1293. }
  1294. void
  1295. hfsc_getclstats(struct hfsc_class_stats *sp, struct hfsc_class *cl)
  1296. {
  1297. sp->class_id = cl->cl_id;
  1298. sp->class_handle = cl->cl_handle;
  1299. if (cl->cl_rsc != NULL) {
  1300. sp->rsc.m1 = sm2m(cl->cl_rsc->sm1);
  1301. sp->rsc.d = dx2d(cl->cl_rsc->dx);
  1302. sp->rsc.m2 = sm2m(cl->cl_rsc->sm2);
  1303. } else {
  1304. sp->rsc.m1 = 0;
  1305. sp->rsc.d = 0;
  1306. sp->rsc.m2 = 0;
  1307. }
  1308. if (cl->cl_fsc != NULL) {
  1309. sp->fsc.m1 = sm2m(cl->cl_fsc->sm1);
  1310. sp->fsc.d = dx2d(cl->cl_fsc->dx);
  1311. sp->fsc.m2 = sm2m(cl->cl_fsc->sm2);
  1312. } else {
  1313. sp->fsc.m1 = 0;
  1314. sp->fsc.d = 0;
  1315. sp->fsc.m2 = 0;
  1316. }
  1317. if (cl->cl_usc != NULL) {
  1318. sp->usc.m1 = sm2m(cl->cl_usc->sm1);
  1319. sp->usc.d = dx2d(cl->cl_usc->dx);
  1320. sp->usc.m2 = sm2m(cl->cl_usc->sm2);
  1321. } else {
  1322. sp->usc.m1 = 0;
  1323. sp->usc.d = 0;
  1324. sp->usc.m2 = 0;
  1325. }
  1326. sp->total = cl->cl_total;
  1327. sp->cumul = cl->cl_cumul;
  1328. sp->d = cl->cl_d;
  1329. sp->e = cl->cl_e;
  1330. sp->vt = cl->cl_vt;
  1331. sp->f = cl->cl_f;
  1332. sp->initvt = cl->cl_initvt;
  1333. sp->vtperiod = cl->cl_vtperiod;
  1334. sp->parentperiod = cl->cl_parentperiod;
  1335. sp->nactive = cl->cl_nactive;
  1336. sp->vtoff = cl->cl_vtoff;
  1337. sp->cvtmax = cl->cl_cvtmax;
  1338. sp->myf = cl->cl_myf;
  1339. sp->cfmin = cl->cl_cfmin;
  1340. sp->cvtmin = cl->cl_cvtmin;
  1341. sp->myfadj = cl->cl_myfadj;
  1342. sp->vtadj = cl->cl_vtadj;
  1343. sp->cur_time = hfsc_microuptime();
  1344. sp->machclk_freq = HFSC_FREQ;
  1345. sp->qlength = ml_len(&cl->cl_q.q);
  1346. sp->qlimit = cl->cl_q.qlimit;
  1347. sp->xmit_cnt = cl->cl_stats.xmit_cnt;
  1348. sp->drop_cnt = cl->cl_stats.drop_cnt;
  1349. sp->period = cl->cl_stats.period;
  1350. sp->qtype = 0;
  1351. }
  1352. /* convert a class handle to the corresponding class pointer */
  1353. struct hfsc_class *
  1354. hfsc_clh2cph(struct hfsc_if *hif, u_int32_t chandle)
  1355. {
  1356. int i;
  1357. struct hfsc_class *cl;
  1358. if (chandle == 0)
  1359. return (NULL);
  1360. /*
  1361. * first, try the slot corresponding to the lower bits of the handle.
  1362. * if it does not match, do the linear table search.
  1363. */
  1364. i = chandle % hif->hif_allocated;
  1365. if ((cl = hif->hif_class_tbl[i]) != NULL && cl->cl_handle == chandle)
  1366. return (cl);
  1367. for (i = 0; i < hif->hif_allocated; i++)
  1368. if ((cl = hif->hif_class_tbl[i]) != NULL &&
  1369. cl->cl_handle == chandle)
  1370. return (cl);
  1371. return (NULL);
  1372. }
  1373. #else /* NPF > 0 */
  /*
   * Stub for the #else branch of the NPF > 0 conditional: the real HFSC
   * code is compiled out, so reaching this function is a fatal error.
   */
  void
  hfsc_purge(struct ifqueue *q)
  {
      /* q is deliberately unused */
      panic("hfsc_purge called on hfsc-less kernel");
  }
  /*
   * Stub for the #else branch of the NPF > 0 conditional: the real HFSC
   * code is compiled out, so reaching this function is a fatal error.
   * No value is returned because panic() is the only statement.
   */
  int
  hfsc_enqueue(struct ifqueue *q, struct mbuf *m)
  {
      /* q and m are deliberately unused */
      panic("hfsc_enqueue called on hfsc-less kernel");
  }
  /*
   * Stub for the #else branch of the NPF > 0 conditional: the real HFSC
   * code is compiled out, so reaching this function is a fatal error.
   * No value is returned because panic() is the only statement.
   *
   * The panic message names this function; it previously said
   * "hfsc_enqueue", which pointed at the wrong entry point.
   */
  struct mbuf *
  hfsc_dequeue(struct ifqueue *q, int i)
  {
      /* q and i are deliberately unused */
      panic("hfsc_dequeue called on hfsc-less kernel");
  }
  1389. #endif