ackvec.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410
  1. /*
  2. * net/dccp/ackvec.c
  3. *
  4. * An implementation of Ack Vectors for the DCCP protocol
  5. * Copyright (c) 2007 University of Aberdeen, Scotland, UK
  6. * Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
  7. *
  8. * This program is free software; you can redistribute it and/or modify it
  9. * under the terms of the GNU General Public License as published by the
  10. * Free Software Foundation; version 2 of the License;
  11. */
  12. #include "dccp.h"
  13. #include <linux/kernel.h>
  14. #include <linux/slab.h>
  15. #include <linux/export.h>
/* Slab cache from which struct dccp_ackvec objects are allocated. */
static struct kmem_cache *dccp_ackvec_slab;
/* Slab cache from which struct dccp_ackvec_record entries are allocated. */
static struct kmem_cache *dccp_ackvec_record_slab;
  18. struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
  19. {
  20. struct dccp_ackvec *av = kmem_cache_zalloc(dccp_ackvec_slab, priority);
  21. if (av != NULL) {
  22. av->av_buf_head = av->av_buf_tail = DCCPAV_MAX_ACKVEC_LEN - 1;
  23. INIT_LIST_HEAD(&av->av_records);
  24. }
  25. return av;
  26. }
  27. static void dccp_ackvec_purge_records(struct dccp_ackvec *av)
  28. {
  29. struct dccp_ackvec_record *cur, *next;
  30. list_for_each_entry_safe(cur, next, &av->av_records, avr_node)
  31. kmem_cache_free(dccp_ackvec_record_slab, cur);
  32. INIT_LIST_HEAD(&av->av_records);
  33. }
  34. void dccp_ackvec_free(struct dccp_ackvec *av)
  35. {
  36. if (likely(av != NULL)) {
  37. dccp_ackvec_purge_records(av);
  38. kmem_cache_free(dccp_ackvec_slab, av);
  39. }
  40. }
  41. /**
  42. * dccp_ackvec_update_records - Record information about sent Ack Vectors
  43. * @av: Ack Vector records to update
  44. * @seqno: Sequence number of the packet carrying the Ack Vector just sent
  45. * @nonce_sum: The sum of all buffer nonces contained in the Ack Vector
  46. */
  47. int dccp_ackvec_update_records(struct dccp_ackvec *av, u64 seqno, u8 nonce_sum)
  48. {
  49. struct dccp_ackvec_record *avr;
  50. avr = kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC);
  51. if (avr == NULL)
  52. return -ENOBUFS;
  53. avr->avr_ack_seqno = seqno;
  54. avr->avr_ack_ptr = av->av_buf_head;
  55. avr->avr_ack_ackno = av->av_buf_ackno;
  56. avr->avr_ack_nonce = nonce_sum;
  57. avr->avr_ack_runlen = dccp_ackvec_runlen(av->av_buf + av->av_buf_head);
  58. /*
  59. * When the buffer overflows, we keep no more than one record. This is
  60. * the simplest way of disambiguating sender-Acks dating from before the
  61. * overflow from sender-Acks which refer to after the overflow; a simple
  62. * solution is preferable here since we are handling an exception.
  63. */
  64. if (av->av_overflow)
  65. dccp_ackvec_purge_records(av);
  66. /*
  67. * Since GSS is incremented for each packet, the list is automatically
  68. * arranged in descending order of @ack_seqno.
  69. */
  70. list_add(&avr->avr_node, &av->av_records);
  71. dccp_pr_debug("Added Vector, ack_seqno=%llu, ack_ackno=%llu (rl=%u)\n",
  72. (unsigned long long)avr->avr_ack_seqno,
  73. (unsigned long long)avr->avr_ack_ackno,
  74. avr->avr_ack_runlen);
  75. return 0;
  76. }
  77. static struct dccp_ackvec_record *dccp_ackvec_lookup(struct list_head *av_list,
  78. const u64 ackno)
  79. {
  80. struct dccp_ackvec_record *avr;
  81. /*
  82. * Exploit that records are inserted in descending order of sequence
  83. * number, start with the oldest record first. If @ackno is `before'
  84. * the earliest ack_ackno, the packet is too old to be considered.
  85. */
  86. list_for_each_entry_reverse(avr, av_list, avr_node) {
  87. if (avr->avr_ack_seqno == ackno)
  88. return avr;
  89. if (before48(ackno, avr->avr_ack_seqno))
  90. break;
  91. }
  92. return NULL;
  93. }
  94. /*
  95. * Buffer index and length computation using modulo-buffersize arithmetic.
  96. * Note that, as pointers move from right to left, head is `before' tail.
  97. */
  98. static inline u16 __ackvec_idx_add(const u16 a, const u16 b)
  99. {
  100. return (a + b) % DCCPAV_MAX_ACKVEC_LEN;
  101. }
  102. static inline u16 __ackvec_idx_sub(const u16 a, const u16 b)
  103. {
  104. return __ackvec_idx_add(a, DCCPAV_MAX_ACKVEC_LEN - b);
  105. }
  106. u16 dccp_ackvec_buflen(const struct dccp_ackvec *av)
  107. {
  108. if (unlikely(av->av_overflow))
  109. return DCCPAV_MAX_ACKVEC_LEN;
  110. return __ackvec_idx_sub(av->av_buf_tail, av->av_buf_head);
  111. }
  112. /**
  113. * dccp_ackvec_update_old - Update previous state as per RFC 4340, 11.4.1
  114. * @av: non-empty buffer to update
  115. * @distance: negative or zero distance of @seqno from buf_ackno downward
  116. * @seqno: the (old) sequence number whose record is to be updated
  117. * @state: state in which packet carrying @seqno was received
  118. */
  119. static void dccp_ackvec_update_old(struct dccp_ackvec *av, s64 distance,
  120. u64 seqno, enum dccp_ackvec_states state)
  121. {
  122. u16 ptr = av->av_buf_head;
  123. BUG_ON(distance > 0);
  124. if (unlikely(dccp_ackvec_is_empty(av)))
  125. return;
  126. do {
  127. u8 runlen = dccp_ackvec_runlen(av->av_buf + ptr);
  128. if (distance + runlen >= 0) {
  129. /*
  130. * Only update the state if packet has not been received
  131. * yet. This is OK as per the second table in RFC 4340,
  132. * 11.4.1; i.e. here we are using the following table:
  133. * RECEIVED
  134. * 0 1 3
  135. * S +---+---+---+
  136. * T 0 | 0 | 0 | 0 |
  137. * O +---+---+---+
  138. * R 1 | 1 | 1 | 1 |
  139. * E +---+---+---+
  140. * D 3 | 0 | 1 | 3 |
  141. * +---+---+---+
  142. * The "Not Received" state was set by reserve_seats().
  143. */
  144. if (av->av_buf[ptr] == DCCPAV_NOT_RECEIVED)
  145. av->av_buf[ptr] = state;
  146. else
  147. dccp_pr_debug("Not changing %llu state to %u\n",
  148. (unsigned long long)seqno, state);
  149. break;
  150. }
  151. distance += runlen + 1;
  152. ptr = __ackvec_idx_add(ptr, 1);
  153. } while (ptr != av->av_buf_tail);
  154. }
  155. /* Mark @num entries after buf_head as "Not yet received". */
  156. static void dccp_ackvec_reserve_seats(struct dccp_ackvec *av, u16 num)
  157. {
  158. u16 start = __ackvec_idx_add(av->av_buf_head, 1),
  159. len = DCCPAV_MAX_ACKVEC_LEN - start;
  160. /* check for buffer wrap-around */
  161. if (num > len) {
  162. memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, len);
  163. start = 0;
  164. num -= len;
  165. }
  166. if (num)
  167. memset(av->av_buf + start, DCCPAV_NOT_RECEIVED, num);
  168. }
  169. /**
  170. * dccp_ackvec_add_new - Record one or more new entries in Ack Vector buffer
  171. * @av: container of buffer to update (can be empty or non-empty)
  172. * @num_packets: number of packets to register (must be >= 1)
  173. * @seqno: sequence number of the first packet in @num_packets
  174. * @state: state in which packet carrying @seqno was received
  175. */
  176. static void dccp_ackvec_add_new(struct dccp_ackvec *av, u32 num_packets,
  177. u64 seqno, enum dccp_ackvec_states state)
  178. {
  179. u32 num_cells = num_packets;
  180. if (num_packets > DCCPAV_BURST_THRESH) {
  181. u32 lost_packets = num_packets - 1;
  182. DCCP_WARN("Warning: large burst loss (%u)\n", lost_packets);
  183. /*
  184. * We received 1 packet and have a loss of size "num_packets-1"
  185. * which we squeeze into num_cells-1 rather than reserving an
  186. * entire byte for each lost packet.
  187. * The reason is that the vector grows in O(burst_length); when
  188. * it grows too large there will no room left for the payload.
  189. * This is a trade-off: if a few packets out of the burst show
  190. * up later, their state will not be changed; it is simply too
  191. * costly to reshuffle/reallocate/copy the buffer each time.
  192. * Should such problems persist, we will need to switch to a
  193. * different underlying data structure.
  194. */
  195. for (num_packets = num_cells = 1; lost_packets; ++num_cells) {
  196. u8 len = min_t(u32, lost_packets, DCCPAV_MAX_RUNLEN);
  197. av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, 1);
  198. av->av_buf[av->av_buf_head] = DCCPAV_NOT_RECEIVED | len;
  199. lost_packets -= len;
  200. }
  201. }
  202. if (num_cells + dccp_ackvec_buflen(av) >= DCCPAV_MAX_ACKVEC_LEN) {
  203. DCCP_CRIT("Ack Vector buffer overflow: dropping old entries\n");
  204. av->av_overflow = true;
  205. }
  206. av->av_buf_head = __ackvec_idx_sub(av->av_buf_head, num_packets);
  207. if (av->av_overflow)
  208. av->av_buf_tail = av->av_buf_head;
  209. av->av_buf[av->av_buf_head] = state;
  210. av->av_buf_ackno = seqno;
  211. if (num_packets > 1)
  212. dccp_ackvec_reserve_seats(av, num_packets - 1);
  213. }
  214. /**
  215. * dccp_ackvec_input - Register incoming packet in the buffer
  216. */
  217. void dccp_ackvec_input(struct dccp_ackvec *av, struct sk_buff *skb)
  218. {
  219. u64 seqno = DCCP_SKB_CB(skb)->dccpd_seq;
  220. enum dccp_ackvec_states state = DCCPAV_RECEIVED;
  221. if (dccp_ackvec_is_empty(av)) {
  222. dccp_ackvec_add_new(av, 1, seqno, state);
  223. av->av_tail_ackno = seqno;
  224. } else {
  225. s64 num_packets = dccp_delta_seqno(av->av_buf_ackno, seqno);
  226. u8 *current_head = av->av_buf + av->av_buf_head;
  227. if (num_packets == 1 &&
  228. dccp_ackvec_state(current_head) == state &&
  229. dccp_ackvec_runlen(current_head) < DCCPAV_MAX_RUNLEN) {
  230. *current_head += 1;
  231. av->av_buf_ackno = seqno;
  232. } else if (num_packets > 0) {
  233. dccp_ackvec_add_new(av, num_packets, seqno, state);
  234. } else {
  235. dccp_ackvec_update_old(av, num_packets, seqno, state);
  236. }
  237. }
  238. }
  239. /**
  240. * dccp_ackvec_clear_state - Perform house-keeping / garbage-collection
  241. * This routine is called when the peer acknowledges the receipt of Ack Vectors
  242. * up to and including @ackno. While based on on section A.3 of RFC 4340, here
  243. * are additional precautions to prevent corrupted buffer state. In particular,
  244. * we use tail_ackno to identify outdated records; it always marks the earliest
  245. * packet of group (2) in 11.4.2.
  246. */
  247. void dccp_ackvec_clear_state(struct dccp_ackvec *av, const u64 ackno)
  248. {
  249. struct dccp_ackvec_record *avr, *next;
  250. u8 runlen_now, eff_runlen;
  251. s64 delta;
  252. avr = dccp_ackvec_lookup(&av->av_records, ackno);
  253. if (avr == NULL)
  254. return;
  255. /*
  256. * Deal with outdated acknowledgments: this arises when e.g. there are
  257. * several old records and the acks from the peer come in slowly. In
  258. * that case we may still have records that pre-date tail_ackno.
  259. */
  260. delta = dccp_delta_seqno(av->av_tail_ackno, avr->avr_ack_ackno);
  261. if (delta < 0)
  262. goto free_records;
  263. /*
  264. * Deal with overlapping Ack Vectors: don't subtract more than the
  265. * number of packets between tail_ackno and ack_ackno.
  266. */
  267. eff_runlen = delta < avr->avr_ack_runlen ? delta : avr->avr_ack_runlen;
  268. runlen_now = dccp_ackvec_runlen(av->av_buf + avr->avr_ack_ptr);
  269. /*
  270. * The run length of Ack Vector cells does not decrease over time. If
  271. * the run length is the same as at the time the Ack Vector was sent, we
  272. * free the ack_ptr cell. That cell can however not be freed if the run
  273. * length has increased: in this case we need to move the tail pointer
  274. * backwards (towards higher indices), to its next-oldest neighbour.
  275. */
  276. if (runlen_now > eff_runlen) {
  277. av->av_buf[avr->avr_ack_ptr] -= eff_runlen + 1;
  278. av->av_buf_tail = __ackvec_idx_add(avr->avr_ack_ptr, 1);
  279. /* This move may not have cleared the overflow flag. */
  280. if (av->av_overflow)
  281. av->av_overflow = (av->av_buf_head == av->av_buf_tail);
  282. } else {
  283. av->av_buf_tail = avr->avr_ack_ptr;
  284. /*
  285. * We have made sure that avr points to a valid cell within the
  286. * buffer. This cell is either older than head, or equals head
  287. * (empty buffer): in both cases we no longer have any overflow.
  288. */
  289. av->av_overflow = 0;
  290. }
  291. /*
  292. * The peer has acknowledged up to and including ack_ackno. Hence the
  293. * first packet in group (2) of 11.4.2 is the successor of ack_ackno.
  294. */
  295. av->av_tail_ackno = ADD48(avr->avr_ack_ackno, 1);
  296. free_records:
  297. list_for_each_entry_safe_from(avr, next, &av->av_records, avr_node) {
  298. list_del(&avr->avr_node);
  299. kmem_cache_free(dccp_ackvec_record_slab, avr);
  300. }
  301. }
  302. /*
  303. * Routines to keep track of Ack Vectors received in an skb
  304. */
  305. int dccp_ackvec_parsed_add(struct list_head *head, u8 *vec, u8 len, u8 nonce)
  306. {
  307. struct dccp_ackvec_parsed *new = kmalloc(sizeof(*new), GFP_ATOMIC);
  308. if (new == NULL)
  309. return -ENOBUFS;
  310. new->vec = vec;
  311. new->len = len;
  312. new->nonce = nonce;
  313. list_add_tail(&new->node, head);
  314. return 0;
  315. }
  316. EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_add);
  317. void dccp_ackvec_parsed_cleanup(struct list_head *parsed_chunks)
  318. {
  319. struct dccp_ackvec_parsed *cur, *next;
  320. list_for_each_entry_safe(cur, next, parsed_chunks, node)
  321. kfree(cur);
  322. INIT_LIST_HEAD(parsed_chunks);
  323. }
  324. EXPORT_SYMBOL_GPL(dccp_ackvec_parsed_cleanup);
  325. int __init dccp_ackvec_init(void)
  326. {
  327. dccp_ackvec_slab = kmem_cache_create("dccp_ackvec",
  328. sizeof(struct dccp_ackvec), 0,
  329. SLAB_HWCACHE_ALIGN, NULL);
  330. if (dccp_ackvec_slab == NULL)
  331. goto out_err;
  332. dccp_ackvec_record_slab = kmem_cache_create("dccp_ackvec_record",
  333. sizeof(struct dccp_ackvec_record),
  334. 0, SLAB_HWCACHE_ALIGN, NULL);
  335. if (dccp_ackvec_record_slab == NULL)
  336. goto out_destroy_slab;
  337. return 0;
  338. out_destroy_slab:
  339. kmem_cache_destroy(dccp_ackvec_slab);
  340. dccp_ackvec_slab = NULL;
  341. out_err:
  342. DCCP_CRIT("Unable to create Ack Vector slab cache");
  343. return -ENOBUFS;
  344. }
  345. void dccp_ackvec_exit(void)
  346. {
  347. if (dccp_ackvec_slab != NULL) {
  348. kmem_cache_destroy(dccp_ackvec_slab);
  349. dccp_ackvec_slab = NULL;
  350. }
  351. if (dccp_ackvec_record_slab != NULL) {
  352. kmem_cache_destroy(dccp_ackvec_record_slab);
  353. dccp_ackvec_record_slab = NULL;
  354. }
  355. }