pf_lb.c 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921
  1. /* $OpenBSD: pf_lb.c,v 1.49 2015/08/03 13:33:12 jsg Exp $ */
  2. /*
  3. * Copyright (c) 2001 Daniel Hartmeier
  4. * Copyright (c) 2002 - 2008 Henning Brauer
  5. * All rights reserved.
  6. *
  7. * Redistribution and use in source and binary forms, with or without
  8. * modification, are permitted provided that the following conditions
  9. * are met:
  10. *
  11. * - Redistributions of source code must retain the above copyright
  12. * notice, this list of conditions and the following disclaimer.
  13. * - Redistributions in binary form must reproduce the above
  14. * copyright notice, this list of conditions and the following
  15. * disclaimer in the documentation and/or other materials provided
  16. * with the distribution.
  17. *
  18. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  21. * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
  22. * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  23. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  24. * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  25. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  26. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  27. * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
  28. * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  29. * POSSIBILITY OF SUCH DAMAGE.
  30. *
  31. * Effort sponsored in part by the Defense Advanced Research Projects
  32. * Agency (DARPA) and Air Force Research Laboratory, Air Force
  33. * Materiel Command, USAF, under agreement number F30602-01-2-0537.
  34. *
  35. */
  36. #include "bpfilter.h"
  37. #include "pflog.h"
  38. #include "pfsync.h"
  39. #include "pflow.h"
  40. #include <sys/param.h>
  41. #include <sys/systm.h>
  42. #include <sys/mbuf.h>
  43. #include <sys/filio.h>
  44. #include <sys/socket.h>
  45. #include <sys/socketvar.h>
  46. #include <sys/kernel.h>
  47. #include <sys/time.h>
  48. #include <sys/pool.h>
  49. #include <sys/rwlock.h>
  50. #include <sys/syslog.h>
  51. #include <sys/stdint.h>
  52. #include <crypto/siphash.h>
  53. #include <net/if.h>
  54. #include <net/if_types.h>
  55. #include <net/bpf.h>
  56. #include <net/route.h>
  57. #include <netinet/in.h>
  58. #include <netinet/ip.h>
  59. #include <netinet/ip_var.h>
  60. #include <netinet/tcp.h>
  61. #include <netinet/tcp_seq.h>
  62. #include <netinet/udp.h>
  63. #include <netinet/ip_icmp.h>
  64. #include <netinet/tcp_timer.h>
  65. #include <netinet/udp_var.h>
  66. #include <netinet/icmp_var.h>
  67. #include <netinet/if_ether.h>
  68. #include <netinet/in_pcb.h>
  69. #include <net/pfvar.h>
  70. #if NPFLOG > 0
  71. #include <net/if_pflog.h>
  72. #endif /* NPFLOG > 0 */
  73. #if NPFLOW > 0
  74. #include <net/if_pflow.h>
  75. #endif /* NPFLOW > 0 */
  76. #if NPFSYNC > 0
  77. #include <net/if_pfsync.h>
  78. #endif /* NPFSYNC > 0 */
  79. #ifdef INET6
  80. #include <netinet/ip6.h>
  81. #include <netinet/icmp6.h>
  82. #endif /* INET6 */
  83. /*
  84. * Global variables
  85. */
  86. u_int64_t pf_hash(struct pf_addr *, struct pf_addr *,
  87. struct pf_poolhashkey *, sa_family_t);
  88. int pf_get_sport(struct pf_pdesc *, struct pf_rule *,
  89. struct pf_addr *, u_int16_t *, u_int16_t,
  90. u_int16_t, struct pf_src_node **);
  91. int pf_get_transaddr_af(struct pf_rule *,
  92. struct pf_pdesc *, struct pf_src_node **);
  93. int pf_map_addr_sticky(sa_family_t, struct pf_rule *,
  94. struct pf_addr *, struct pf_addr *,
  95. struct pf_src_node **, struct pf_pool *,
  96. enum pf_sn_types);
  97. u_int64_t
  98. pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
  99. struct pf_poolhashkey *key, sa_family_t af)
  100. {
  101. uint64_t res = 0;
  102. #ifdef INET6
  103. union {
  104. uint64_t hash64;
  105. uint32_t hash32[2];
  106. } h;
  107. #endif /* INET6 */
  108. switch (af) {
  109. case AF_INET:
  110. res = SipHash24((SIPHASH_KEY *)key,
  111. &inaddr->addr32[0], sizeof(inaddr->addr32[0]));
  112. hash->addr32[0] = res;
  113. break;
  114. #ifdef INET6
  115. case AF_INET6:
  116. res = SipHash24((SIPHASH_KEY *)key, &inaddr->addr32[0],
  117. 4 * sizeof(inaddr->addr32[0]));
  118. h.hash64 = res;
  119. hash->addr32[0] = h.hash32[0];
  120. hash->addr32[1] = h.hash32[1];
  121. /*
  122. * siphash isn't big enough, but flipping it around is
  123. * good enough here.
  124. */
  125. hash->addr32[2] = ~h.hash32[1];
  126. hash->addr32[3] = ~h.hash32[0];
  127. break;
  128. #endif /* INET6 */
  129. default:
  130. unhandled_af(af);
  131. }
  132. return (res);
  133. }
  134. int
  135. pf_get_sport(struct pf_pdesc *pd, struct pf_rule *r,
  136. struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
  137. struct pf_src_node **sn)
  138. {
  139. struct pf_state_key_cmp key;
  140. struct pf_addr init_addr;
  141. u_int16_t cut;
  142. bzero(&init_addr, sizeof(init_addr));
  143. if (pf_map_addr(pd->naf, r, &pd->nsaddr, naddr, &init_addr, sn, &r->nat,
  144. PF_SN_NAT))
  145. return (1);
  146. if (pd->proto == IPPROTO_ICMP) {
  147. if (pd->ndport == htons(ICMP_ECHO)) {
  148. low = 1;
  149. high = 65535;
  150. } else
  151. return (0); /* Don't try to modify non-echo ICMP */
  152. }
  153. #ifdef INET6
  154. if (pd->proto == IPPROTO_ICMPV6) {
  155. if (pd->ndport == htons(ICMP6_ECHO_REQUEST)) {
  156. low = 1;
  157. high = 65535;
  158. } else
  159. return (0); /* Don't try to modify non-echo ICMP */
  160. }
  161. #endif /* INET6 */
  162. do {
  163. key.af = pd->naf;
  164. key.proto = pd->proto;
  165. key.rdomain = pd->rdomain;
  166. PF_ACPY(&key.addr[0], &pd->ndaddr, key.af);
  167. PF_ACPY(&key.addr[1], naddr, key.af);
  168. key.port[0] = pd->ndport;
  169. /*
  170. * port search; start random, step;
  171. * similar 2 portloop in in_pcbbind
  172. */
  173. if (!(pd->proto == IPPROTO_TCP || pd->proto == IPPROTO_UDP ||
  174. pd->proto == IPPROTO_ICMP || pd->proto == IPPROTO_ICMPV6)) {
  175. /* XXX bug: icmp states dont use the id on both
  176. * XXX sides (traceroute -I through nat) */
  177. key.port[1] = pd->nsport;
  178. if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
  179. *nport = pd->nsport;
  180. return (0);
  181. }
  182. } else if (low == 0 && high == 0) {
  183. key.port[1] = pd->nsport;
  184. if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
  185. *nport = pd->nsport;
  186. return (0);
  187. }
  188. } else if (low == high) {
  189. key.port[1] = htons(low);
  190. if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
  191. *nport = htons(low);
  192. return (0);
  193. }
  194. } else {
  195. u_int16_t tmp;
  196. if (low > high) {
  197. tmp = low;
  198. low = high;
  199. high = tmp;
  200. }
  201. /* low < high */
  202. cut = arc4random_uniform(1 + high - low) + low;
  203. /* low <= cut <= high */
  204. for (tmp = cut; tmp <= high; ++(tmp)) {
  205. key.port[1] = htons(tmp);
  206. if (pf_find_state_all(&key, PF_IN, NULL) ==
  207. NULL && !in_baddynamic(tmp, pd->proto)) {
  208. *nport = htons(tmp);
  209. return (0);
  210. }
  211. }
  212. for (tmp = cut - 1; tmp >= low; --(tmp)) {
  213. key.port[1] = htons(tmp);
  214. if (pf_find_state_all(&key, PF_IN, NULL) ==
  215. NULL && !in_baddynamic(tmp, pd->proto)) {
  216. *nport = htons(tmp);
  217. return (0);
  218. }
  219. }
  220. }
  221. switch (r->nat.opts & PF_POOL_TYPEMASK) {
  222. case PF_POOL_RANDOM:
  223. case PF_POOL_ROUNDROBIN:
  224. case PF_POOL_LEASTSTATES:
  225. /*
  226. * pick a different source address since we're out
  227. * of free port choices for the current one.
  228. */
  229. if (pf_map_addr(pd->naf, r, &pd->nsaddr, naddr,
  230. &init_addr, sn, &r->nat, PF_SN_NAT))
  231. return (1);
  232. break;
  233. case PF_POOL_NONE:
  234. case PF_POOL_SRCHASH:
  235. case PF_POOL_BITMASK:
  236. default:
  237. return (1);
  238. }
  239. } while (! PF_AEQ(&init_addr, naddr, pd->naf) );
  240. return (1); /* none available */
  241. }
  242. int
  243. pf_map_addr_sticky(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
  244. struct pf_addr *naddr, struct pf_src_node **sns, struct pf_pool *rpool,
  245. enum pf_sn_types type)
  246. {
  247. struct pf_addr *raddr, *rmask, *cached;
  248. struct pf_state *s;
  249. struct pf_src_node k;
  250. int valid;
  251. k.af = af;
  252. k.type = type;
  253. PF_ACPY(&k.addr, saddr, af);
  254. k.rule.ptr = r;
  255. pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
  256. sns[type] = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
  257. if (sns[type] == NULL)
  258. return (-1);
  259. /* check if the cached entry is still valid */
  260. cached = &(sns[type])->raddr;
  261. valid = 0;
  262. if (PF_AZERO(cached, af)) {
  263. valid = 1;
  264. } else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
  265. if (pfr_kentry_byaddr(rpool->addr.p.dyn->pfid_kt, cached,
  266. af, 0))
  267. valid = 1;
  268. } else if (rpool->addr.type == PF_ADDR_TABLE) {
  269. if (pfr_kentry_byaddr(rpool->addr.p.tbl, cached, af, 0))
  270. valid = 1;
  271. } else if (rpool->addr.type != PF_ADDR_NOROUTE) {
  272. raddr = &rpool->addr.v.a.addr;
  273. rmask = &rpool->addr.v.a.mask;
  274. valid = pf_match_addr(0, raddr, rmask, cached, af);
  275. }
  276. if (!valid) {
  277. if (pf_status.debug >= LOG_DEBUG) {
  278. log(LOG_DEBUG, "pf: pf_map_addr: "
  279. "stale src tracking (%u) ", type);
  280. pf_print_host(&k.addr, 0, af);
  281. addlog(" to ");
  282. pf_print_host(cached, 0, af);
  283. addlog("\n");
  284. }
  285. if (sns[type]->states != 0) {
  286. /* XXX expensive */
  287. RB_FOREACH(s, pf_state_tree_id,
  288. &tree_id)
  289. pf_state_rm_src_node(s,
  290. sns[type]);
  291. }
  292. sns[type]->expire = 1;
  293. pf_remove_src_node(sns[type]);
  294. sns[type] = NULL;
  295. return (-1);
  296. }
  297. if (!PF_AZERO(cached, af))
  298. PF_ACPY(naddr, cached, af);
  299. if (pf_status.debug >= LOG_DEBUG) {
  300. log(LOG_DEBUG, "pf: pf_map_addr: "
  301. "src tracking (%u) maps ", type);
  302. pf_print_host(&k.addr, 0, af);
  303. addlog(" to ");
  304. pf_print_host(naddr, 0, af);
  305. addlog("\n");
  306. }
  307. return (0);
  308. }
  309. int
  310. pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
  311. struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sns,
  312. struct pf_pool *rpool, enum pf_sn_types type)
  313. {
  314. unsigned char hash[16];
  315. struct pf_addr faddr;
  316. struct pf_addr *raddr = &rpool->addr.v.a.addr;
  317. struct pf_addr *rmask = &rpool->addr.v.a.mask;
  318. u_int64_t states;
  319. u_int16_t weight;
  320. u_int64_t load;
  321. u_int64_t cload;
  322. u_int64_t hashidx;
  323. int cnt;
  324. if (sns[type] == NULL && rpool->opts & PF_POOL_STICKYADDR &&
  325. (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE &&
  326. pf_map_addr_sticky(af, r, saddr, naddr, sns, rpool, type) == 0)
  327. return (0);
  328. if (rpool->addr.type == PF_ADDR_NOROUTE)
  329. return (1);
  330. if (rpool->addr.type == PF_ADDR_DYNIFTL) {
  331. switch (af) {
  332. case AF_INET:
  333. if (rpool->addr.p.dyn->pfid_acnt4 < 1 &&
  334. !PF_POOL_DYNTYPE(rpool->opts))
  335. return (1);
  336. raddr = &rpool->addr.p.dyn->pfid_addr4;
  337. rmask = &rpool->addr.p.dyn->pfid_mask4;
  338. break;
  339. #ifdef INET6
  340. case AF_INET6:
  341. if (rpool->addr.p.dyn->pfid_acnt6 < 1 &&
  342. !PF_POOL_DYNTYPE(rpool->opts))
  343. return (1);
  344. raddr = &rpool->addr.p.dyn->pfid_addr6;
  345. rmask = &rpool->addr.p.dyn->pfid_mask6;
  346. break;
  347. #endif /* INET6 */
  348. default:
  349. unhandled_af(af);
  350. }
  351. } else if (rpool->addr.type == PF_ADDR_TABLE) {
  352. if (!PF_POOL_DYNTYPE(rpool->opts))
  353. return (1); /* unsupported */
  354. } else {
  355. raddr = &rpool->addr.v.a.addr;
  356. rmask = &rpool->addr.v.a.mask;
  357. }
  358. switch (rpool->opts & PF_POOL_TYPEMASK) {
  359. case PF_POOL_NONE:
  360. PF_ACPY(naddr, raddr, af);
  361. break;
  362. case PF_POOL_BITMASK:
  363. PF_POOLMASK(naddr, raddr, rmask, saddr, af);
  364. break;
  365. case PF_POOL_RANDOM:
  366. if (rpool->addr.type == PF_ADDR_TABLE) {
  367. cnt = rpool->addr.p.tbl->pfrkt_cnt;
  368. if (cnt == 0)
  369. rpool->tblidx = 0;
  370. else
  371. rpool->tblidx = (int)arc4random_uniform(cnt);
  372. memset(&rpool->counter, 0, sizeof(rpool->counter));
  373. if (pfr_pool_get(rpool, &raddr, &rmask, af))
  374. return (1);
  375. PF_ACPY(naddr, &rpool->counter, af);
  376. } else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
  377. cnt = rpool->addr.p.dyn->pfid_kt->pfrkt_cnt;
  378. if (cnt == 0)
  379. rpool->tblidx = 0;
  380. else
  381. rpool->tblidx = (int)arc4random_uniform(cnt);
  382. memset(&rpool->counter, 0, sizeof(rpool->counter));
  383. if (pfr_pool_get(rpool, &raddr, &rmask, af))
  384. return (1);
  385. PF_ACPY(naddr, &rpool->counter, af);
  386. } else if (init_addr != NULL && PF_AZERO(init_addr, af)) {
  387. switch (af) {
  388. case AF_INET:
  389. rpool->counter.addr32[0] = htonl(arc4random());
  390. break;
  391. #ifdef INET6
  392. case AF_INET6:
  393. if (rmask->addr32[3] != 0xffffffff)
  394. rpool->counter.addr32[3] =
  395. htonl(arc4random());
  396. else
  397. break;
  398. if (rmask->addr32[2] != 0xffffffff)
  399. rpool->counter.addr32[2] =
  400. htonl(arc4random());
  401. else
  402. break;
  403. if (rmask->addr32[1] != 0xffffffff)
  404. rpool->counter.addr32[1] =
  405. htonl(arc4random());
  406. else
  407. break;
  408. if (rmask->addr32[0] != 0xffffffff)
  409. rpool->counter.addr32[0] =
  410. htonl(arc4random());
  411. break;
  412. #endif /* INET6 */
  413. default:
  414. unhandled_af(af);
  415. }
  416. PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
  417. PF_ACPY(init_addr, naddr, af);
  418. } else {
  419. PF_AINC(&rpool->counter, af);
  420. PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
  421. }
  422. break;
  423. case PF_POOL_SRCHASH:
  424. hashidx =
  425. pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
  426. if (rpool->addr.type == PF_ADDR_TABLE) {
  427. cnt = rpool->addr.p.tbl->pfrkt_cnt;
  428. if (cnt == 0)
  429. rpool->tblidx = 0;
  430. else
  431. rpool->tblidx = (int)(hashidx % cnt);
  432. memset(&rpool->counter, 0, sizeof(rpool->counter));
  433. if (pfr_pool_get(rpool, &raddr, &rmask, af))
  434. return (1);
  435. PF_ACPY(naddr, &rpool->counter, af);
  436. } else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
  437. cnt = rpool->addr.p.dyn->pfid_kt->pfrkt_cnt;
  438. if (cnt == 0)
  439. rpool->tblidx = 0;
  440. else
  441. rpool->tblidx = (int)(hashidx % cnt);
  442. memset(&rpool->counter, 0, sizeof(rpool->counter));
  443. if (pfr_pool_get(rpool, &raddr, &rmask, af))
  444. return (1);
  445. PF_ACPY(naddr, &rpool->counter, af);
  446. } else {
  447. PF_POOLMASK(naddr, raddr, rmask,
  448. (struct pf_addr *)&hash, af);
  449. }
  450. break;
  451. case PF_POOL_ROUNDROBIN:
  452. if (rpool->addr.type == PF_ADDR_TABLE ||
  453. rpool->addr.type == PF_ADDR_DYNIFTL) {
  454. if (pfr_pool_get(rpool, &raddr, &rmask, af)) {
  455. /*
  456. * reset counter in case its value
  457. * has been removed from the pool.
  458. */
  459. bzero(&rpool->counter, sizeof(rpool->counter));
  460. if (pfr_pool_get(rpool, &raddr, &rmask, af))
  461. return (1);
  462. }
  463. } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
  464. return (1);
  465. /* iterate over table if it contains entries which are weighted */
  466. if ((rpool->addr.type == PF_ADDR_TABLE &&
  467. rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
  468. (rpool->addr.type == PF_ADDR_DYNIFTL &&
  469. rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0)) {
  470. do {
  471. if (rpool->addr.type == PF_ADDR_TABLE ||
  472. rpool->addr.type == PF_ADDR_DYNIFTL) {
  473. if (pfr_pool_get(rpool,
  474. &raddr, &rmask, af))
  475. return (1);
  476. } else {
  477. log(LOG_ERR, "pf: pf_map_addr: "
  478. "weighted RR failure");
  479. return (1);
  480. }
  481. if (rpool->weight >= rpool->curweight)
  482. break;
  483. PF_AINC(&rpool->counter, af);
  484. } while (1);
  485. weight = rpool->weight;
  486. }
  487. PF_ACPY(naddr, &rpool->counter, af);
  488. if (init_addr != NULL && PF_AZERO(init_addr, af))
  489. PF_ACPY(init_addr, naddr, af);
  490. PF_AINC(&rpool->counter, af);
  491. break;
  492. case PF_POOL_LEASTSTATES:
  493. /* retrieve an address first */
  494. if (rpool->addr.type == PF_ADDR_TABLE ||
  495. rpool->addr.type == PF_ADDR_DYNIFTL) {
  496. if (pfr_pool_get(rpool, &raddr, &rmask, af)) {
  497. /* see PF_POOL_ROUNDROBIN */
  498. bzero(&rpool->counter, sizeof(rpool->counter));
  499. if (pfr_pool_get(rpool, &raddr, &rmask, af))
  500. return (1);
  501. }
  502. } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
  503. return (1);
  504. states = rpool->states;
  505. weight = rpool->weight;
  506. if ((rpool->addr.type == PF_ADDR_TABLE &&
  507. rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
  508. (rpool->addr.type == PF_ADDR_DYNIFTL &&
  509. rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
  510. load = ((UINT16_MAX * rpool->states) / rpool->weight);
  511. else
  512. load = states;
  513. PF_ACPY(&faddr, &rpool->counter, af);
  514. PF_ACPY(naddr, &rpool->counter, af);
  515. if (init_addr != NULL && PF_AZERO(init_addr, af))
  516. PF_ACPY(init_addr, naddr, af);
  517. /*
  518. * iterate *once* over whole table and find destination with
  519. * least connection
  520. */
  521. do {
  522. PF_AINC(&rpool->counter, af);
  523. if (rpool->addr.type == PF_ADDR_TABLE ||
  524. rpool->addr.type == PF_ADDR_DYNIFTL) {
  525. if (pfr_pool_get(rpool, &raddr, &rmask, af))
  526. return (1);
  527. } else if (pf_match_addr(0, raddr, rmask,
  528. &rpool->counter, af))
  529. return (1);
  530. if ((rpool->addr.type == PF_ADDR_TABLE &&
  531. rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
  532. (rpool->addr.type == PF_ADDR_DYNIFTL &&
  533. rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
  534. cload = ((UINT16_MAX * rpool->states)
  535. / rpool->weight);
  536. else
  537. cload = rpool->states;
  538. /* find lc minimum */
  539. if (cload < load) {
  540. states = rpool->states;
  541. weight = rpool->weight;
  542. load = cload;
  543. PF_ACPY(naddr, &rpool->counter, af);
  544. if (init_addr != NULL &&
  545. PF_AZERO(init_addr, af))
  546. PF_ACPY(init_addr, naddr, af);
  547. }
  548. } while (pf_match_addr(1, &faddr, rmask, &rpool->counter, af) &&
  549. (states > 0));
  550. if (rpool->addr.type == PF_ADDR_TABLE) {
  551. if (pfr_states_increase(rpool->addr.p.tbl,
  552. naddr, af) == -1) {
  553. if (pf_status.debug >= LOG_DEBUG) {
  554. log(LOG_DEBUG,"pf: pf_map_addr: "
  555. "selected address ");
  556. pf_print_host(naddr, 0, af);
  557. addlog(". Failed to increase count!\n");
  558. }
  559. return (1);
  560. }
  561. } else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
  562. if (pfr_states_increase(rpool->addr.p.dyn->pfid_kt,
  563. naddr, af) == -1) {
  564. if (pf_status.debug >= LOG_DEBUG) {
  565. log(LOG_DEBUG, "pf: pf_map_addr: "
  566. "selected address ");
  567. pf_print_host(naddr, 0, af);
  568. addlog(". Failed to increase count!\n");
  569. }
  570. return (1);
  571. }
  572. }
  573. break;
  574. }
  575. if (rpool->opts & PF_POOL_STICKYADDR) {
  576. if (sns[type] != NULL) {
  577. pf_remove_src_node(sns[type]);
  578. sns[type] = NULL;
  579. }
  580. if (pf_insert_src_node(&sns[type], r, type, af, saddr, naddr,
  581. 0))
  582. return (1);
  583. }
  584. if (pf_status.debug >= LOG_NOTICE &&
  585. (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
  586. log(LOG_NOTICE, "pf: pf_map_addr: selected address ");
  587. pf_print_host(naddr, 0, af);
  588. if ((rpool->opts & PF_POOL_TYPEMASK) ==
  589. PF_POOL_LEASTSTATES)
  590. addlog(" with state count %llu", states);
  591. if ((rpool->addr.type == PF_ADDR_TABLE &&
  592. rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
  593. (rpool->addr.type == PF_ADDR_DYNIFTL &&
  594. rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
  595. addlog(" with weight %u", weight);
  596. addlog("\n");
  597. }
  598. return (0);
  599. }
  600. int
  601. pf_get_transaddr(struct pf_rule *r, struct pf_pdesc *pd,
  602. struct pf_src_node **sns, struct pf_rule **nr)
  603. {
  604. struct pf_addr naddr;
  605. u_int16_t nport = 0;
  606. #ifdef INET6
  607. if (pd->af != pd->naf)
  608. return (pf_get_transaddr_af(r, pd, sns));
  609. #endif /* INET6 */
  610. if (r->nat.addr.type != PF_ADDR_NONE) {
  611. /* XXX is this right? what if rtable is changed at the same
  612. * XXX time? where do I need to figure out the sport? */
  613. if (pf_get_sport(pd, r, &naddr, &nport,
  614. r->nat.proxy_port[0], r->nat.proxy_port[1], sns)) {
  615. DPFPRINTF(LOG_NOTICE,
  616. "pf: NAT proxy port allocation (%u-%u) failed",
  617. r->nat.proxy_port[0],
  618. r->nat.proxy_port[1]);
  619. return (-1);
  620. }
  621. *nr = r;
  622. PF_ACPY(&pd->nsaddr, &naddr, pd->af);
  623. pd->nsport = nport;
  624. }
  625. if (r->rdr.addr.type != PF_ADDR_NONE) {
  626. if (pf_map_addr(pd->af, r, &pd->nsaddr, &naddr, NULL, sns,
  627. &r->rdr, PF_SN_RDR))
  628. return (-1);
  629. if ((r->rdr.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
  630. PF_POOLMASK(&naddr, &naddr, &r->rdr.addr.v.a.mask,
  631. &pd->ndaddr, pd->af);
  632. if (r->rdr.proxy_port[1]) {
  633. u_int32_t tmp_nport;
  634. tmp_nport = ((ntohs(pd->ndport) -
  635. ntohs(r->dst.port[0])) %
  636. (r->rdr.proxy_port[1] -
  637. r->rdr.proxy_port[0] + 1)) +
  638. r->rdr.proxy_port[0];
  639. /* wrap around if necessary */
  640. if (tmp_nport > 65535)
  641. tmp_nport -= 65535;
  642. nport = htons((u_int16_t)tmp_nport);
  643. } else if (r->rdr.proxy_port[0])
  644. nport = htons(r->rdr.proxy_port[0]);
  645. *nr = r;
  646. PF_ACPY(&pd->ndaddr, &naddr, pd->af);
  647. if (nport)
  648. pd->ndport = nport;
  649. }
  650. return (0);
  651. }
  #ifdef INET6
  /*
   * Translate a packet descriptor across address families (af-to).
   *
   * r    rule with a nat pool (mandatory, otherwise the kernel panics) and
   *      an optional rdr pool
   * pd   packet descriptor; on success nsaddr, nsport, ndaddr and ndport
   *      describe the packet in the new family pd->naf
   * sns  source node array passed on to the address selection
   *
   * Returns 0 on success and -1 when no source port or destination address
   * could be obtained.
   */
  int
  pf_get_transaddr_af(struct pf_rule *r, struct pf_pdesc *pd,
      struct pf_src_node **sns)
  {
      struct pf_addr new_dst, new_src, pool_addr;
      u_int16_t sport = 0;
      u_int16_t icmptype;
      int plen = 96;
      int to_inet = (pd->naf == AF_INET);
      int has_rdr = (r->rdr.addr.type != PF_ADDR_NONE);
      int inbound;

      if (pf_status.debug >= LOG_NOTICE) {
          log(LOG_NOTICE, "pf: af-to %s %s, ",
              pd->naf == AF_INET ? "inet" : "inet6",
              r->rdr.addr.type == PF_ADDR_NONE ? "nat" : "rdr");
          pf_print_host(&pd->nsaddr, pd->nsport, pd->af);
          addlog(" -> ");
          pf_print_host(&pd->ndaddr, pd->ndport, pd->af);
          addlog("\n");
      }

      if (r->nat.addr.type == PF_ADDR_NONE)
          panic("pf_get_transaddr_af: no nat pool for source address");

      /* get source address and port */
      if (pf_get_sport(pd, r, &new_src, &sport,
          r->nat.proxy_port[0], r->nat.proxy_port[1], sns)) {
          DPFPRINTF(LOG_NOTICE,
              "pf: af-to NAT proxy port allocation (%u-%u) failed",
              r->nat.proxy_port[0],
              r->nat.proxy_port[1]);
          return (-1);
      }
      pd->nsport = sport;

      /*
       * Echo request/reply type numbers differ between ICMP and ICMPv6.
       * The type is kept in ndport for inbound packets and in nsport
       * otherwise; rewrite it to the numbering of the new family.
       */
      if ((pd->proto == IPPROTO_ICMPV6 && to_inet) ||
          (pd->proto == IPPROTO_ICMP && pd->naf == AF_INET6)) {
          inbound = (pd->dir == PF_IN);
          icmptype = ntohs(inbound ? pd->ndport : pd->nsport);

          if (pd->proto == IPPROTO_ICMPV6) {
              if (icmptype == ICMP6_ECHO_REQUEST)
                  icmptype = ICMP_ECHO;
              else if (icmptype == ICMP6_ECHO_REPLY)
                  icmptype = ICMP_ECHOREPLY;
          } else {
              if (icmptype == ICMP_ECHO)
                  icmptype = ICMP6_ECHO_REQUEST;
              else if (icmptype == ICMP_ECHOREPLY)
                  icmptype = ICMP6_ECHO_REPLY;
          }

          if (inbound)
              pd->ndport = htons(icmptype);
          else
              pd->nsport = htons(icmptype);
      }

      /* get the destination address and port */
      if (has_rdr) {
          if (pf_map_addr(pd->naf, r, &new_src, &pool_addr, NULL, sns,
              &r->rdr, PF_SN_RDR))
              return (-1);
          if (r->rdr.proxy_port[0])
              pd->ndport = htons(r->rdr.proxy_port[0]);

          if (to_inet) {
              /* The prefix is the IPv4 rdr address */
              plen = in_mask2len((struct in_addr *)
                  &r->rdr.addr.v.a.mask);
              inet_nat46(pd->naf, &pd->ndaddr,
                  &new_dst, &pool_addr, plen);
          } else {
              /* The prefix is the IPv6 rdr address */
              plen = in6_mask2len((struct in6_addr *)
                  &r->rdr.addr.v.a.mask, NULL);
              inet_nat64(pd->naf, &pd->ndaddr,
                  &new_dst, &pool_addr, plen);
          }
      } else if (to_inet) {
          /* The prefix is the IPv6 dst address */
          plen = in6_mask2len((struct in6_addr *)
              &r->dst.addr.v.a.mask, NULL);
          if (plen < 32)
              plen = 96;
          inet_nat64(pd->naf, &pd->ndaddr,
              &new_dst, &pd->ndaddr, plen);
      } else {
          /*
           * The prefix is the IPv6 nat address
           * (that was stored in pd->nsaddr)
           */
          plen = in6_mask2len((struct in6_addr *)
              &r->nat.addr.v.a.mask, NULL);
          if (plen > 96)
              plen = 96;
          inet_nat64(pd->naf, &pd->ndaddr,
              &new_dst, &new_src, plen);
      }

      PF_ACPY(&pd->nsaddr, &new_src, pd->naf);
      PF_ACPY(&pd->ndaddr, &new_dst, pd->naf);

      if (pf_status.debug >= LOG_NOTICE) {
          log(LOG_NOTICE, "pf: af-to %s %s done, prefixlen %d, ",
              pd->naf == AF_INET ? "inet" : "inet6",
              r->rdr.addr.type == PF_ADDR_NONE ? "nat" : "rdr",
              plen);
          pf_print_host(&pd->nsaddr, pd->nsport, pd->naf);
          addlog(" -> ");
          pf_print_host(&pd->ndaddr, pd->ndport, pd->naf);
          addlog("\n");
      }

      return (0);
  }
  #endif /* INET6 */
  773. int
  774. pf_postprocess_addr(struct pf_state *cur)
  775. {
  776. struct pf_rule *nr;
  777. struct pf_state_key *sks;
  778. struct pf_pool rpool;
  779. struct pf_addr lookup_addr;
  780. int slbcount = -1;
  781. nr = cur->natrule.ptr;
  782. if (nr == NULL)
  783. return (0);
  784. /* decrease counter */
  785. sks = cur ? cur->key[PF_SK_STACK] : NULL;
  786. /* check for outgoing or ingoing balancing */
  787. if (nr->rt == PF_ROUTETO)
  788. lookup_addr = cur->rt_addr;
  789. else if (sks != NULL)
  790. lookup_addr = sks->addr[1];
  791. else {
  792. if (pf_status.debug >= LOG_DEBUG) {
  793. log(LOG_DEBUG, "pf: %s: unable to obtain address",
  794. __func__);
  795. }
  796. return (1);
  797. }
  798. /* check for appropriate pool */
  799. if (nr->rdr.addr.type != PF_ADDR_NONE)
  800. rpool = nr->rdr;
  801. else if (nr->nat.addr.type != PF_ADDR_NONE)
  802. rpool = nr->nat;
  803. else if (nr->route.addr.type != PF_ADDR_NONE)
  804. rpool = nr->route;
  805. else
  806. return (0);
  807. if (((rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_LEASTSTATES))
  808. return (0);
  809. if (rpool.addr.type == PF_ADDR_TABLE) {
  810. if ((slbcount = pfr_states_decrease(
  811. rpool.addr.p.tbl,
  812. &lookup_addr, sks->af)) == -1) {
  813. if (pf_status.debug >= LOG_DEBUG) {
  814. log(LOG_DEBUG, "pf: %s: selected address ",
  815. __func__);
  816. pf_print_host(&lookup_addr,
  817. sks->port[0], sks->af);
  818. addlog(". Failed to "
  819. "decrease count!\n");
  820. }
  821. return (1);
  822. }
  823. } else if (rpool.addr.type == PF_ADDR_DYNIFTL) {
  824. if ((slbcount = pfr_states_decrease(
  825. rpool.addr.p.dyn->pfid_kt,
  826. &lookup_addr, sks->af)) == -1) {
  827. if (pf_status.debug >= LOG_DEBUG) {
  828. log(LOG_DEBUG, "pf: %s: selected address ",
  829. __func__);
  830. pf_print_host(&lookup_addr,
  831. sks->port[0], sks->af);
  832. addlog(". Failed to "
  833. "decrease count!\n");
  834. }
  835. return (1);
  836. }
  837. }
  838. if (slbcount > -1) {
  839. if (pf_status.debug >= LOG_NOTICE) {
  840. log(LOG_NOTICE, "pf: %s: selected address ", __func__);
  841. pf_print_host(&lookup_addr, sks->port[0],
  842. sks->af);
  843. addlog(" decreased state count to %u\n",
  844. slbcount);
  845. }
  846. }
  847. return (0);
  848. }