server.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693
  1. /* AFS server record management
  2. *
  3. * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved.
  4. * Written by David Howells (dhowells@redhat.com)
  5. *
  6. * This program is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU General Public License
  8. * as published by the Free Software Foundation; either version
  9. * 2 of the License, or (at your option) any later version.
  10. */
  11. #include <linux/sched.h>
  12. #include <linux/slab.h>
  13. #include "afs_fs.h"
  14. #include "internal.h"
  /* Server record timeout, in seconds, used when scheduling garbage collection */
  static unsigned int afs_server_gc_delay = 10;
  /* Time, in seconds, until a server's addresses are rechecked with the VLDB */
  static unsigned int afs_server_update_delay = 30;
  17. static void afs_inc_servers_outstanding(struct afs_net *net)
  18. {
  19. atomic_inc(&net->servers_outstanding);
  20. }
  21. static void afs_dec_servers_outstanding(struct afs_net *net)
  22. {
  23. if (atomic_dec_and_test(&net->servers_outstanding))
  24. wake_up_var(&net->servers_outstanding);
  25. }
  26. /*
  27. * Find a server by one of its addresses.
  28. */
  29. struct afs_server *afs_find_server(struct afs_net *net,
  30. const struct sockaddr_rxrpc *srx)
  31. {
  32. const struct afs_addr_list *alist;
  33. struct afs_server *server = NULL;
  34. unsigned int i;
  35. int seq = 0, diff;
  36. rcu_read_lock();
  37. do {
  38. if (server)
  39. afs_put_server(net, server);
  40. server = NULL;
  41. read_seqbegin_or_lock(&net->fs_addr_lock, &seq);
  42. if (srx->transport.family == AF_INET6) {
  43. const struct sockaddr_in6 *a = &srx->transport.sin6, *b;
  44. hlist_for_each_entry_rcu(server, &net->fs_addresses6, addr6_link) {
  45. alist = rcu_dereference(server->addresses);
  46. for (i = alist->nr_ipv4; i < alist->nr_addrs; i++) {
  47. b = &alist->addrs[i].transport.sin6;
  48. diff = ((u16 __force)a->sin6_port -
  49. (u16 __force)b->sin6_port);
  50. if (diff == 0)
  51. diff = memcmp(&a->sin6_addr,
  52. &b->sin6_addr,
  53. sizeof(struct in6_addr));
  54. if (diff == 0)
  55. goto found;
  56. }
  57. }
  58. } else {
  59. const struct sockaddr_in *a = &srx->transport.sin, *b;
  60. hlist_for_each_entry_rcu(server, &net->fs_addresses4, addr4_link) {
  61. alist = rcu_dereference(server->addresses);
  62. for (i = 0; i < alist->nr_ipv4; i++) {
  63. b = &alist->addrs[i].transport.sin;
  64. diff = ((u16 __force)a->sin_port -
  65. (u16 __force)b->sin_port);
  66. if (diff == 0)
  67. diff = ((u32 __force)a->sin_addr.s_addr -
  68. (u32 __force)b->sin_addr.s_addr);
  69. if (diff == 0)
  70. goto found;
  71. }
  72. }
  73. }
  74. server = NULL;
  75. found:
  76. if (server && !atomic_inc_not_zero(&server->usage))
  77. server = NULL;
  78. } while (need_seqretry(&net->fs_addr_lock, seq));
  79. done_seqretry(&net->fs_addr_lock, seq);
  80. rcu_read_unlock();
  81. return server;
  82. }
  83. /*
  84. * Look up a server by its UUID
  85. */
  86. struct afs_server *afs_find_server_by_uuid(struct afs_net *net, const uuid_t *uuid)
  87. {
  88. struct afs_server *server = NULL;
  89. struct rb_node *p;
  90. int diff, seq = 0;
  91. _enter("%pU", uuid);
  92. do {
  93. /* Unfortunately, rbtree walking doesn't give reliable results
  94. * under just the RCU read lock, so we have to check for
  95. * changes.
  96. */
  97. if (server)
  98. afs_put_server(net, server);
  99. server = NULL;
  100. read_seqbegin_or_lock(&net->fs_lock, &seq);
  101. p = net->fs_servers.rb_node;
  102. while (p) {
  103. server = rb_entry(p, struct afs_server, uuid_rb);
  104. diff = memcmp(uuid, &server->uuid, sizeof(*uuid));
  105. if (diff < 0) {
  106. p = p->rb_left;
  107. } else if (diff > 0) {
  108. p = p->rb_right;
  109. } else {
  110. afs_get_server(server);
  111. break;
  112. }
  113. server = NULL;
  114. }
  115. } while (need_seqretry(&net->fs_lock, seq));
  116. done_seqretry(&net->fs_lock, seq);
  117. _leave(" = %p", server);
  118. return server;
  119. }
  120. /*
  121. * Install a server record in the namespace tree
  122. */
  123. static struct afs_server *afs_install_server(struct afs_net *net,
  124. struct afs_server *candidate)
  125. {
  126. const struct afs_addr_list *alist;
  127. struct afs_server *server;
  128. struct rb_node **pp, *p;
  129. int ret = -EEXIST, diff;
  130. _enter("%p", candidate);
  131. write_seqlock(&net->fs_lock);
  132. /* Firstly install the server in the UUID lookup tree */
  133. pp = &net->fs_servers.rb_node;
  134. p = NULL;
  135. while (*pp) {
  136. p = *pp;
  137. _debug("- consider %p", p);
  138. server = rb_entry(p, struct afs_server, uuid_rb);
  139. diff = memcmp(&candidate->uuid, &server->uuid, sizeof(uuid_t));
  140. if (diff < 0)
  141. pp = &(*pp)->rb_left;
  142. else if (diff > 0)
  143. pp = &(*pp)->rb_right;
  144. else
  145. goto exists;
  146. }
  147. server = candidate;
  148. rb_link_node(&server->uuid_rb, p, pp);
  149. rb_insert_color(&server->uuid_rb, &net->fs_servers);
  150. hlist_add_head_rcu(&server->proc_link, &net->fs_proc);
  151. write_seqlock(&net->fs_addr_lock);
  152. alist = rcu_dereference_protected(server->addresses,
  153. lockdep_is_held(&net->fs_addr_lock.lock));
  154. /* Secondly, if the server has any IPv4 and/or IPv6 addresses, install
  155. * it in the IPv4 and/or IPv6 reverse-map lists.
  156. *
  157. * TODO: For speed we want to use something other than a flat list
  158. * here; even sorting the list in terms of lowest address would help a
  159. * bit, but anything we might want to do gets messy and memory
  160. * intensive.
  161. */
  162. if (alist->nr_ipv4 > 0)
  163. hlist_add_head_rcu(&server->addr4_link, &net->fs_addresses4);
  164. if (alist->nr_addrs > alist->nr_ipv4)
  165. hlist_add_head_rcu(&server->addr6_link, &net->fs_addresses6);
  166. write_sequnlock(&net->fs_addr_lock);
  167. ret = 0;
  168. exists:
  169. afs_get_server(server);
  170. write_sequnlock(&net->fs_lock);
  171. return server;
  172. }
  173. /*
  174. * allocate a new server record
  175. */
  176. static struct afs_server *afs_alloc_server(struct afs_net *net,
  177. const uuid_t *uuid,
  178. struct afs_addr_list *alist)
  179. {
  180. struct afs_server *server;
  181. _enter("");
  182. server = kzalloc(sizeof(struct afs_server), GFP_KERNEL);
  183. if (!server)
  184. goto enomem;
  185. atomic_set(&server->usage, 1);
  186. RCU_INIT_POINTER(server->addresses, alist);
  187. server->addr_version = alist->version;
  188. server->uuid = *uuid;
  189. server->flags = (1UL << AFS_SERVER_FL_NEW);
  190. server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
  191. rwlock_init(&server->fs_lock);
  192. INIT_HLIST_HEAD(&server->cb_volumes);
  193. rwlock_init(&server->cb_break_lock);
  194. afs_inc_servers_outstanding(net);
  195. _leave(" = %p", server);
  196. return server;
  197. enomem:
  198. _leave(" = NULL [nomem]");
  199. return NULL;
  200. }
  /*
   * Look up an address record for a server.
   *
   * Each address produced by the VL cursor for @cell is asked in turn for the
   * addresses of the server identified by @uuid; the YFS variant of the request
   * is used when the address's bit is set in the list's yfs mask.
   *
   * Returns the address list from the first request that completes with no
   * error, or an ERR_PTR() built from the value afs_end_cursor() returns.
   */
  static struct afs_addr_list *afs_vl_lookup_addrs(struct afs_cell *cell,
                                                   struct key *key, const uuid_t *uuid)
  {
      struct afs_addr_cursor ac;
      struct afs_addr_list *alist;
      int ret;

      ret = afs_set_vl_cursor(&ac, cell);
      if (ret < 0)
          return ERR_PTR(ret);

      while (afs_iterate_addresses(&ac)) {
          if (test_bit(ac.index, &ac.alist->yfs))
              alist = afs_yfsvl_get_endpoints(cell->net, &ac, key, uuid);
          else
              alist = afs_vl_get_addrs_u(cell->net, &ac, key, uuid);

          switch (ac.error) {
          case 0:
              afs_end_cursor(&ac);
              return alist;

          case -ENETUNREACH:
          case -EHOSTUNREACH:
          case -ECONNREFUSED:
              /* Couldn't reach this one; move on to the next address */
              continue;

          case -ECONNABORTED:
              ac.error = afs_abort_to_error(ac.abort_code);
              break;

          case -ENOMEM:
          case -ENONET:
              /* Reported unchanged */
              break;

          default:
              ac.error = -EIO;
              break;
          }

          /* Any error that got this far stops the iteration */
          break;
      }

      return ERR_PTR(afs_end_cursor(&ac));
  }
  240. /*
  241. * Get or create a fileserver record.
  242. */
  243. struct afs_server *afs_lookup_server(struct afs_cell *cell, struct key *key,
  244. const uuid_t *uuid)
  245. {
  246. struct afs_addr_list *alist;
  247. struct afs_server *server, *candidate;
  248. _enter("%p,%pU", cell->net, uuid);
  249. server = afs_find_server_by_uuid(cell->net, uuid);
  250. if (server)
  251. return server;
  252. alist = afs_vl_lookup_addrs(cell, key, uuid);
  253. if (IS_ERR(alist))
  254. return ERR_CAST(alist);
  255. candidate = afs_alloc_server(cell->net, uuid, alist);
  256. if (!candidate) {
  257. afs_put_addrlist(alist);
  258. return ERR_PTR(-ENOMEM);
  259. }
  260. server = afs_install_server(cell->net, candidate);
  261. if (server != candidate) {
  262. afs_put_addrlist(alist);
  263. kfree(candidate);
  264. }
  265. _leave(" = %p{%d}", server, atomic_read(&server->usage));
  266. return server;
  267. }
  268. /*
  269. * Set the server timer to fire after a given delay, assuming it's not already
  270. * set for an earlier time.
  271. */
  272. static void afs_set_server_timer(struct afs_net *net, time64_t delay)
  273. {
  274. if (net->live) {
  275. afs_inc_servers_outstanding(net);
  276. if (timer_reduce(&net->fs_timer, jiffies + delay * HZ))
  277. afs_dec_servers_outstanding(net);
  278. }
  279. }
  280. /*
  281. * Server management timer. We have an increment on fs_outstanding that we
  282. * need to pass along to the work item.
  283. */
  284. void afs_servers_timer(struct timer_list *timer)
  285. {
  286. struct afs_net *net = container_of(timer, struct afs_net, fs_timer);
  287. _enter("");
  288. if (!queue_work(afs_wq, &net->fs_manager))
  289. afs_dec_servers_outstanding(net);
  290. }
  291. /*
  292. * Release a reference on a server record.
  293. */
  294. void afs_put_server(struct afs_net *net, struct afs_server *server)
  295. {
  296. unsigned int usage;
  297. if (!server)
  298. return;
  299. server->put_time = ktime_get_real_seconds();
  300. usage = atomic_dec_return(&server->usage);
  301. _enter("{%u}", usage);
  302. if (likely(usage > 0))
  303. return;
  304. afs_set_server_timer(net, afs_server_gc_delay);
  305. }
  306. static void afs_server_rcu(struct rcu_head *rcu)
  307. {
  308. struct afs_server *server = container_of(rcu, struct afs_server, rcu);
  309. afs_put_addrlist(rcu_access_pointer(server->addresses));
  310. kfree(server);
  311. }
  312. /*
  313. * destroy a dead server
  314. */
  315. static void afs_destroy_server(struct afs_net *net, struct afs_server *server)
  316. {
  317. struct afs_addr_list *alist = rcu_access_pointer(server->addresses);
  318. struct afs_addr_cursor ac = {
  319. .alist = alist,
  320. .start = alist->index,
  321. .index = 0,
  322. .addr = &alist->addrs[alist->index],
  323. .error = 0,
  324. };
  325. _enter("%p", server);
  326. if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags))
  327. afs_fs_give_up_all_callbacks(net, server, &ac, NULL);
  328. call_rcu(&server->rcu, afs_server_rcu);
  329. afs_dec_servers_outstanding(net);
  330. }
  331. /*
  332. * Garbage collect any expired servers.
  333. */
  334. static void afs_gc_servers(struct afs_net *net, struct afs_server *gc_list)
  335. {
  336. struct afs_server *server;
  337. bool deleted;
  338. int usage;
  339. while ((server = gc_list)) {
  340. gc_list = server->gc_next;
  341. write_seqlock(&net->fs_lock);
  342. usage = 1;
  343. deleted = atomic_try_cmpxchg(&server->usage, &usage, 0);
  344. if (deleted) {
  345. rb_erase(&server->uuid_rb, &net->fs_servers);
  346. hlist_del_rcu(&server->proc_link);
  347. }
  348. write_sequnlock(&net->fs_lock);
  349. if (deleted) {
  350. write_seqlock(&net->fs_addr_lock);
  351. if (!hlist_unhashed(&server->addr4_link))
  352. hlist_del_rcu(&server->addr4_link);
  353. if (!hlist_unhashed(&server->addr6_link))
  354. hlist_del_rcu(&server->addr6_link);
  355. write_sequnlock(&net->fs_addr_lock);
  356. afs_destroy_server(net, server);
  357. }
  358. }
  359. }
  360. /*
  361. * Manage the records of servers known to be within a network namespace. This
  362. * includes garbage collecting unused servers.
  363. *
  364. * Note also that we were given an increment on net->servers_outstanding by
  365. * whoever queued us that we need to deal with before returning.
  366. */
  367. void afs_manage_servers(struct work_struct *work)
  368. {
  369. struct afs_net *net = container_of(work, struct afs_net, fs_manager);
  370. struct afs_server *gc_list = NULL;
  371. struct rb_node *cursor;
  372. time64_t now = ktime_get_real_seconds(), next_manage = TIME64_MAX;
  373. bool purging = !net->live;
  374. _enter("");
  375. /* Trawl the server list looking for servers that have expired from
  376. * lack of use.
  377. */
  378. read_seqlock_excl(&net->fs_lock);
  379. for (cursor = rb_first(&net->fs_servers); cursor; cursor = rb_next(cursor)) {
  380. struct afs_server *server =
  381. rb_entry(cursor, struct afs_server, uuid_rb);
  382. int usage = atomic_read(&server->usage);
  383. _debug("manage %pU %u", &server->uuid, usage);
  384. ASSERTCMP(usage, >=, 1);
  385. ASSERTIFCMP(purging, usage, ==, 1);
  386. if (usage == 1) {
  387. time64_t expire_at = server->put_time;
  388. if (!test_bit(AFS_SERVER_FL_VL_FAIL, &server->flags) &&
  389. !test_bit(AFS_SERVER_FL_NOT_FOUND, &server->flags))
  390. expire_at += afs_server_gc_delay;
  391. if (purging || expire_at <= now) {
  392. server->gc_next = gc_list;
  393. gc_list = server;
  394. } else if (expire_at < next_manage) {
  395. next_manage = expire_at;
  396. }
  397. }
  398. }
  399. read_sequnlock_excl(&net->fs_lock);
  400. /* Update the timer on the way out. We have to pass an increment on
  401. * servers_outstanding in the namespace that we are in to the timer or
  402. * the work scheduler.
  403. */
  404. if (!purging && next_manage < TIME64_MAX) {
  405. now = ktime_get_real_seconds();
  406. if (next_manage - now <= 0) {
  407. if (queue_work(afs_wq, &net->fs_manager))
  408. afs_inc_servers_outstanding(net);
  409. } else {
  410. afs_set_server_timer(net, next_manage - now);
  411. }
  412. }
  413. afs_gc_servers(net, gc_list);
  414. afs_dec_servers_outstanding(net);
  415. _leave(" [%d]", atomic_read(&net->servers_outstanding));
  416. }
  417. static void afs_queue_server_manager(struct afs_net *net)
  418. {
  419. afs_inc_servers_outstanding(net);
  420. if (!queue_work(afs_wq, &net->fs_manager))
  421. afs_dec_servers_outstanding(net);
  422. }
  423. /*
  424. * Purge list of servers.
  425. */
  426. void afs_purge_servers(struct afs_net *net)
  427. {
  428. _enter("");
  429. if (del_timer_sync(&net->fs_timer))
  430. atomic_dec(&net->servers_outstanding);
  431. afs_queue_server_manager(net);
  432. _debug("wait");
  433. wait_var_event(&net->servers_outstanding,
  434. !atomic_read(&net->servers_outstanding));
  435. _leave("");
  436. }
  /*
   * Probe a fileserver to find its capabilities.
   *
   * The address cursor in @fc is reset to start from the address list's current
   * index and each address is asked in turn for the server's capabilities.
   *
   * Returns true, after setting AFS_SERVER_FL_PROBED on the server, as soon as
   * one request completes with no error; returns false otherwise, with the
   * error left in fc->ac.error.
   *
   * TODO: Try service upgrade.
   */
  static bool afs_do_probe_fileserver(struct afs_fs_cursor *fc)
  {
      _enter("");

      fc->ac.start = READ_ONCE(fc->ac.alist->index);
      fc->ac.index = fc->ac.start;
      fc->ac.addr  = NULL;
      fc->ac.error = 0;
      fc->ac.begun = false;

      while (afs_iterate_addresses(&fc->ac)) {
          afs_fs_get_capabilities(afs_v2net(fc->vnode), fc->cbi->server,
                                  &fc->ac, fc->key);

          switch (fc->ac.error) {
          case 0:
              afs_end_cursor(&fc->ac);
              set_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags);
              return true;

          case -ENETUNREACH:
          case -EHOSTUNREACH:
          case -ECONNREFUSED:
          case -ETIMEDOUT:
          case -ETIME:
              /* No answer from this address; try the next one */
              continue;

          case -ECONNABORTED:
              fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
              break;

          case -ENOMEM:
          case -ENONET:
              /* Reported unchanged */
              break;

          default:
              fc->ac.error = -EIO;
              break;
          }

          /* Any error that got this far stops the iteration */
          break;
      }

      afs_end_cursor(&fc->ac);
      return false;
  }
  479. /*
  480. * If we haven't already, try probing the fileserver to get its capabilities.
  481. * We try not to instigate parallel probes, but it's possible that the parallel
  482. * probes will fail due to authentication failure when ours would succeed.
  483. *
  484. * TODO: Try sending an anonymous probe if an authenticated probe fails.
  485. */
  486. bool afs_probe_fileserver(struct afs_fs_cursor *fc)
  487. {
  488. bool success;
  489. int ret, retries = 0;
  490. _enter("");
  491. retry:
  492. if (test_bit(AFS_SERVER_FL_PROBED, &fc->cbi->server->flags)) {
  493. _leave(" = t");
  494. return true;
  495. }
  496. if (!test_and_set_bit_lock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags)) {
  497. success = afs_do_probe_fileserver(fc);
  498. clear_bit_unlock(AFS_SERVER_FL_PROBING, &fc->cbi->server->flags);
  499. wake_up_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING);
  500. _leave(" = t");
  501. return success;
  502. }
  503. _debug("wait");
  504. ret = wait_on_bit(&fc->cbi->server->flags, AFS_SERVER_FL_PROBING,
  505. TASK_INTERRUPTIBLE);
  506. if (ret == -ERESTARTSYS) {
  507. fc->ac.error = ret;
  508. _leave(" = f [%d]", ret);
  509. return false;
  510. }
  511. retries++;
  512. if (retries == 4) {
  513. fc->ac.error = -ESTALE;
  514. _leave(" = f [stale]");
  515. return false;
  516. }
  517. _debug("retry");
  518. goto retry;
  519. }
  520. /*
  521. * Get an update for a server's address list.
  522. */
  523. static noinline bool afs_update_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
  524. {
  525. struct afs_addr_list *alist, *discard;
  526. _enter("");
  527. alist = afs_vl_lookup_addrs(fc->vnode->volume->cell, fc->key,
  528. &server->uuid);
  529. if (IS_ERR(alist)) {
  530. fc->ac.error = PTR_ERR(alist);
  531. _leave(" = f [%d]", fc->ac.error);
  532. return false;
  533. }
  534. discard = alist;
  535. if (server->addr_version != alist->version) {
  536. write_lock(&server->fs_lock);
  537. discard = rcu_dereference_protected(server->addresses,
  538. lockdep_is_held(&server->fs_lock));
  539. rcu_assign_pointer(server->addresses, alist);
  540. server->addr_version = alist->version;
  541. write_unlock(&server->fs_lock);
  542. }
  543. server->update_at = ktime_get_real_seconds() + afs_server_update_delay;
  544. afs_put_addrlist(discard);
  545. _leave(" = t");
  546. return true;
  547. }
  548. /*
  549. * See if a server's address list needs updating.
  550. */
  551. bool afs_check_server_record(struct afs_fs_cursor *fc, struct afs_server *server)
  552. {
  553. time64_t now = ktime_get_real_seconds();
  554. long diff;
  555. bool success;
  556. int ret, retries = 0;
  557. _enter("");
  558. ASSERT(server);
  559. retry:
  560. diff = READ_ONCE(server->update_at) - now;
  561. if (diff > 0) {
  562. _leave(" = t [not now %ld]", diff);
  563. return true;
  564. }
  565. if (!test_and_set_bit_lock(AFS_SERVER_FL_UPDATING, &server->flags)) {
  566. success = afs_update_server_record(fc, server);
  567. clear_bit_unlock(AFS_SERVER_FL_UPDATING, &server->flags);
  568. wake_up_bit(&server->flags, AFS_SERVER_FL_UPDATING);
  569. _leave(" = %d", success);
  570. return success;
  571. }
  572. ret = wait_on_bit(&server->flags, AFS_SERVER_FL_UPDATING,
  573. TASK_INTERRUPTIBLE);
  574. if (ret == -ERESTARTSYS) {
  575. fc->ac.error = ret;
  576. _leave(" = f [intr]");
  577. return false;
  578. }
  579. retries++;
  580. if (retries == 4) {
  581. _leave(" = f [stale]");
  582. ret = -ESTALE;
  583. return false;
  584. }
  585. goto retry;
  586. }