rotate.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529
  1. /* Handle fileserver selection and rotation.
  2. *
  3. * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
  4. * Written by David Howells (dhowells@redhat.com)
  5. *
  6. * This program is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU General Public Licence
  8. * as published by the Free Software Foundation; either version
  9. * 2 of the Licence, or (at your option) any later version.
  10. */
  11. #include <linux/kernel.h>
  12. #include <linux/slab.h>
  13. #include <linux/fs.h>
  14. #include <linux/sched.h>
  15. #include <linux/delay.h>
  16. #include <linux/sched/signal.h>
  17. #include "internal.h"
  18. #include "afs_fs.h"
  19. /*
  20. * Initialise a filesystem server cursor for iterating over FS servers.
  21. */
  22. static void afs_init_fs_cursor(struct afs_fs_cursor *fc, struct afs_vnode *vnode)
  23. {
  24. memset(fc, 0, sizeof(*fc));
  25. }
  26. /*
  27. * Begin an operation on the fileserver.
  28. *
  29. * Fileserver operations are serialised on the server by vnode, so we serialise
  30. * them here also using the io_lock.
  31. */
  32. bool afs_begin_vnode_operation(struct afs_fs_cursor *fc, struct afs_vnode *vnode,
  33. struct key *key)
  34. {
  35. afs_init_fs_cursor(fc, vnode);
  36. fc->vnode = vnode;
  37. fc->key = key;
  38. fc->ac.error = SHRT_MAX;
  39. if (mutex_lock_interruptible(&vnode->io_lock) < 0) {
  40. fc->ac.error = -EINTR;
  41. fc->flags |= AFS_FS_CURSOR_STOP;
  42. return false;
  43. }
  44. if (vnode->lock_state != AFS_VNODE_LOCK_NONE)
  45. fc->flags |= AFS_FS_CURSOR_CUR_ONLY;
  46. return true;
  47. }
  48. /*
  49. * Begin iteration through a server list, starting with the vnode's last used
  50. * server if possible, or the last recorded good server if not.
  51. */
  52. static bool afs_start_fs_iteration(struct afs_fs_cursor *fc,
  53. struct afs_vnode *vnode)
  54. {
  55. struct afs_cb_interest *cbi;
  56. int i;
  57. read_lock(&vnode->volume->servers_lock);
  58. fc->server_list = afs_get_serverlist(vnode->volume->servers);
  59. read_unlock(&vnode->volume->servers_lock);
  60. cbi = vnode->cb_interest;
  61. if (cbi) {
  62. /* See if the vnode's preferred record is still available */
  63. for (i = 0; i < fc->server_list->nr_servers; i++) {
  64. if (fc->server_list->servers[i].cb_interest == cbi) {
  65. fc->start = i;
  66. goto found_interest;
  67. }
  68. }
  69. /* If we have a lock outstanding on a server that's no longer
  70. * serving this vnode, then we can't switch to another server
  71. * and have to return an error.
  72. */
  73. if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) {
  74. fc->ac.error = -ESTALE;
  75. return false;
  76. }
  77. /* Note that the callback promise is effectively broken */
  78. write_seqlock(&vnode->cb_lock);
  79. ASSERTCMP(cbi, ==, vnode->cb_interest);
  80. vnode->cb_interest = NULL;
  81. if (test_and_clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags))
  82. vnode->cb_break++;
  83. write_sequnlock(&vnode->cb_lock);
  84. afs_put_cb_interest(afs_v2net(vnode), cbi);
  85. cbi = NULL;
  86. } else {
  87. fc->start = READ_ONCE(fc->server_list->index);
  88. }
  89. found_interest:
  90. fc->index = fc->start;
  91. return true;
  92. }
  93. /*
  94. * Post volume busy note.
  95. */
  96. static void afs_busy(struct afs_volume *volume, u32 abort_code)
  97. {
  98. const char *m;
  99. switch (abort_code) {
  100. case VOFFLINE: m = "offline"; break;
  101. case VRESTARTING: m = "restarting"; break;
  102. case VSALVAGING: m = "being salvaged"; break;
  103. default: m = "busy"; break;
  104. }
  105. pr_notice("kAFS: Volume %u '%s' is %s\n", volume->vid, volume->name, m);
  106. }
  107. /*
  108. * Sleep and retry the operation to the same fileserver.
  109. */
  110. static bool afs_sleep_and_retry(struct afs_fs_cursor *fc)
  111. {
  112. msleep_interruptible(1000);
  113. if (signal_pending(current)) {
  114. fc->ac.error = -ERESTARTSYS;
  115. return false;
  116. }
  117. return true;
  118. }
  119. /*
  120. * Select the fileserver to use. May be called multiple times to rotate
  121. * through the fileservers.
  122. */
  123. bool afs_select_fileserver(struct afs_fs_cursor *fc)
  124. {
  125. struct afs_addr_list *alist;
  126. struct afs_server *server;
  127. struct afs_vnode *vnode = fc->vnode;
  128. _enter("%u/%u,%u/%u,%d,%d",
  129. fc->index, fc->start,
  130. fc->ac.index, fc->ac.start,
  131. fc->ac.error, fc->ac.abort_code);
  132. if (fc->flags & AFS_FS_CURSOR_STOP) {
  133. _leave(" = f [stopped]");
  134. return false;
  135. }
  136. /* Evaluate the result of the previous operation, if there was one. */
  137. switch (fc->ac.error) {
  138. case SHRT_MAX:
  139. goto start;
  140. case 0:
  141. default:
  142. /* Success or local failure. Stop. */
  143. fc->flags |= AFS_FS_CURSOR_STOP;
  144. _leave(" = f [okay/local %d]", fc->ac.error);
  145. return false;
  146. case -ECONNABORTED:
  147. /* The far side rejected the operation on some grounds. This
  148. * might involve the server being busy or the volume having been moved.
  149. */
  150. switch (fc->ac.abort_code) {
  151. case VNOVOL:
  152. /* This fileserver doesn't know about the volume.
  153. * - May indicate that the VL is wrong - retry once and compare
  154. * the results.
  155. * - May indicate that the fileserver couldn't attach to the vol.
  156. */
  157. if (fc->flags & AFS_FS_CURSOR_VNOVOL) {
  158. fc->ac.error = -EREMOTEIO;
  159. goto next_server;
  160. }
  161. write_lock(&vnode->volume->servers_lock);
  162. fc->server_list->vnovol_mask |= 1 << fc->index;
  163. write_unlock(&vnode->volume->servers_lock);
  164. set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags);
  165. fc->ac.error = afs_check_volume_status(vnode->volume, fc->key);
  166. if (fc->ac.error < 0)
  167. goto failed;
  168. if (test_bit(AFS_VOLUME_DELETED, &vnode->volume->flags)) {
  169. fc->ac.error = -ENOMEDIUM;
  170. goto failed;
  171. }
  172. /* If the server list didn't change, then assume that
  173. * it's the fileserver having trouble.
  174. */
  175. if (vnode->volume->servers == fc->server_list) {
  176. fc->ac.error = -EREMOTEIO;
  177. goto next_server;
  178. }
  179. /* Try again */
  180. fc->flags |= AFS_FS_CURSOR_VNOVOL;
  181. _leave(" = t [vnovol]");
  182. return true;
  183. case VSALVAGE: /* TODO: Should this return an error or iterate? */
  184. case VVOLEXISTS:
  185. case VNOSERVICE:
  186. case VONLINE:
  187. case VDISKFULL:
  188. case VOVERQUOTA:
  189. fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
  190. goto next_server;
  191. case VOFFLINE:
  192. if (!test_and_set_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags)) {
  193. afs_busy(vnode->volume, fc->ac.abort_code);
  194. clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags);
  195. }
  196. if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) {
  197. fc->ac.error = -EADV;
  198. goto failed;
  199. }
  200. if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) {
  201. fc->ac.error = -ESTALE;
  202. goto failed;
  203. }
  204. goto busy;
  205. case VSALVAGING:
  206. case VRESTARTING:
  207. case VBUSY:
  208. /* Retry after going round all the servers unless we
  209. * have a file lock we need to maintain.
  210. */
  211. if (fc->flags & AFS_FS_CURSOR_NO_VSLEEP) {
  212. fc->ac.error = -EBUSY;
  213. goto failed;
  214. }
  215. if (!test_and_set_bit(AFS_VOLUME_BUSY, &vnode->volume->flags)) {
  216. afs_busy(vnode->volume, fc->ac.abort_code);
  217. clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags);
  218. }
  219. busy:
  220. if (fc->flags & AFS_FS_CURSOR_CUR_ONLY) {
  221. if (!afs_sleep_and_retry(fc))
  222. goto failed;
  223. /* Retry with same server & address */
  224. _leave(" = t [vbusy]");
  225. return true;
  226. }
  227. fc->flags |= AFS_FS_CURSOR_VBUSY;
  228. goto next_server;
  229. case VMOVED:
  230. /* The volume migrated to another server. We consider
  231. * consider all locks and callbacks broken and request
  232. * an update from the VLDB.
  233. *
  234. * We also limit the number of VMOVED hops we will
  235. * honour, just in case someone sets up a loop.
  236. */
  237. if (fc->flags & AFS_FS_CURSOR_VMOVED) {
  238. fc->ac.error = -EREMOTEIO;
  239. goto failed;
  240. }
  241. fc->flags |= AFS_FS_CURSOR_VMOVED;
  242. set_bit(AFS_VOLUME_WAIT, &vnode->volume->flags);
  243. set_bit(AFS_VOLUME_NEEDS_UPDATE, &vnode->volume->flags);
  244. fc->ac.error = afs_check_volume_status(vnode->volume, fc->key);
  245. if (fc->ac.error < 0)
  246. goto failed;
  247. /* If the server list didn't change, then the VLDB is
  248. * out of sync with the fileservers. This is hopefully
  249. * a temporary condition, however, so we don't want to
  250. * permanently block access to the file.
  251. *
  252. * TODO: Try other fileservers if we can.
  253. *
  254. * TODO: Retry a few times with sleeps.
  255. */
  256. if (vnode->volume->servers == fc->server_list) {
  257. fc->ac.error = -ENOMEDIUM;
  258. goto failed;
  259. }
  260. goto restart_from_beginning;
  261. default:
  262. clear_bit(AFS_VOLUME_OFFLINE, &vnode->volume->flags);
  263. clear_bit(AFS_VOLUME_BUSY, &vnode->volume->flags);
  264. fc->ac.error = afs_abort_to_error(fc->ac.abort_code);
  265. goto failed;
  266. }
  267. case -ENETUNREACH:
  268. case -EHOSTUNREACH:
  269. case -ECONNREFUSED:
  270. case -ETIMEDOUT:
  271. case -ETIME:
  272. _debug("no conn");
  273. goto iterate_address;
  274. case -ECONNRESET:
  275. _debug("call reset");
  276. goto failed;
  277. }
  278. restart_from_beginning:
  279. _debug("restart");
  280. afs_end_cursor(&fc->ac);
  281. afs_put_cb_interest(afs_v2net(vnode), fc->cbi);
  282. fc->cbi = NULL;
  283. afs_put_serverlist(afs_v2net(vnode), fc->server_list);
  284. fc->server_list = NULL;
  285. start:
  286. _debug("start");
  287. /* See if we need to do an update of the volume record. Note that the
  288. * volume may have moved or even have been deleted.
  289. */
  290. fc->ac.error = afs_check_volume_status(vnode->volume, fc->key);
  291. if (fc->ac.error < 0)
  292. goto failed;
  293. if (!afs_start_fs_iteration(fc, vnode))
  294. goto failed;
  295. use_server:
  296. _debug("use");
  297. /* We're starting on a different fileserver from the list. We need to
  298. * check it, create a callback intercept, find its address list and
  299. * probe its capabilities before we use it.
  300. */
  301. ASSERTCMP(fc->ac.alist, ==, NULL);
  302. server = fc->server_list->servers[fc->index].server;
  303. if (!afs_check_server_record(fc, server))
  304. goto failed;
  305. _debug("USING SERVER: %pU", &server->uuid);
  306. /* Make sure we've got a callback interest record for this server. We
  307. * have to link it in before we send the request as we can be sent a
  308. * break request before we've finished decoding the reply and
  309. * installing the vnode.
  310. */
  311. fc->ac.error = afs_register_server_cb_interest(vnode, fc->server_list,
  312. fc->index);
  313. if (fc->ac.error < 0)
  314. goto failed;
  315. fc->cbi = afs_get_cb_interest(vnode->cb_interest);
  316. read_lock(&server->fs_lock);
  317. alist = rcu_dereference_protected(server->addresses,
  318. lockdep_is_held(&server->fs_lock));
  319. afs_get_addrlist(alist);
  320. read_unlock(&server->fs_lock);
  321. memset(&fc->ac, 0, sizeof(fc->ac));
  322. /* Probe the current fileserver if we haven't done so yet. */
  323. if (!test_bit(AFS_SERVER_FL_PROBED, &server->flags)) {
  324. fc->ac.alist = afs_get_addrlist(alist);
  325. if (!afs_probe_fileserver(fc)) {
  326. switch (fc->ac.error) {
  327. case -ENOMEM:
  328. case -ERESTARTSYS:
  329. case -EINTR:
  330. goto failed;
  331. default:
  332. goto next_server;
  333. }
  334. }
  335. }
  336. if (!fc->ac.alist)
  337. fc->ac.alist = alist;
  338. else
  339. afs_put_addrlist(alist);
  340. fc->ac.start = READ_ONCE(alist->index);
  341. fc->ac.index = fc->ac.start;
  342. iterate_address:
  343. ASSERT(fc->ac.alist);
  344. _debug("iterate %d/%d", fc->ac.index, fc->ac.alist->nr_addrs);
  345. /* Iterate over the current server's address list to try and find an
  346. * address on which it will respond to us.
  347. */
  348. if (!afs_iterate_addresses(&fc->ac))
  349. goto next_server;
  350. _leave(" = t");
  351. return true;
  352. next_server:
  353. _debug("next");
  354. afs_end_cursor(&fc->ac);
  355. afs_put_cb_interest(afs_v2net(vnode), fc->cbi);
  356. fc->cbi = NULL;
  357. fc->index++;
  358. if (fc->index >= fc->server_list->nr_servers)
  359. fc->index = 0;
  360. if (fc->index != fc->start)
  361. goto use_server;
  362. /* That's all the servers poked to no good effect. Try again if some
  363. * of them were busy.
  364. */
  365. if (fc->flags & AFS_FS_CURSOR_VBUSY)
  366. goto restart_from_beginning;
  367. fc->ac.error = -EDESTADDRREQ;
  368. goto failed;
  369. failed:
  370. fc->flags |= AFS_FS_CURSOR_STOP;
  371. afs_end_cursor(&fc->ac);
  372. _leave(" = f [failed %d]", fc->ac.error);
  373. return false;
  374. }
  375. /*
  376. * Select the same fileserver we used for a vnode before and only that
  377. * fileserver. We use this when we have a lock on that file, which is backed
  378. * only by the fileserver we obtained it from.
  379. */
  380. bool afs_select_current_fileserver(struct afs_fs_cursor *fc)
  381. {
  382. struct afs_vnode *vnode = fc->vnode;
  383. struct afs_cb_interest *cbi = vnode->cb_interest;
  384. struct afs_addr_list *alist;
  385. _enter("");
  386. switch (fc->ac.error) {
  387. case SHRT_MAX:
  388. if (!cbi) {
  389. fc->ac.error = -ESTALE;
  390. fc->flags |= AFS_FS_CURSOR_STOP;
  391. return false;
  392. }
  393. fc->cbi = afs_get_cb_interest(vnode->cb_interest);
  394. read_lock(&cbi->server->fs_lock);
  395. alist = rcu_dereference_protected(cbi->server->addresses,
  396. lockdep_is_held(&cbi->server->fs_lock));
  397. afs_get_addrlist(alist);
  398. read_unlock(&cbi->server->fs_lock);
  399. if (!alist) {
  400. fc->ac.error = -ESTALE;
  401. fc->flags |= AFS_FS_CURSOR_STOP;
  402. return false;
  403. }
  404. memset(&fc->ac, 0, sizeof(fc->ac));
  405. fc->ac.alist = alist;
  406. fc->ac.start = READ_ONCE(alist->index);
  407. fc->ac.index = fc->ac.start;
  408. goto iterate_address;
  409. case 0:
  410. default:
  411. /* Success or local failure. Stop. */
  412. fc->flags |= AFS_FS_CURSOR_STOP;
  413. _leave(" = f [okay/local %d]", fc->ac.error);
  414. return false;
  415. case -ECONNABORTED:
  416. fc->flags |= AFS_FS_CURSOR_STOP;
  417. _leave(" = f [abort]");
  418. return false;
  419. case -ENETUNREACH:
  420. case -EHOSTUNREACH:
  421. case -ECONNREFUSED:
  422. case -ETIMEDOUT:
  423. case -ETIME:
  424. _debug("no conn");
  425. goto iterate_address;
  426. }
  427. iterate_address:
  428. /* Iterate over the current server's address list to try and find an
  429. * address on which it will respond to us.
  430. */
  431. if (afs_iterate_addresses(&fc->ac)) {
  432. _leave(" = t");
  433. return true;
  434. }
  435. afs_end_cursor(&fc->ac);
  436. return false;
  437. }
  438. /*
  439. * Tidy up a filesystem cursor and unlock the vnode.
  440. */
  441. int afs_end_vnode_operation(struct afs_fs_cursor *fc)
  442. {
  443. struct afs_net *net = afs_v2net(fc->vnode);
  444. int ret;
  445. mutex_unlock(&fc->vnode->io_lock);
  446. afs_end_cursor(&fc->ac);
  447. afs_put_cb_interest(net, fc->cbi);
  448. afs_put_serverlist(net, fc->server_list);
  449. ret = fc->ac.error;
  450. if (ret == -ECONNABORTED)
  451. afs_abort_to_error(fc->ac.abort_code);
  452. return fc->ac.error;
  453. }