plock.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515
  1. /*
  2. * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved.
  3. *
  4. * This copyrighted material is made available to anyone wishing to use,
  5. * modify, copy, or redistribute it subject to the terms and conditions
  6. * of the GNU General Public License version 2.
  7. */
  8. #include <linux/fs.h>
  9. #include <linux/miscdevice.h>
  10. #include <linux/poll.h>
  11. #include <linux/dlm.h>
  12. #include <linux/dlm_plock.h>
  13. #include <linux/slab.h>
  14. #include "dlm_internal.h"
  15. #include "lockspace.h"
  16. static spinlock_t ops_lock;
  17. static struct list_head send_list;
  18. static struct list_head recv_list;
  19. static wait_queue_head_t send_wq;
  20. static wait_queue_head_t recv_wq;
  21. struct plock_op {
  22. struct list_head list;
  23. int done;
  24. struct dlm_plock_info info;
  25. };
  26. struct plock_xop {
  27. struct plock_op xop;
  28. int (*callback)(struct file_lock *fl, int result);
  29. void *fl;
  30. void *file;
  31. struct file_lock flc;
  32. };
  33. static inline void set_version(struct dlm_plock_info *info)
  34. {
  35. info->version[0] = DLM_PLOCK_VERSION_MAJOR;
  36. info->version[1] = DLM_PLOCK_VERSION_MINOR;
  37. info->version[2] = DLM_PLOCK_VERSION_PATCH;
  38. }
  39. static int check_version(struct dlm_plock_info *info)
  40. {
  41. if ((DLM_PLOCK_VERSION_MAJOR != info->version[0]) ||
  42. (DLM_PLOCK_VERSION_MINOR < info->version[1])) {
  43. log_print("plock device version mismatch: "
  44. "kernel (%u.%u.%u), user (%u.%u.%u)",
  45. DLM_PLOCK_VERSION_MAJOR,
  46. DLM_PLOCK_VERSION_MINOR,
  47. DLM_PLOCK_VERSION_PATCH,
  48. info->version[0],
  49. info->version[1],
  50. info->version[2]);
  51. return -EINVAL;
  52. }
  53. return 0;
  54. }
  55. static void send_op(struct plock_op *op)
  56. {
  57. set_version(&op->info);
  58. INIT_LIST_HEAD(&op->list);
  59. spin_lock(&ops_lock);
  60. list_add_tail(&op->list, &send_list);
  61. spin_unlock(&ops_lock);
  62. wake_up(&send_wq);
  63. }
  64. /* If a process was killed while waiting for the only plock on a file,
  65. locks_remove_posix will not see any lock on the file so it won't
  66. send an unlock-close to us to pass on to userspace to clean up the
  67. abandoned waiter. So, we have to insert the unlock-close when the
  68. lock call is interrupted. */
  69. static void do_unlock_close(struct dlm_ls *ls, u64 number,
  70. struct file *file, struct file_lock *fl)
  71. {
  72. struct plock_op *op;
  73. op = kzalloc(sizeof(*op), GFP_NOFS);
  74. if (!op)
  75. return;
  76. op->info.optype = DLM_PLOCK_OP_UNLOCK;
  77. op->info.pid = fl->fl_pid;
  78. op->info.fsid = ls->ls_global_id;
  79. op->info.number = number;
  80. op->info.start = 0;
  81. op->info.end = OFFSET_MAX;
  82. if (fl->fl_lmops && fl->fl_lmops->lm_grant)
  83. op->info.owner = (__u64) fl->fl_pid;
  84. else
  85. op->info.owner = (__u64)(long) fl->fl_owner;
  86. op->info.flags |= DLM_PLOCK_FL_CLOSE;
  87. send_op(op);
  88. }
  89. int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
  90. int cmd, struct file_lock *fl)
  91. {
  92. struct dlm_ls *ls;
  93. struct plock_op *op;
  94. struct plock_xop *xop;
  95. int rv;
  96. ls = dlm_find_lockspace_local(lockspace);
  97. if (!ls)
  98. return -EINVAL;
  99. xop = kzalloc(sizeof(*xop), GFP_NOFS);
  100. if (!xop) {
  101. rv = -ENOMEM;
  102. goto out;
  103. }
  104. op = &xop->xop;
  105. op->info.optype = DLM_PLOCK_OP_LOCK;
  106. op->info.pid = fl->fl_pid;
  107. op->info.ex = (fl->fl_type == F_WRLCK);
  108. op->info.wait = IS_SETLKW(cmd);
  109. op->info.fsid = ls->ls_global_id;
  110. op->info.number = number;
  111. op->info.start = fl->fl_start;
  112. op->info.end = fl->fl_end;
  113. if (fl->fl_lmops && fl->fl_lmops->lm_grant) {
  114. /* fl_owner is lockd which doesn't distinguish
  115. processes on the nfs client */
  116. op->info.owner = (__u64) fl->fl_pid;
  117. xop->callback = fl->fl_lmops->lm_grant;
  118. locks_init_lock(&xop->flc);
  119. locks_copy_lock(&xop->flc, fl);
  120. xop->fl = fl;
  121. xop->file = file;
  122. } else {
  123. op->info.owner = (__u64)(long) fl->fl_owner;
  124. xop->callback = NULL;
  125. }
  126. send_op(op);
  127. if (xop->callback == NULL) {
  128. rv = wait_event_interruptible(recv_wq, (op->done != 0));
  129. if (rv == -ERESTARTSYS) {
  130. log_debug(ls, "dlm_posix_lock: wait killed %llx",
  131. (unsigned long long)number);
  132. spin_lock(&ops_lock);
  133. list_del(&op->list);
  134. spin_unlock(&ops_lock);
  135. kfree(xop);
  136. do_unlock_close(ls, number, file, fl);
  137. goto out;
  138. }
  139. } else {
  140. rv = FILE_LOCK_DEFERRED;
  141. goto out;
  142. }
  143. spin_lock(&ops_lock);
  144. if (!list_empty(&op->list)) {
  145. log_error(ls, "dlm_posix_lock: op on list %llx",
  146. (unsigned long long)number);
  147. list_del(&op->list);
  148. }
  149. spin_unlock(&ops_lock);
  150. rv = op->info.rv;
  151. if (!rv) {
  152. if (locks_lock_file_wait(file, fl) < 0)
  153. log_error(ls, "dlm_posix_lock: vfs lock error %llx",
  154. (unsigned long long)number);
  155. }
  156. kfree(xop);
  157. out:
  158. dlm_put_lockspace(ls);
  159. return rv;
  160. }
  161. EXPORT_SYMBOL_GPL(dlm_posix_lock);
  162. /* Returns failure iff a successful lock operation should be canceled */
  163. static int dlm_plock_callback(struct plock_op *op)
  164. {
  165. struct file *file;
  166. struct file_lock *fl;
  167. struct file_lock *flc;
  168. int (*notify)(struct file_lock *fl, int result) = NULL;
  169. struct plock_xop *xop = (struct plock_xop *)op;
  170. int rv = 0;
  171. spin_lock(&ops_lock);
  172. if (!list_empty(&op->list)) {
  173. log_print("dlm_plock_callback: op on list %llx",
  174. (unsigned long long)op->info.number);
  175. list_del(&op->list);
  176. }
  177. spin_unlock(&ops_lock);
  178. /* check if the following 2 are still valid or make a copy */
  179. file = xop->file;
  180. flc = &xop->flc;
  181. fl = xop->fl;
  182. notify = xop->callback;
  183. if (op->info.rv) {
  184. notify(fl, op->info.rv);
  185. goto out;
  186. }
  187. /* got fs lock; bookkeep locally as well: */
  188. flc->fl_flags &= ~FL_SLEEP;
  189. if (posix_lock_file(file, flc, NULL)) {
  190. /*
  191. * This can only happen in the case of kmalloc() failure.
  192. * The filesystem's own lock is the authoritative lock,
  193. * so a failure to get the lock locally is not a disaster.
  194. * As long as the fs cannot reliably cancel locks (especially
  195. * in a low-memory situation), we're better off ignoring
  196. * this failure than trying to recover.
  197. */
  198. log_print("dlm_plock_callback: vfs lock error %llx file %p fl %p",
  199. (unsigned long long)op->info.number, file, fl);
  200. }
  201. rv = notify(fl, 0);
  202. if (rv) {
  203. /* XXX: We need to cancel the fs lock here: */
  204. log_print("dlm_plock_callback: lock granted after lock request "
  205. "failed; dangling lock!\n");
  206. goto out;
  207. }
  208. out:
  209. kfree(xop);
  210. return rv;
  211. }
  212. int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
  213. struct file_lock *fl)
  214. {
  215. struct dlm_ls *ls;
  216. struct plock_op *op;
  217. int rv;
  218. unsigned char fl_flags = fl->fl_flags;
  219. ls = dlm_find_lockspace_local(lockspace);
  220. if (!ls)
  221. return -EINVAL;
  222. op = kzalloc(sizeof(*op), GFP_NOFS);
  223. if (!op) {
  224. rv = -ENOMEM;
  225. goto out;
  226. }
  227. /* cause the vfs unlock to return ENOENT if lock is not found */
  228. fl->fl_flags |= FL_EXISTS;
  229. rv = locks_lock_file_wait(file, fl);
  230. if (rv == -ENOENT) {
  231. rv = 0;
  232. goto out_free;
  233. }
  234. if (rv < 0) {
  235. log_error(ls, "dlm_posix_unlock: vfs unlock error %d %llx",
  236. rv, (unsigned long long)number);
  237. }
  238. op->info.optype = DLM_PLOCK_OP_UNLOCK;
  239. op->info.pid = fl->fl_pid;
  240. op->info.fsid = ls->ls_global_id;
  241. op->info.number = number;
  242. op->info.start = fl->fl_start;
  243. op->info.end = fl->fl_end;
  244. if (fl->fl_lmops && fl->fl_lmops->lm_grant)
  245. op->info.owner = (__u64) fl->fl_pid;
  246. else
  247. op->info.owner = (__u64)(long) fl->fl_owner;
  248. if (fl->fl_flags & FL_CLOSE) {
  249. op->info.flags |= DLM_PLOCK_FL_CLOSE;
  250. send_op(op);
  251. rv = 0;
  252. goto out;
  253. }
  254. send_op(op);
  255. wait_event(recv_wq, (op->done != 0));
  256. spin_lock(&ops_lock);
  257. if (!list_empty(&op->list)) {
  258. log_error(ls, "dlm_posix_unlock: op on list %llx",
  259. (unsigned long long)number);
  260. list_del(&op->list);
  261. }
  262. spin_unlock(&ops_lock);
  263. rv = op->info.rv;
  264. if (rv == -ENOENT)
  265. rv = 0;
  266. out_free:
  267. kfree(op);
  268. out:
  269. dlm_put_lockspace(ls);
  270. fl->fl_flags = fl_flags;
  271. return rv;
  272. }
  273. EXPORT_SYMBOL_GPL(dlm_posix_unlock);
  274. int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file,
  275. struct file_lock *fl)
  276. {
  277. struct dlm_ls *ls;
  278. struct plock_op *op;
  279. int rv;
  280. ls = dlm_find_lockspace_local(lockspace);
  281. if (!ls)
  282. return -EINVAL;
  283. op = kzalloc(sizeof(*op), GFP_NOFS);
  284. if (!op) {
  285. rv = -ENOMEM;
  286. goto out;
  287. }
  288. op->info.optype = DLM_PLOCK_OP_GET;
  289. op->info.pid = fl->fl_pid;
  290. op->info.ex = (fl->fl_type == F_WRLCK);
  291. op->info.fsid = ls->ls_global_id;
  292. op->info.number = number;
  293. op->info.start = fl->fl_start;
  294. op->info.end = fl->fl_end;
  295. if (fl->fl_lmops && fl->fl_lmops->lm_grant)
  296. op->info.owner = (__u64) fl->fl_pid;
  297. else
  298. op->info.owner = (__u64)(long) fl->fl_owner;
  299. send_op(op);
  300. wait_event(recv_wq, (op->done != 0));
  301. spin_lock(&ops_lock);
  302. if (!list_empty(&op->list)) {
  303. log_error(ls, "dlm_posix_get: op on list %llx",
  304. (unsigned long long)number);
  305. list_del(&op->list);
  306. }
  307. spin_unlock(&ops_lock);
  308. /* info.rv from userspace is 1 for conflict, 0 for no-conflict,
  309. -ENOENT if there are no locks on the file */
  310. rv = op->info.rv;
  311. fl->fl_type = F_UNLCK;
  312. if (rv == -ENOENT)
  313. rv = 0;
  314. else if (rv > 0) {
  315. locks_init_lock(fl);
  316. fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK;
  317. fl->fl_flags = FL_POSIX;
  318. fl->fl_pid = -op->info.pid;
  319. fl->fl_start = op->info.start;
  320. fl->fl_end = op->info.end;
  321. rv = 0;
  322. }
  323. kfree(op);
  324. out:
  325. dlm_put_lockspace(ls);
  326. return rv;
  327. }
  328. EXPORT_SYMBOL_GPL(dlm_posix_get);
  329. /* a read copies out one plock request from the send list */
  330. static ssize_t dev_read(struct file *file, char __user *u, size_t count,
  331. loff_t *ppos)
  332. {
  333. struct dlm_plock_info info;
  334. struct plock_op *op = NULL;
  335. if (count < sizeof(info))
  336. return -EINVAL;
  337. spin_lock(&ops_lock);
  338. if (!list_empty(&send_list)) {
  339. op = list_entry(send_list.next, struct plock_op, list);
  340. if (op->info.flags & DLM_PLOCK_FL_CLOSE)
  341. list_del(&op->list);
  342. else
  343. list_move(&op->list, &recv_list);
  344. memcpy(&info, &op->info, sizeof(info));
  345. }
  346. spin_unlock(&ops_lock);
  347. if (!op)
  348. return -EAGAIN;
  349. /* there is no need to get a reply from userspace for unlocks
  350. that were generated by the vfs cleaning up for a close
  351. (the process did not make an unlock call). */
  352. if (op->info.flags & DLM_PLOCK_FL_CLOSE)
  353. kfree(op);
  354. if (copy_to_user(u, &info, sizeof(info)))
  355. return -EFAULT;
  356. return sizeof(info);
  357. }
  358. /* a write copies in one plock result that should match a plock_op
  359. on the recv list */
  360. static ssize_t dev_write(struct file *file, const char __user *u, size_t count,
  361. loff_t *ppos)
  362. {
  363. struct dlm_plock_info info;
  364. struct plock_op *op;
  365. int found = 0, do_callback = 0;
  366. if (count != sizeof(info))
  367. return -EINVAL;
  368. if (copy_from_user(&info, u, sizeof(info)))
  369. return -EFAULT;
  370. if (check_version(&info))
  371. return -EINVAL;
  372. spin_lock(&ops_lock);
  373. list_for_each_entry(op, &recv_list, list) {
  374. if (op->info.fsid == info.fsid &&
  375. op->info.number == info.number &&
  376. op->info.owner == info.owner) {
  377. struct plock_xop *xop = (struct plock_xop *)op;
  378. list_del_init(&op->list);
  379. memcpy(&op->info, &info, sizeof(info));
  380. if (xop->callback)
  381. do_callback = 1;
  382. else
  383. op->done = 1;
  384. found = 1;
  385. break;
  386. }
  387. }
  388. spin_unlock(&ops_lock);
  389. if (found) {
  390. if (do_callback)
  391. dlm_plock_callback(op);
  392. else
  393. wake_up(&recv_wq);
  394. } else
  395. log_print("dev_write no op %x %llx", info.fsid,
  396. (unsigned long long)info.number);
  397. return count;
  398. }
  399. static __poll_t dev_poll(struct file *file, poll_table *wait)
  400. {
  401. __poll_t mask = 0;
  402. poll_wait(file, &send_wq, wait);
  403. spin_lock(&ops_lock);
  404. if (!list_empty(&send_list))
  405. mask = EPOLLIN | EPOLLRDNORM;
  406. spin_unlock(&ops_lock);
  407. return mask;
  408. }
  409. static const struct file_operations dev_fops = {
  410. .read = dev_read,
  411. .write = dev_write,
  412. .poll = dev_poll,
  413. .owner = THIS_MODULE,
  414. .llseek = noop_llseek,
  415. };
  416. static struct miscdevice plock_dev_misc = {
  417. .minor = MISC_DYNAMIC_MINOR,
  418. .name = DLM_PLOCK_MISC_NAME,
  419. .fops = &dev_fops
  420. };
  421. int dlm_plock_init(void)
  422. {
  423. int rv;
  424. spin_lock_init(&ops_lock);
  425. INIT_LIST_HEAD(&send_list);
  426. INIT_LIST_HEAD(&recv_list);
  427. init_waitqueue_head(&send_wq);
  428. init_waitqueue_head(&recv_wq);
  429. rv = misc_register(&plock_dev_misc);
  430. if (rv)
  431. log_print("dlm_plock_init: misc_register failed %d", rv);
  432. return rv;
  433. }
  434. void dlm_plock_exit(void)
  435. {
  436. misc_deregister(&plock_dev_misc);
  437. }