copy_up.c 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417
  1. /*
  2. *
  3. * Copyright (C) 2011 Novell Inc.
  4. *
  5. * This program is free software; you can redistribute it and/or modify it
  6. * under the terms of the GNU General Public License version 2 as published by
  7. * the Free Software Foundation.
  8. */
  9. #include <linux/fs.h>
  10. #include <linux/slab.h>
  11. #include <linux/file.h>
  12. #include <linux/splice.h>
  13. #include <linux/xattr.h>
  14. #include <linux/security.h>
  15. #include <linux/uaccess.h>
  16. #include <linux/sched.h>
  17. #include <linux/namei.h>
  18. #include "overlayfs.h"
  /*
   * Largest number of bytes ovl_copy_up_data() asks do_splice_direct() to
   * move in a single call (1 << 20 == 1048576).
   */
  #define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
  20. int ovl_copy_xattr(struct dentry *old, struct dentry *new)
  21. {
  22. ssize_t list_size, size;
  23. char *buf, *name, *value;
  24. int error;
  25. if (!old->d_inode->i_op->getxattr ||
  26. !new->d_inode->i_op->getxattr)
  27. return 0;
  28. list_size = vfs_listxattr(old, NULL, 0);
  29. if (list_size <= 0) {
  30. if (list_size == -EOPNOTSUPP)
  31. return 0;
  32. return list_size;
  33. }
  34. buf = kzalloc(list_size, GFP_KERNEL);
  35. if (!buf)
  36. return -ENOMEM;
  37. error = -ENOMEM;
  38. value = kmalloc(XATTR_SIZE_MAX, GFP_KERNEL);
  39. if (!value)
  40. goto out;
  41. list_size = vfs_listxattr(old, buf, list_size);
  42. if (list_size <= 0) {
  43. error = list_size;
  44. goto out_free_value;
  45. }
  46. for (name = buf; name < (buf + list_size); name += strlen(name) + 1) {
  47. size = vfs_getxattr(old, name, value, XATTR_SIZE_MAX);
  48. if (size <= 0) {
  49. error = size;
  50. goto out_free_value;
  51. }
  52. error = vfs_setxattr(new, name, value, size, 0);
  53. if (error)
  54. goto out_free_value;
  55. }
  56. out_free_value:
  57. kfree(value);
  58. out:
  59. kfree(buf);
  60. return error;
  61. }
  62. static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
  63. {
  64. struct file *old_file;
  65. struct file *new_file;
  66. loff_t old_pos = 0;
  67. loff_t new_pos = 0;
  68. int error = 0;
  69. if (len == 0)
  70. return 0;
  71. old_file = ovl_path_open(old, O_RDONLY);
  72. if (IS_ERR(old_file))
  73. return PTR_ERR(old_file);
  74. new_file = ovl_path_open(new, O_WRONLY);
  75. if (IS_ERR(new_file)) {
  76. error = PTR_ERR(new_file);
  77. goto out_fput;
  78. }
  79. /* FIXME: copy up sparse files efficiently */
  80. while (len) {
  81. size_t this_len = OVL_COPY_UP_CHUNK_SIZE;
  82. long bytes;
  83. if (len < this_len)
  84. this_len = len;
  85. if (signal_pending_state(TASK_KILLABLE, current)) {
  86. error = -EINTR;
  87. break;
  88. }
  89. bytes = do_splice_direct(old_file, &old_pos,
  90. new_file, &new_pos,
  91. this_len, SPLICE_F_MOVE);
  92. if (bytes <= 0) {
  93. error = bytes;
  94. break;
  95. }
  96. WARN_ON(old_pos != new_pos);
  97. len -= bytes;
  98. }
  99. fput(new_file);
  100. out_fput:
  101. fput(old_file);
  102. return error;
  103. }
  104. static char *ovl_read_symlink(struct dentry *realdentry)
  105. {
  106. int res;
  107. char *buf;
  108. struct inode *inode = realdentry->d_inode;
  109. mm_segment_t old_fs;
  110. res = -EINVAL;
  111. if (!inode->i_op->readlink)
  112. goto err;
  113. res = -ENOMEM;
  114. buf = (char *) __get_free_page(GFP_KERNEL);
  115. if (!buf)
  116. goto err;
  117. old_fs = get_fs();
  118. set_fs(get_ds());
  119. /* The cast to a user pointer is valid due to the set_fs() */
  120. res = inode->i_op->readlink(realdentry,
  121. (char __user *)buf, PAGE_SIZE - 1);
  122. set_fs(old_fs);
  123. if (res < 0) {
  124. free_page((unsigned long) buf);
  125. goto err;
  126. }
  127. buf[res] = '\0';
  128. return buf;
  129. err:
  130. return ERR_PTR(res);
  131. }
  132. static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
  133. {
  134. struct iattr attr = {
  135. .ia_valid =
  136. ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET,
  137. .ia_atime = stat->atime,
  138. .ia_mtime = stat->mtime,
  139. };
  140. return notify_change(upperdentry, &attr, NULL);
  141. }
  142. int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
  143. {
  144. int err = 0;
  145. if (!S_ISLNK(stat->mode)) {
  146. struct iattr attr = {
  147. .ia_valid = ATTR_MODE,
  148. .ia_mode = stat->mode,
  149. };
  150. err = notify_change(upperdentry, &attr, NULL);
  151. }
  152. if (!err) {
  153. struct iattr attr = {
  154. .ia_valid = ATTR_UID | ATTR_GID,
  155. .ia_uid = stat->uid,
  156. .ia_gid = stat->gid,
  157. };
  158. err = notify_change(upperdentry, &attr, NULL);
  159. }
  160. if (!err)
  161. ovl_set_timestamps(upperdentry, stat);
  162. return err;
  163. }
  164. static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
  165. struct dentry *dentry, struct path *lowerpath,
  166. struct kstat *stat, struct iattr *attr,
  167. const char *link)
  168. {
  169. struct inode *wdir = workdir->d_inode;
  170. struct inode *udir = upperdir->d_inode;
  171. struct dentry *newdentry = NULL;
  172. struct dentry *upper = NULL;
  173. umode_t mode = stat->mode;
  174. int err;
  175. newdentry = ovl_lookup_temp(workdir, dentry);
  176. err = PTR_ERR(newdentry);
  177. if (IS_ERR(newdentry))
  178. goto out;
  179. upper = lookup_one_len(dentry->d_name.name, upperdir,
  180. dentry->d_name.len);
  181. err = PTR_ERR(upper);
  182. if (IS_ERR(upper))
  183. goto out1;
  184. /* Can't properly set mode on creation because of the umask */
  185. stat->mode &= S_IFMT;
  186. err = ovl_create_real(wdir, newdentry, stat, link, NULL, true);
  187. stat->mode = mode;
  188. if (err)
  189. goto out2;
  190. if (S_ISREG(stat->mode)) {
  191. struct path upperpath;
  192. ovl_path_upper(dentry, &upperpath);
  193. BUG_ON(upperpath.dentry != NULL);
  194. upperpath.dentry = newdentry;
  195. err = ovl_copy_up_data(lowerpath, &upperpath, stat->size);
  196. if (err)
  197. goto out_cleanup;
  198. }
  199. err = ovl_copy_xattr(lowerpath->dentry, newdentry);
  200. if (err)
  201. goto out_cleanup;
  202. mutex_lock(&newdentry->d_inode->i_mutex);
  203. err = ovl_set_attr(newdentry, stat);
  204. if (!err && attr)
  205. err = notify_change(newdentry, attr, NULL);
  206. mutex_unlock(&newdentry->d_inode->i_mutex);
  207. if (err)
  208. goto out_cleanup;
  209. err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
  210. if (err)
  211. goto out_cleanup;
  212. ovl_dentry_update(dentry, newdentry);
  213. newdentry = NULL;
  214. /*
  215. * Non-directores become opaque when copied up.
  216. */
  217. if (!S_ISDIR(stat->mode))
  218. ovl_dentry_set_opaque(dentry, true);
  219. out2:
  220. dput(upper);
  221. out1:
  222. dput(newdentry);
  223. out:
  224. return err;
  225. out_cleanup:
  226. ovl_cleanup(wdir, newdentry);
  227. goto out;
  228. }
  229. /*
  230. * Copy up a single dentry
  231. *
  232. * Directory renames only allowed on "pure upper" (already created on
  233. * upper filesystem, never copied up). Directories which are on lower or
  234. * are merged may not be renamed. For these -EXDEV is returned and
  235. * userspace has to deal with it. This means, when copying up a
  236. * directory we can rely on it and ancestors being stable.
  237. *
  238. * Non-directory renames start with copy up of source if necessary. The
  239. * actual rename will only proceed once the copy up was successful. Copy
  240. * up uses upper parent i_mutex for exclusion. Since rename can change
  241. * d_parent it is possible that the copy up will lock the old parent. At
  242. * that point the file will have already been copied up anyway.
  243. */
  244. int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
  245. struct path *lowerpath, struct kstat *stat,
  246. struct iattr *attr)
  247. {
  248. struct dentry *workdir = ovl_workdir(dentry);
  249. int err;
  250. struct kstat pstat;
  251. struct path parentpath;
  252. struct dentry *upperdir;
  253. struct dentry *upperdentry;
  254. const struct cred *old_cred;
  255. struct cred *override_cred;
  256. char *link = NULL;
  257. if (WARN_ON(!workdir))
  258. return -EROFS;
  259. ovl_path_upper(parent, &parentpath);
  260. upperdir = parentpath.dentry;
  261. err = vfs_getattr(&parentpath, &pstat);
  262. if (err)
  263. return err;
  264. if (S_ISLNK(stat->mode)) {
  265. link = ovl_read_symlink(lowerpath->dentry);
  266. if (IS_ERR(link))
  267. return PTR_ERR(link);
  268. }
  269. err = -ENOMEM;
  270. override_cred = prepare_creds();
  271. if (!override_cred)
  272. goto out_free_link;
  273. override_cred->fsuid = stat->uid;
  274. override_cred->fsgid = stat->gid;
  275. /*
  276. * CAP_SYS_ADMIN for copying up extended attributes
  277. * CAP_DAC_OVERRIDE for create
  278. * CAP_FOWNER for chmod, timestamp update
  279. * CAP_FSETID for chmod
  280. * CAP_CHOWN for chown
  281. * CAP_MKNOD for mknod
  282. */
  283. cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
  284. cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
  285. cap_raise(override_cred->cap_effective, CAP_FOWNER);
  286. cap_raise(override_cred->cap_effective, CAP_FSETID);
  287. cap_raise(override_cred->cap_effective, CAP_CHOWN);
  288. cap_raise(override_cred->cap_effective, CAP_MKNOD);
  289. old_cred = override_creds(override_cred);
  290. err = -EIO;
  291. if (lock_rename(workdir, upperdir) != NULL) {
  292. pr_err("overlayfs: failed to lock workdir+upperdir\n");
  293. goto out_unlock;
  294. }
  295. upperdentry = ovl_dentry_upper(dentry);
  296. if (upperdentry) {
  297. unlock_rename(workdir, upperdir);
  298. err = 0;
  299. /* Raced with another copy-up? Do the setattr here */
  300. if (attr) {
  301. mutex_lock(&upperdentry->d_inode->i_mutex);
  302. err = notify_change(upperdentry, attr, NULL);
  303. mutex_unlock(&upperdentry->d_inode->i_mutex);
  304. }
  305. goto out_put_cred;
  306. }
  307. err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath,
  308. stat, attr, link);
  309. if (!err) {
  310. /* Restore timestamps on parent (best effort) */
  311. ovl_set_timestamps(upperdir, &pstat);
  312. }
  313. out_unlock:
  314. unlock_rename(workdir, upperdir);
  315. out_put_cred:
  316. revert_creds(old_cred);
  317. put_cred(override_cred);
  318. out_free_link:
  319. if (link)
  320. free_page((unsigned long) link);
  321. return err;
  322. }
  323. int ovl_copy_up(struct dentry *dentry)
  324. {
  325. int err;
  326. err = 0;
  327. while (!err) {
  328. struct dentry *next;
  329. struct dentry *parent;
  330. struct path lowerpath;
  331. struct kstat stat;
  332. enum ovl_path_type type = ovl_path_type(dentry);
  333. if (OVL_TYPE_UPPER(type))
  334. break;
  335. next = dget(dentry);
  336. /* find the topmost dentry not yet copied up */
  337. for (;;) {
  338. parent = dget_parent(next);
  339. type = ovl_path_type(parent);
  340. if (OVL_TYPE_UPPER(type))
  341. break;
  342. dput(next);
  343. next = parent;
  344. }
  345. ovl_path_lower(next, &lowerpath);
  346. err = vfs_getattr(&lowerpath, &stat);
  347. if (!err)
  348. err = ovl_copy_up_one(parent, next, &lowerpath, &stat, NULL);
  349. dput(parent);
  350. dput(next);
  351. }
  352. return err;
  353. }