optimise-store.cc 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278
  1. #include "config.h"
  2. #include "util.hh"
  3. #include "local-store.hh"
  4. #include "globals.hh"
  5. #include <cstdlib>
  6. #include <cstring>
  7. #include <sys/types.h>
  8. #include <sys/stat.h>
  9. #include <unistd.h>
  10. #include <errno.h>
  11. #include <stdio.h>
  12. namespace nix {
  13. static void makeWritable(const Path & path)
  14. {
  15. struct stat st;
  16. if (lstat(path.c_str(), &st))
  17. throw SysError(format("getting attributes of path `%1%'") % path);
  18. if (chmod(path.c_str(), st.st_mode | S_IWUSR) == -1)
  19. throw SysError(format("changing writability of `%1%'") % path);
  20. }
  21. struct MakeReadOnly
  22. {
  23. Path path;
  24. MakeReadOnly(const Path & path) : path(path) { }
  25. ~MakeReadOnly()
  26. {
  27. try {
  28. /* This will make the path read-only. */
  29. if (path != "") canonicaliseTimestampAndPermissions(path);
  30. } catch (...) {
  31. ignoreException();
  32. }
  33. }
  34. };
  35. LocalStore::InodeHash LocalStore::loadInodeHash()
  36. {
  37. printMsg(lvlDebug, "loading hash inodes in memory");
  38. InodeHash inodeHash;
  39. AutoCloseDir dir = opendir(linksDir.c_str());
  40. if (!dir) throw SysError(format("opening directory `%1%'") % linksDir);
  41. struct dirent * dirent;
  42. while (errno = 0, dirent = readdir(dir)) { /* sic */
  43. checkInterrupt();
  44. // We don't care if we hit non-hash files, anything goes
  45. inodeHash.insert(dirent->d_ino);
  46. }
  47. if (errno) throw SysError(format("reading directory `%1%'") % linksDir);
  48. printMsg(lvlTalkative, format("loaded %1% hash inodes") % inodeHash.size());
  49. return inodeHash;
  50. }
  51. Strings LocalStore::readDirectoryIgnoringInodes(const Path & path, const InodeHash & inodeHash)
  52. {
  53. Strings names;
  54. AutoCloseDir dir = opendir(path.c_str());
  55. if (!dir) throw SysError(format("opening directory `%1%'") % path);
  56. struct dirent * dirent;
  57. while (errno = 0, dirent = readdir(dir)) { /* sic */
  58. checkInterrupt();
  59. if (inodeHash.count(dirent->d_ino)) {
  60. printMsg(lvlDebug, format("`%1%' is already linked") % dirent->d_name);
  61. continue;
  62. }
  63. string name = dirent->d_name;
  64. if (name == "." || name == "..") continue;
  65. names.push_back(name);
  66. }
  67. if (errno) throw SysError(format("reading directory `%1%'") % path);
  68. return names;
  69. }
  70. void LocalStore::optimisePath_(OptimiseStats & stats, const Path & path, InodeHash & inodeHash)
  71. {
  72. checkInterrupt();
  73. struct stat st;
  74. if (lstat(path.c_str(), &st))
  75. throw SysError(format("getting attributes of path `%1%'") % path);
  76. if (S_ISDIR(st.st_mode)) {
  77. Strings names = readDirectoryIgnoringInodes(path, inodeHash);
  78. foreach (Strings::iterator, i, names)
  79. optimisePath_(stats, path + "/" + *i, inodeHash);
  80. return;
  81. }
  82. /* We can hard link regular files and maybe symlinks. */
  83. if (!S_ISREG(st.st_mode)
  84. #if CAN_LINK_SYMLINK
  85. && !S_ISLNK(st.st_mode)
  86. #endif
  87. ) return;
  88. /* Sometimes SNAFUs can cause files in the store to be
  89. modified, in particular when running programs as root under
  90. GuixSD (example: $fontconfig/var/cache being modified). Skip
  91. those files. FIXME: check the modification time. */
  92. if (S_ISREG(st.st_mode) && (st.st_mode & S_IWUSR)) {
  93. printMsg(lvlError, format("skipping suspicious writable file `%1%'") % path);
  94. return;
  95. }
  96. /* This can still happen on top-level files. */
  97. if (st.st_nlink > 1 && inodeHash.count(st.st_ino)) {
  98. printMsg(lvlDebug, format("`%1%' is already linked, with %2% other file(s).") % path % (st.st_nlink - 2));
  99. return;
  100. }
  101. /* Hash the file. Note that hashPath() returns the hash over the
  102. NAR serialisation, which includes the execute bit on the file.
  103. Thus, executable and non-executable files with the same
  104. contents *won't* be linked (which is good because otherwise the
  105. permissions would be screwed up).
  106. Also note that if `path' is a symlink, then we're hashing the
  107. contents of the symlink (i.e. the result of readlink()), not
  108. the contents of the target (which may not even exist). */
  109. Hash hash = hashPath(htSHA256, path).first;
  110. printMsg(lvlDebug, format("`%1%' has hash `%2%'") % path % printHash(hash));
  111. /* Check if this is a known hash. */
  112. Path linkPath = linksDir + "/" + printHash32(hash);
  113. retry:
  114. if (!pathExists(linkPath)) {
  115. /* Nope, create a hard link in the links directory. */
  116. if (link(path.c_str(), linkPath.c_str()) == 0) {
  117. inodeHash.insert(st.st_ino);
  118. return;
  119. }
  120. switch (errno) {
  121. case EEXIST:
  122. /* Fall through if another process created ‘linkPath’ before
  123. we did. */
  124. break;
  125. case ENOSPC:
  126. /* On ext4, that probably means the directory index is full. When
  127. that happens, it's fine to ignore it: we just effectively
  128. disable deduplication of this file. */
  129. printMsg(lvlInfo, format("cannot link `%1%' to `%2%': %3%")
  130. % linkPath % path % strerror(ENOSPC));
  131. return;
  132. default:
  133. throw SysError(format("cannot link `%1%' to `%2%'") % linkPath % path);
  134. }
  135. }
  136. /* Yes! We've seen a file with the same contents. Replace the
  137. current file with a hard link to that file. */
  138. struct stat stLink;
  139. if (lstat(linkPath.c_str(), &stLink))
  140. throw SysError(format("getting attributes of path `%1%'") % linkPath);
  141. if (st.st_ino == stLink.st_ino) {
  142. printMsg(lvlDebug, format("`%1%' is already linked to `%2%'") % path % linkPath);
  143. return;
  144. }
  145. if (st.st_size != stLink.st_size) {
  146. printMsg(lvlError, format("removing corrupted link ‘%1%’") % linkPath);
  147. unlink(linkPath.c_str());
  148. goto retry;
  149. }
  150. printMsg(lvlTalkative, format("linking ‘%1%’ to ‘%2%’") % path % linkPath);
  151. /* Make the containing directory writable, but only if it's not
  152. the store itself (we don't want or need to mess with its
  153. permissions). */
  154. bool mustToggle = !isStorePath(path);
  155. if (mustToggle) makeWritable(dirOf(path));
  156. /* When we're done, make the directory read-only again and reset
  157. its timestamp back to 0. */
  158. MakeReadOnly makeReadOnly(mustToggle ? dirOf(path) : "");
  159. Path tempLink = (format("%1%/.tmp-link-%2%-%3%")
  160. % settings.nixStore % getpid() % rand()).str();
  161. if (link(linkPath.c_str(), tempLink.c_str()) == -1) {
  162. if (errno == EMLINK) {
  163. /* Too many links to the same file (>= 32000 on most file
  164. systems). This is likely to happen with empty files.
  165. Just shrug and ignore. */
  166. if (st.st_size)
  167. printMsg(lvlInfo, format("`%1%' has maximum number of links") % linkPath);
  168. return;
  169. }
  170. throw SysError(format("cannot link `%1%' to `%2%'") % tempLink % linkPath);
  171. }
  172. /* Atomically replace the old file with the new hard link. */
  173. if (rename(tempLink.c_str(), path.c_str()) == -1) {
  174. if (unlink(tempLink.c_str()) == -1)
  175. printMsg(lvlError, format("unable to unlink `%1%'") % tempLink);
  176. if (errno == EMLINK) {
  177. /* Some filesystems generate too many links on the rename,
  178. rather than on the original link. (Probably it
  179. temporarily increases the st_nlink field before
  180. decreasing it again.) */
  181. if (st.st_size)
  182. printMsg(lvlInfo, format("`%1%' has maximum number of links") % linkPath);
  183. return;
  184. }
  185. throw SysError(format("cannot rename `%1%' to `%2%'") % tempLink % path);
  186. }
  187. stats.filesLinked++;
  188. stats.bytesFreed += st.st_size;
  189. stats.blocksFreed += st.st_blocks;
  190. }
  191. void LocalStore::optimiseStore(OptimiseStats & stats)
  192. {
  193. PathSet paths = queryAllValidPaths();
  194. InodeHash inodeHash = loadInodeHash();
  195. foreach (PathSet::iterator, i, paths) {
  196. addTempRoot(*i);
  197. if (!isValidPath(*i)) continue; /* path was GC'ed, probably */
  198. startNest(nest, lvlChatty, format("hashing files in `%1%'") % *i);
  199. optimisePath_(stats, *i, inodeHash);
  200. }
  201. }
  202. static string showBytes(unsigned long long bytes)
  203. {
  204. return (format("%.2f MiB") % (bytes / (1024.0 * 1024.0))).str();
  205. }
  206. void LocalStore::optimiseStore()
  207. {
  208. OptimiseStats stats;
  209. optimiseStore(stats);
  210. printMsg(lvlError,
  211. format("%1% freed by hard-linking %2% files")
  212. % showBytes(stats.bytesFreed)
  213. % stats.filesLinked);
  214. }
  215. void LocalStore::optimisePath(const Path & path)
  216. {
  217. OptimiseStats stats;
  218. InodeHash inodeHash;
  219. if (settings.autoOptimiseStore) optimisePath_(stats, path, inodeHash);
  220. }
  221. }