references.cc 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123
  1. #include "references.hh"
  2. #include "hash.hh"
  3. #include "util.hh"
  4. #include "archive.hh"
  5. #include <map>
  6. #include <cstdlib>
  7. namespace nix {
  8. static unsigned int refLength = 32; /* characters */
  9. static void search(const unsigned char * s, unsigned int len,
  10. StringSet & hashes, StringSet & seen)
  11. {
  12. static bool initialised = false;
  13. static bool isBase32[256];
  14. if (!initialised) {
  15. for (unsigned int i = 0; i < 256; ++i) isBase32[i] = false;
  16. for (unsigned int i = 0; i < base32Chars.size(); ++i)
  17. isBase32[(unsigned char) base32Chars[i]] = true;
  18. initialised = true;
  19. }
  20. for (unsigned int i = 0; i + refLength <= len; ) {
  21. int j;
  22. bool match = true;
  23. for (j = refLength - 1; j >= 0; --j)
  24. if (!isBase32[(unsigned char) s[i + j]]) {
  25. i += j + 1;
  26. match = false;
  27. break;
  28. }
  29. if (!match) continue;
  30. string ref((const char *) s + i, refLength);
  31. if (hashes.find(ref) != hashes.end()) {
  32. debug(format("found reference to `%1%' at offset `%2%'")
  33. % ref % i);
  34. seen.insert(ref);
  35. hashes.erase(ref);
  36. }
  37. ++i;
  38. }
  39. }
  40. struct RefScanSink : Sink
  41. {
  42. HashSink hashSink;
  43. StringSet hashes;
  44. StringSet seen;
  45. string tail;
  46. RefScanSink() : hashSink(htSHA256) { }
  47. void operator () (const unsigned char * data, size_t len);
  48. };
  49. void RefScanSink::operator () (const unsigned char * data, size_t len)
  50. {
  51. hashSink(data, len);
  52. /* It's possible that a reference spans the previous and current
  53. fragment, so search in the concatenation of the tail of the
  54. previous fragment and the start of the current fragment. */
  55. string s = tail + string((const char *) data, len > refLength ? refLength : len);
  56. search((const unsigned char *) s.data(), s.size(), hashes, seen);
  57. search(data, len, hashes, seen);
  58. unsigned int tailLen = len <= refLength ? len : refLength;
  59. tail =
  60. string(tail, tail.size() < refLength - tailLen ? 0 : tail.size() - (refLength - tailLen)) +
  61. string((const char *) data + len - tailLen, tailLen);
  62. }
  63. PathSet scanForReferences(const string & path,
  64. const PathSet & refs, HashResult & hash)
  65. {
  66. RefScanSink sink;
  67. std::map<string, Path> backMap;
  68. /* For efficiency (and a higher hit rate), just search for the
  69. hash part of the file name. (This assumes that all references
  70. have the form `HASH-bla'). */
  71. foreach (PathSet::const_iterator, i, refs) {
  72. string baseName = baseNameOf(*i);
  73. string::size_type pos = baseName.find('-');
  74. if (pos == string::npos)
  75. throw Error(format("bad reference `%1%'") % *i);
  76. string s = string(baseName, 0, pos);
  77. assert(s.size() == refLength);
  78. assert(backMap.find(s) == backMap.end());
  79. // parseHash(htSHA256, s);
  80. sink.hashes.insert(s);
  81. backMap[s] = *i;
  82. }
  83. /* Look for the hashes in the NAR dump of the path. */
  84. dumpPath(path, sink);
  85. /* Map the hashes found back to their store paths. */
  86. PathSet found;
  87. foreach (StringSet::iterator, i, sink.seen) {
  88. std::map<string, Path>::iterator j;
  89. if ((j = backMap.find(*i)) == backMap.end()) abort();
  90. found.insert(j->second);
  91. }
  92. hash = sink.hashSink.finish();
  93. return found;
  94. }
  95. }