nsUrlClassifierPrefixSet.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520
  1. /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
  2. /* This Source Code Form is subject to the terms of the Mozilla Public
  3. * License, v. 2.0. If a copy of the MPL was not distributed with this
  4. * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
  5. #include "nsUrlClassifierPrefixSet.h"
  6. #include "nsIUrlClassifierPrefixSet.h"
  7. #include "nsCOMPtr.h"
  8. #include "nsDebug.h"
  9. #include "nsPrintfCString.h"
  10. #include "nsTArray.h"
  11. #include "nsString.h"
  12. #include "nsIFile.h"
  13. #include "nsToolkitCompsCID.h"
  14. #include "nsTArray.h"
  15. #include "nsThreadUtils.h"
  16. #include "nsNetUtil.h"
  17. #include "nsISeekableStream.h"
  18. #include "nsIBufferedStreams.h"
  19. #include "nsIFileStreams.h"
  20. #include "mozilla/MemoryReporting.h"
  21. #include "mozilla/Telemetry.h"
  22. #include "mozilla/FileUtils.h"
  23. #include "mozilla/Logging.h"
  24. #include "mozilla/Unused.h"
  25. #include <algorithm>
  26. using namespace mozilla;
  27. // MOZ_LOG=UrlClassifierPrefixSet:5
  28. static LazyLogModule gUrlClassifierPrefixSetLog("UrlClassifierPrefixSet");
  29. #define LOG(args) MOZ_LOG(gUrlClassifierPrefixSetLog, mozilla::LogLevel::Debug, args)
  30. #define LOG_ENABLED() MOZ_LOG_TEST(gUrlClassifierPrefixSetLog, mozilla::LogLevel::Debug)
  31. NS_IMPL_ISUPPORTS(
  32. nsUrlClassifierPrefixSet, nsIUrlClassifierPrefixSet, nsIMemoryReporter)
  33. // Definition required due to std::max<>()
  34. const uint32_t nsUrlClassifierPrefixSet::MAX_BUFFER_SIZE;
  35. nsUrlClassifierPrefixSet::nsUrlClassifierPrefixSet()
  36. : mLock("nsUrlClassifierPrefixSet.mLock")
  37. , mTotalPrefixes(0)
  38. , mMemoryReportPath()
  39. {
  40. }
  41. NS_IMETHODIMP
  42. nsUrlClassifierPrefixSet::Init(const nsACString& aName)
  43. {
  44. mMemoryReportPath =
  45. nsPrintfCString(
  46. "explicit/storage/prefix-set/%s",
  47. (!aName.IsEmpty() ? PromiseFlatCString(aName).get() : "?!")
  48. );
  49. RegisterWeakMemoryReporter(this);
  50. return NS_OK;
  51. }
  52. nsUrlClassifierPrefixSet::~nsUrlClassifierPrefixSet()
  53. {
  54. UnregisterWeakMemoryReporter(this);
  55. }
  56. NS_IMETHODIMP
  57. nsUrlClassifierPrefixSet::SetPrefixes(const uint32_t* aArray, uint32_t aLength)
  58. {
  59. MutexAutoLock lock(mLock);
  60. nsresult rv = NS_OK;
  61. if (aLength <= 0) {
  62. if (mIndexPrefixes.Length() > 0) {
  63. LOG(("Clearing PrefixSet"));
  64. mIndexDeltas.Clear();
  65. mIndexPrefixes.Clear();
  66. mTotalPrefixes = 0;
  67. }
  68. } else {
  69. rv = MakePrefixSet(aArray, aLength);
  70. }
  71. return rv;
  72. }
  73. nsresult
  74. nsUrlClassifierPrefixSet::MakePrefixSet(const uint32_t* aPrefixes, uint32_t aLength)
  75. {
  76. mLock.AssertCurrentThreadOwns();
  77. if (aLength == 0) {
  78. return NS_OK;
  79. }
  80. #ifdef DEBUG
  81. for (uint32_t i = 1; i < aLength; i++) {
  82. MOZ_ASSERT(aPrefixes[i] >= aPrefixes[i-1]);
  83. }
  84. #endif
  85. mIndexPrefixes.Clear();
  86. mIndexDeltas.Clear();
  87. mTotalPrefixes = aLength;
  88. mIndexPrefixes.AppendElement(aPrefixes[0]);
  89. mIndexDeltas.AppendElement();
  90. uint32_t numOfDeltas = 0;
  91. uint32_t totalDeltas = 0;
  92. uint32_t previousItem = aPrefixes[0];
  93. for (uint32_t i = 1; i < aLength; i++) {
  94. if ((numOfDeltas >= DELTAS_LIMIT) ||
  95. (aPrefixes[i] - previousItem >= MAX_INDEX_DIFF)) {
  96. // Compact the previous element.
  97. // Note there is always at least one element when we get here,
  98. // because we created the first element before the loop.
  99. mIndexDeltas.LastElement().Compact();
  100. mIndexDeltas.AppendElement();
  101. mIndexPrefixes.AppendElement(aPrefixes[i]);
  102. numOfDeltas = 0;
  103. } else {
  104. uint16_t delta = aPrefixes[i] - previousItem;
  105. mIndexDeltas.LastElement().AppendElement(delta);
  106. numOfDeltas++;
  107. totalDeltas++;
  108. }
  109. previousItem = aPrefixes[i];
  110. }
  111. mIndexDeltas.LastElement().Compact();
  112. mIndexDeltas.Compact();
  113. mIndexPrefixes.Compact();
  114. LOG(("Total number of indices: %d", aLength));
  115. LOG(("Total number of deltas: %d", totalDeltas));
  116. LOG(("Total number of delta chunks: %d", mIndexDeltas.Length()));
  117. return NS_OK;
  118. }
  119. nsresult
  120. nsUrlClassifierPrefixSet::GetPrefixesNative(FallibleTArray<uint32_t>& outArray)
  121. {
  122. MutexAutoLock lock(mLock);
  123. if (!outArray.SetLength(mTotalPrefixes, fallible)) {
  124. return NS_ERROR_OUT_OF_MEMORY;
  125. }
  126. uint32_t prefixIdxLength = mIndexPrefixes.Length();
  127. uint32_t prefixCnt = 0;
  128. for (uint32_t i = 0; i < prefixIdxLength; i++) {
  129. uint32_t prefix = mIndexPrefixes[i];
  130. outArray[prefixCnt++] = prefix;
  131. for (uint32_t j = 0; j < mIndexDeltas[i].Length(); j++) {
  132. prefix += mIndexDeltas[i][j];
  133. outArray[prefixCnt++] = prefix;
  134. }
  135. }
  136. NS_ASSERTION(mTotalPrefixes == prefixCnt, "Lengths are inconsistent");
  137. return NS_OK;
  138. }
  139. NS_IMETHODIMP
  140. nsUrlClassifierPrefixSet::GetPrefixes(uint32_t* aCount,
  141. uint32_t** aPrefixes)
  142. {
  143. // No need to get mLock here because this function does not directly touch
  144. // the class's data members. (GetPrefixesNative() will get mLock, however.)
  145. NS_ENSURE_ARG_POINTER(aCount);
  146. *aCount = 0;
  147. NS_ENSURE_ARG_POINTER(aPrefixes);
  148. *aPrefixes = nullptr;
  149. FallibleTArray<uint32_t> prefixes;
  150. nsresult rv = GetPrefixesNative(prefixes);
  151. if (NS_FAILED(rv)) {
  152. return rv;
  153. }
  154. uint64_t itemCount = prefixes.Length();
  155. uint32_t* prefixArray = static_cast<uint32_t*>(moz_xmalloc(itemCount * sizeof(uint32_t)));
  156. NS_ENSURE_TRUE(prefixArray, NS_ERROR_OUT_OF_MEMORY);
  157. memcpy(prefixArray, prefixes.Elements(), sizeof(uint32_t) * itemCount);
  158. *aCount = itemCount;
  159. *aPrefixes = prefixArray;
  160. return NS_OK;
  161. }
  162. uint32_t nsUrlClassifierPrefixSet::BinSearch(uint32_t start,
  163. uint32_t end,
  164. uint32_t target)
  165. {
  166. mLock.AssertCurrentThreadOwns();
  167. while (start != end && end >= start) {
  168. uint32_t i = start + ((end - start) >> 1);
  169. uint32_t value = mIndexPrefixes[i];
  170. if (value < target) {
  171. start = i + 1;
  172. } else if (value > target) {
  173. end = i - 1;
  174. } else {
  175. return i;
  176. }
  177. }
  178. return end;
  179. }
  180. NS_IMETHODIMP
  181. nsUrlClassifierPrefixSet::Contains(uint32_t aPrefix, bool* aFound)
  182. {
  183. MutexAutoLock lock(mLock);
  184. *aFound = false;
  185. if (mIndexPrefixes.Length() == 0) {
  186. return NS_OK;
  187. }
  188. uint32_t target = aPrefix;
  189. // We want to do a "Price is Right" binary search, that is, we want to find
  190. // the index of the value either equal to the target or the closest value
  191. // that is less than the target.
  192. //
  193. if (target < mIndexPrefixes[0]) {
  194. return NS_OK;
  195. }
  196. // |binsearch| does not necessarily return the correct index (when the
  197. // target is not found) but rather it returns an index at least one away
  198. // from the correct index.
  199. // Because of this, we need to check if the target lies before the beginning
  200. // of the indices.
  201. uint32_t i = BinSearch(0, mIndexPrefixes.Length() - 1, target);
  202. if (mIndexPrefixes[i] > target && i > 0) {
  203. i--;
  204. }
  205. // Now search through the deltas for the target.
  206. uint32_t diff = target - mIndexPrefixes[i];
  207. uint32_t deltaSize = mIndexDeltas[i].Length();
  208. uint32_t deltaIndex = 0;
  209. while (diff > 0 && deltaIndex < deltaSize) {
  210. diff -= mIndexDeltas[i][deltaIndex];
  211. deltaIndex++;
  212. }
  213. if (diff == 0) {
  214. *aFound = true;
  215. }
  216. return NS_OK;
  217. }
  218. MOZ_DEFINE_MALLOC_SIZE_OF(UrlClassifierMallocSizeOf)
  219. NS_IMETHODIMP
  220. nsUrlClassifierPrefixSet::CollectReports(nsIHandleReportCallback* aHandleReport,
  221. nsISupports* aData, bool aAnonymize)
  222. {
  223. MOZ_ASSERT(NS_IsMainThread());
  224. // No need to get mLock here because this function does not directly touch
  225. // the class's data members. (SizeOfIncludingThis() will get mLock, however.)
  226. aHandleReport->Callback(
  227. EmptyCString(), mMemoryReportPath, KIND_HEAP, UNITS_BYTES,
  228. SizeOfIncludingThis(UrlClassifierMallocSizeOf),
  229. NS_LITERAL_CSTRING("Memory used by the prefix set for a URL classifier."),
  230. aData);
  231. return NS_OK;
  232. }
  233. size_t
  234. nsUrlClassifierPrefixSet::SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf)
  235. {
  236. MutexAutoLock lock(mLock);
  237. size_t n = 0;
  238. n += aMallocSizeOf(this);
  239. n += mIndexDeltas.ShallowSizeOfExcludingThis(aMallocSizeOf);
  240. for (uint32_t i = 0; i < mIndexDeltas.Length(); i++) {
  241. n += mIndexDeltas[i].ShallowSizeOfExcludingThis(aMallocSizeOf);
  242. }
  243. n += mIndexPrefixes.ShallowSizeOfExcludingThis(aMallocSizeOf);
  244. return n;
  245. }
  246. NS_IMETHODIMP
  247. nsUrlClassifierPrefixSet::IsEmpty(bool * aEmpty)
  248. {
  249. MutexAutoLock lock(mLock);
  250. *aEmpty = (mIndexPrefixes.Length() == 0);
  251. return NS_OK;
  252. }
  253. NS_IMETHODIMP
  254. nsUrlClassifierPrefixSet::LoadFromFile(nsIFile* aFile)
  255. {
  256. MutexAutoLock lock(mLock);
  257. nsCOMPtr<nsIInputStream> localInFile;
  258. nsresult rv = NS_NewLocalFileInputStream(getter_AddRefs(localInFile), aFile,
  259. PR_RDONLY | nsIFile::OS_READAHEAD);
  260. NS_ENSURE_SUCCESS(rv, rv);
  261. // Calculate how big the file is, make sure our read buffer isn't bigger
  262. // than the file itself which is just wasting memory.
  263. int64_t fileSize;
  264. rv = aFile->GetFileSize(&fileSize);
  265. NS_ENSURE_SUCCESS(rv, rv);
  266. if (fileSize < 0 || fileSize > UINT32_MAX) {
  267. return NS_ERROR_FAILURE;
  268. }
  269. uint32_t bufferSize = std::min<uint32_t>(static_cast<uint32_t>(fileSize),
  270. MAX_BUFFER_SIZE);
  271. // Convert to buffered stream
  272. nsCOMPtr<nsIInputStream> in = NS_BufferInputStream(localInFile, bufferSize);
  273. rv = LoadPrefixes(in);
  274. NS_ENSURE_SUCCESS(rv, rv);
  275. return NS_OK;
  276. }
  277. NS_IMETHODIMP
  278. nsUrlClassifierPrefixSet::StoreToFile(nsIFile* aFile)
  279. {
  280. MutexAutoLock lock(mLock);
  281. nsCOMPtr<nsIOutputStream> localOutFile;
  282. nsresult rv = NS_NewLocalFileOutputStream(getter_AddRefs(localOutFile), aFile,
  283. PR_WRONLY | PR_TRUNCATE | PR_CREATE_FILE);
  284. NS_ENSURE_SUCCESS(rv, rv);
  285. uint32_t fileSize;
  286. nsCOMPtr<nsIFileOutputStream> fos(do_QueryInterface(localOutFile));
  287. fileSize = CalculatePreallocateSize();
  288. // Ignore failure, the preallocation is a hint and we write out the entire
  289. // file later on
  290. Unused << fos->Preallocate(fileSize);
  291. // Convert to buffered stream
  292. nsCOMPtr<nsIOutputStream> out =
  293. NS_BufferOutputStream(localOutFile, std::min(fileSize, MAX_BUFFER_SIZE));
  294. rv = WritePrefixes(out);
  295. NS_ENSURE_SUCCESS(rv, rv);
  296. LOG(("Saving PrefixSet successful\n"));
  297. return NS_OK;
  298. }
  299. nsresult
  300. nsUrlClassifierPrefixSet::LoadPrefixes(nsIInputStream* in)
  301. {
  302. uint32_t magic;
  303. uint32_t read;
  304. nsresult rv = in->Read(reinterpret_cast<char*>(&magic), sizeof(uint32_t), &read);
  305. NS_ENSURE_SUCCESS(rv, rv);
  306. NS_ENSURE_TRUE(read == sizeof(uint32_t), NS_ERROR_FAILURE);
  307. if (magic == PREFIXSET_VERSION_MAGIC) {
  308. uint32_t indexSize;
  309. uint32_t deltaSize;
  310. rv = in->Read(reinterpret_cast<char*>(&indexSize), sizeof(uint32_t), &read);
  311. NS_ENSURE_SUCCESS(rv, rv);
  312. NS_ENSURE_TRUE(read == sizeof(uint32_t), NS_ERROR_FAILURE);
  313. rv = in->Read(reinterpret_cast<char*>(&deltaSize), sizeof(uint32_t), &read);
  314. NS_ENSURE_SUCCESS(rv, rv);
  315. NS_ENSURE_TRUE(read == sizeof(uint32_t), NS_ERROR_FAILURE);
  316. if (indexSize == 0) {
  317. LOG(("stored PrefixSet is empty!"));
  318. return NS_OK;
  319. }
  320. if (deltaSize > (indexSize * DELTAS_LIMIT)) {
  321. return NS_ERROR_FILE_CORRUPTED;
  322. }
  323. nsTArray<uint32_t> indexStarts;
  324. indexStarts.SetLength(indexSize);
  325. mIndexPrefixes.SetLength(indexSize);
  326. mIndexDeltas.SetLength(indexSize);
  327. mTotalPrefixes = indexSize;
  328. uint32_t toRead = indexSize*sizeof(uint32_t);
  329. rv = in->Read(reinterpret_cast<char*>(mIndexPrefixes.Elements()), toRead, &read);
  330. NS_ENSURE_SUCCESS(rv, rv);
  331. NS_ENSURE_TRUE(read == toRead, NS_ERROR_FAILURE);
  332. rv = in->Read(reinterpret_cast<char*>(indexStarts.Elements()), toRead, &read);
  333. NS_ENSURE_SUCCESS(rv, rv);
  334. NS_ENSURE_TRUE(read == toRead, NS_ERROR_FAILURE);
  335. if (indexSize != 0 && indexStarts[0] != 0) {
  336. return NS_ERROR_FILE_CORRUPTED;
  337. }
  338. for (uint32_t i = 0; i < indexSize; i++) {
  339. uint32_t numInDelta = i == indexSize - 1 ? deltaSize - indexStarts[i]
  340. : indexStarts[i + 1] - indexStarts[i];
  341. if (numInDelta > DELTAS_LIMIT) {
  342. return NS_ERROR_FILE_CORRUPTED;
  343. }
  344. if (numInDelta > 0) {
  345. mIndexDeltas[i].SetLength(numInDelta);
  346. mTotalPrefixes += numInDelta;
  347. toRead = numInDelta * sizeof(uint16_t);
  348. rv = in->Read(reinterpret_cast<char*>(mIndexDeltas[i].Elements()), toRead, &read);
  349. NS_ENSURE_SUCCESS(rv, rv);
  350. NS_ENSURE_TRUE(read == toRead, NS_ERROR_FAILURE);
  351. }
  352. }
  353. } else {
  354. LOG(("Version magic mismatch, not loading"));
  355. return NS_ERROR_FILE_CORRUPTED;
  356. }
  357. MOZ_ASSERT(mIndexPrefixes.Length() == mIndexDeltas.Length());
  358. LOG(("Loading PrefixSet successful"));
  359. return NS_OK;
  360. }
  361. uint32_t
  362. nsUrlClassifierPrefixSet::CalculatePreallocateSize()
  363. {
  364. uint32_t fileSize = 4 * sizeof(uint32_t);
  365. uint32_t deltas = mTotalPrefixes - mIndexPrefixes.Length();
  366. fileSize += 2 * mIndexPrefixes.Length() * sizeof(uint32_t);
  367. fileSize += deltas * sizeof(uint16_t);
  368. return fileSize;
  369. }
  370. nsresult
  371. nsUrlClassifierPrefixSet::WritePrefixes(nsIOutputStream* out)
  372. {
  373. uint32_t written;
  374. uint32_t writelen = sizeof(uint32_t);
  375. uint32_t magic = PREFIXSET_VERSION_MAGIC;
  376. nsresult rv = out->Write(reinterpret_cast<char*>(&magic), writelen, &written);
  377. NS_ENSURE_SUCCESS(rv, rv);
  378. NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
  379. uint32_t indexSize = mIndexPrefixes.Length();
  380. uint32_t indexDeltaSize = mIndexDeltas.Length();
  381. uint32_t totalDeltas = 0;
  382. // Store the shape of mIndexDeltas by noting at which "count" of total
  383. // indexes a new subarray starts. This is slightly cumbersome but keeps
  384. // file format compatibility.
  385. // If we ever update the format, we can gain space by storing the delta
  386. // subarray sizes, which fit in bytes.
  387. nsTArray<uint32_t> indexStarts;
  388. indexStarts.AppendElement(0);
  389. for (uint32_t i = 0; i < indexDeltaSize; i++) {
  390. uint32_t deltaLength = mIndexDeltas[i].Length();
  391. totalDeltas += deltaLength;
  392. indexStarts.AppendElement(totalDeltas);
  393. }
  394. rv = out->Write(reinterpret_cast<char*>(&indexSize), writelen, &written);
  395. NS_ENSURE_SUCCESS(rv, rv);
  396. NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
  397. rv = out->Write(reinterpret_cast<char*>(&totalDeltas), writelen, &written);
  398. NS_ENSURE_SUCCESS(rv, rv);
  399. NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
  400. writelen = indexSize * sizeof(uint32_t);
  401. rv = out->Write(reinterpret_cast<char*>(mIndexPrefixes.Elements()), writelen, &written);
  402. NS_ENSURE_SUCCESS(rv, rv);
  403. NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
  404. rv = out->Write(reinterpret_cast<char*>(indexStarts.Elements()), writelen, &written);
  405. NS_ENSURE_SUCCESS(rv, rv);
  406. NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
  407. if (totalDeltas > 0) {
  408. for (uint32_t i = 0; i < indexDeltaSize; i++) {
  409. writelen = mIndexDeltas[i].Length() * sizeof(uint16_t);
  410. rv = out->Write(reinterpret_cast<char*>(mIndexDeltas[i].Elements()), writelen, &written);
  411. NS_ENSURE_SUCCESS(rv, rv);
  412. NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
  413. }
  414. }
  415. LOG(("Saving PrefixSet successful\n"));
  416. return NS_OK;
  417. }