123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248 |
- //* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
- // Originally based on Chrome sources:
- // Copyright (c) 2010 The Chromium Authors. All rights reserved.
- //
- // Redistribution and use in source and binary forms, with or without
- // modification, are permitted provided that the following conditions are
- // met:
- //
- // * Redistributions of source code must retain the above copyright
- // notice, this list of conditions and the following disclaimer.
- // * Redistributions in binary form must reproduce the above
- // copyright notice, this list of conditions and the following disclaimer
- // in the documentation and/or other materials provided with the
- // distribution.
- // * Neither the name of Google Inc. nor the names of its
- // contributors may be used to endorse or promote products derived from
- // this software without specific prior written permission.
- //
- // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- #include "HashStore.h"
- #include "nsICryptoHash.h"
- #include "nsISeekableStream.h"
- #include "nsIStreamConverterService.h"
- #include "nsNetUtil.h"
- #include "nsCheckSummedOutputStream.h"
- #include "prio.h"
- #include "mozilla/Logging.h"
- #include "zlib.h"
- #include "Classifier.h"
- #include "nsUrlClassifierDBService.h"
- // Main store for SafeBrowsing protocol data. We store
- // known add/sub chunks, prefixes and completions in memory
- // during an update, and serialize to disk.
- // We do not store the add prefixes, those are retrieved by
- // decompressing the PrefixSet cache whenever we need to apply
- // an update.
- //
- // byte slicing: Many of the 4-byte values stored here are strongly
- // correlated in the upper bytes, and uncorrelated in the lower
- // bytes. Because zlib/DEFLATE requires match lengths of at least
- // 3 to achieve good compression, and we don't get those if only
- // the upper 16-bits are correlated, it is worthwhile to slice 32-bit
- // values into 4 1-byte slices and compress the slices individually.
- // The slices corresponding to MSBs will compress very well, and the
- // slice corresponding to LSB almost nothing. Because of this, we
- // only apply DEFLATE to the 3 most significant bytes, and store the
- // LSB uncompressed.
- //
- // byte sliced (numValues) data format:
- // uint32_t compressed-size
- // compressed-size bytes zlib DEFLATE data
- // 0...numValues byte MSB of 4-byte numValues data
- // uint32_t compressed-size
- // compressed-size bytes zlib DEFLATE data
- // 0...numValues byte 2nd byte of 4-byte numValues data
- // uint32_t compressed-size
- // compressed-size bytes zlib DEFLATE data
- // 0...numValues byte 3rd byte of 4-byte numValues data
- // 0...numValues byte LSB of 4-byte numValues data
- //
- // Store data format:
- // uint32_t magic
- // uint32_t version
- // uint32_t numAddChunks
- // uint32_t numSubChunks
- // uint32_t numAddPrefixes
- // uint32_t numSubPrefixes
- // uint32_t numAddCompletes
- // uint32_t numSubCompletes
- // 0...numAddChunks uint32_t addChunk
- // 0...numSubChunks uint32_t subChunk
- // byte sliced (numAddPrefixes) uint32_t add chunk of AddPrefixes
- // byte sliced (numSubPrefixes) uint32_t add chunk of SubPrefixes
- // byte sliced (numSubPrefixes) uint32_t sub chunk of SubPrefixes
- // byte sliced (numSubPrefixes) uint32_t SubPrefixes
- // 0...numAddCompletes 32-byte Completions + uint32_t addChunk
- // 0...numSubCompletes 32-byte Completions + uint32_t addChunk
- // + uint32_t subChunk
- // 16-byte MD5 of all preceding data
- // Name of the SafeBrowsing store
- #define STORE_SUFFIX ".sbstore"
- // MOZ_LOG=UrlClassifierDbService:5
- extern mozilla::LazyLogModule gUrlClassifierDbServiceLog;
- #define LOG(args) MOZ_LOG(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug, args)
- #define LOG_ENABLED() MOZ_LOG_TEST(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug)
- // Either the return was successful or we call the Reset function (unless we
- // hit an OOM). Used while reading in the store.
- #define SUCCESS_OR_RESET(res) \
- do { \
- nsresult __rv = res; /* Don't evaluate |res| more than once */ \
- if (__rv == NS_ERROR_OUT_OF_MEMORY) { \
- NS_WARNING("SafeBrowsing OOM."); \
- return __rv; \
- } \
- if (NS_FAILED(__rv)) { \
- NS_WARNING("SafeBrowsing store corrupted or out of date."); \
- Reset(); \
- return __rv; \
- } \
- } while(0)
- namespace mozilla {
- namespace safebrowsing {
- const uint32_t STORE_MAGIC = 0x1231af3b;
- const uint32_t CURRENT_VERSION = 3;
- nsresult
- TableUpdateV2::NewAddPrefix(uint32_t aAddChunk, const Prefix& aHash)
- {
- AddPrefix *add = mAddPrefixes.AppendElement(fallible);
- if (!add) return NS_ERROR_OUT_OF_MEMORY;
- add->addChunk = aAddChunk;
- add->prefix = aHash;
- return NS_OK;
- }
- nsresult
- TableUpdateV2::NewSubPrefix(uint32_t aAddChunk, const Prefix& aHash, uint32_t aSubChunk)
- {
- SubPrefix *sub = mSubPrefixes.AppendElement(fallible);
- if (!sub) return NS_ERROR_OUT_OF_MEMORY;
- sub->addChunk = aAddChunk;
- sub->prefix = aHash;
- sub->subChunk = aSubChunk;
- return NS_OK;
- }
- nsresult
- TableUpdateV2::NewAddComplete(uint32_t aAddChunk, const Completion& aHash)
- {
- AddComplete *add = mAddCompletes.AppendElement(fallible);
- if (!add) return NS_ERROR_OUT_OF_MEMORY;
- add->addChunk = aAddChunk;
- add->complete = aHash;
- return NS_OK;
- }
- nsresult
- TableUpdateV2::NewSubComplete(uint32_t aAddChunk, const Completion& aHash, uint32_t aSubChunk)
- {
- SubComplete *sub = mSubCompletes.AppendElement(fallible);
- if (!sub) return NS_ERROR_OUT_OF_MEMORY;
- sub->addChunk = aAddChunk;
- sub->complete = aHash;
- sub->subChunk = aSubChunk;
- return NS_OK;
- }
- void
- TableUpdateV4::NewPrefixes(int32_t aSize, std::string& aPrefixes)
- {
- NS_ENSURE_TRUE_VOID(aPrefixes.size() % aSize == 0);
- NS_ENSURE_TRUE_VOID(!mPrefixesMap.Get(aSize));
- if (LOG_ENABLED() && 4 == aSize) {
- int numOfPrefixes = aPrefixes.size() / 4;
- uint32_t* p = (uint32_t*)aPrefixes.c_str();
- // Dump the first/last 10 fixed-length prefixes for debugging.
- LOG(("* The first 10 (maximum) fixed-length prefixes: "));
- for (int i = 0; i < std::min(10, numOfPrefixes); i++) {
- uint8_t* c = (uint8_t*)&p[i];
- LOG(("%.2X%.2X%.2X%.2X", c[0], c[1], c[2], c[3]));
- }
- LOG(("* The last 10 (maximum) fixed-length prefixes: "));
- for (int i = std::max(0, numOfPrefixes - 10); i < numOfPrefixes; i++) {
- uint8_t* c = (uint8_t*)&p[i];
- LOG(("%.2X%.2X%.2X%.2X", c[0], c[1], c[2], c[3]));
- }
- LOG(("---- %d fixed-length prefixes in total.", aPrefixes.size() / aSize));
- }
- PrefixStdString* prefix = new PrefixStdString(aPrefixes);
- mPrefixesMap.Put(aSize, prefix);
- }
- void
- TableUpdateV4::NewRemovalIndices(const uint32_t* aIndices, size_t aNumOfIndices)
- {
- for (size_t i = 0; i < aNumOfIndices; i++) {
- mRemovalIndiceArray.AppendElement(aIndices[i]);
- }
- }
- void
- TableUpdateV4::NewChecksum(const std::string& aChecksum)
- {
- mChecksum.Assign(aChecksum.data(), aChecksum.size());
- }
- HashStore::HashStore(const nsACString& aTableName,
- const nsACString& aProvider,
- nsIFile* aRootStoreDir)
- : mTableName(aTableName)
- , mInUpdate(false)
- , mFileSize(0)
- {
- nsresult rv = Classifier::GetPrivateStoreDirectory(aRootStoreDir,
- aTableName,
- aProvider,
- getter_AddRefs(mStoreDirectory));
- if (NS_FAILED(rv)) {
- LOG(("Failed to get private store directory for %s", mTableName.get()));
- mStoreDirectory = aRootStoreDir;
- }
- }
- HashStore::~HashStore()
- {
- }
- nsresult
- HashStore::Reset()
- {
- LOG(("HashStore resetting"));
- nsCOMPtr<nsIFile> storeFile;
- nsresult rv = mStoreDirectory->Clone(getter_AddRefs(storeFile));
- NS_ENSURE_SUCCESS(rv, rv);
- rv = storeFile->AppendNative(mTableName + NS_LITERAL_CSTRING(STORE_SUFFIX));
- NS_ENSURE_SUCCESS(rv, rv);
- rv = storeFile->Remove(false);
- NS_ENSURE_SUCCESS(rv, rv);
- mFileSize = 0;
- return NS_OK;
- }
- nsresult
- HashStore::CheckChecksum(uint32_t aFileSize)
- {
- if (!mInputStream) {
- return NS_OK;
- }
- // Check for file corruption by
- // comparing the stored checksum to actual checksum of data
- nsAutoCString hash;
- nsAutoCString compareHash;
- char *data;
- uint32_t read;
- nsresult rv = CalculateChecksum(hash, aFileSize, true);
- NS_ENSURE_SUCCESS(rv, rv);
- compareHash.GetMutableData(&data, hash.Length());
- if (hash.Length() > aFileSize) {
- NS_WARNING("SafeBrowing file not long enough to store its hash");
- return NS_ERROR_FAILURE;
- }
- nsCOMPtr<nsISeekableStream> seekIn = do_QueryInterface(mInputStream);
- rv = seekIn->Seek(nsISeekableStream::NS_SEEK_SET, aFileSize - hash.Length());
- NS_ENSURE_SUCCESS(rv, rv);
- rv = mInputStream->Read(data, hash.Length(), &read);
- NS_ENSURE_SUCCESS(rv, rv);
- NS_ASSERTION(read == hash.Length(), "Could not read hash bytes");
- if (!hash.Equals(compareHash)) {
- NS_WARNING("Safebrowing file failed checksum.");
- return NS_ERROR_FAILURE;
- }
- return NS_OK;
- }
- nsresult
- HashStore::Open()
- {
- nsCOMPtr<nsIFile> storeFile;
- nsresult rv = mStoreDirectory->Clone(getter_AddRefs(storeFile));
- NS_ENSURE_SUCCESS(rv, rv);
- rv = storeFile->AppendNative(mTableName + NS_LITERAL_CSTRING(".sbstore"));
- NS_ENSURE_SUCCESS(rv, rv);
- nsCOMPtr<nsIInputStream> origStream;
- rv = NS_NewLocalFileInputStream(getter_AddRefs(origStream), storeFile,
- PR_RDONLY | nsIFile::OS_READAHEAD);
- if (rv == NS_ERROR_FILE_NOT_FOUND) {
- UpdateHeader();
- return NS_OK;
- } else {
- SUCCESS_OR_RESET(rv);
- }
- int64_t fileSize;
- rv = storeFile->GetFileSize(&fileSize);
- NS_ENSURE_SUCCESS(rv, rv);
- if (fileSize < 0 || fileSize > UINT32_MAX) {
- return NS_ERROR_FAILURE;
- }
- mFileSize = static_cast<uint32_t>(fileSize);
- mInputStream = NS_BufferInputStream(origStream, mFileSize);
- rv = ReadHeader();
- SUCCESS_OR_RESET(rv);
- rv = SanityCheck();
- SUCCESS_OR_RESET(rv);
- return NS_OK;
- }
- nsresult
- HashStore::ReadHeader()
- {
- if (!mInputStream) {
- UpdateHeader();
- return NS_OK;
- }
- nsCOMPtr<nsISeekableStream> seekable = do_QueryInterface(mInputStream);
- nsresult rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET, 0);
- NS_ENSURE_SUCCESS(rv, rv);
- void *buffer = &mHeader;
- rv = NS_ReadInputStreamToBuffer(mInputStream,
- &buffer,
- sizeof(Header));
- NS_ENSURE_SUCCESS(rv, rv);
- return NS_OK;
- }
- nsresult
- HashStore::SanityCheck()
- {
- if (mHeader.magic != STORE_MAGIC || mHeader.version != CURRENT_VERSION) {
- NS_WARNING("Unexpected header data in the store.");
- return NS_ERROR_FAILURE;
- }
- return NS_OK;
- }
- nsresult
- HashStore::CalculateChecksum(nsAutoCString& aChecksum,
- uint32_t aFileSize,
- bool aChecksumPresent)
- {
- aChecksum.Truncate();
- // Reset mInputStream to start
- nsCOMPtr<nsISeekableStream> seekable = do_QueryInterface(mInputStream);
- nsresult rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET, 0);
- nsCOMPtr<nsICryptoHash> hash = do_CreateInstance(NS_CRYPTO_HASH_CONTRACTID, &rv);
- NS_ENSURE_SUCCESS(rv, rv);
- // Size of MD5 hash in bytes
- const uint32_t CHECKSUM_SIZE = 16;
- // MD5 is not a secure hash function, but since this is a filesystem integrity
- // check, this usage is ok.
- rv = hash->Init(nsICryptoHash::MD5);
- NS_ENSURE_SUCCESS(rv, rv);
- if (!aChecksumPresent) {
- // Hash entire file
- rv = hash->UpdateFromStream(mInputStream, UINT32_MAX);
- } else {
- // Hash everything but last checksum bytes
- if (aFileSize < CHECKSUM_SIZE) {
- NS_WARNING("SafeBrowsing file isn't long enough to store its checksum");
- return NS_ERROR_FAILURE;
- }
- rv = hash->UpdateFromStream(mInputStream, aFileSize - CHECKSUM_SIZE);
- }
- NS_ENSURE_SUCCESS(rv, rv);
- rv = hash->Finish(false, aChecksum);
- NS_ENSURE_SUCCESS(rv, rv);
- return NS_OK;
- }
- void
- HashStore::UpdateHeader()
- {
- mHeader.magic = STORE_MAGIC;
- mHeader.version = CURRENT_VERSION;
- mHeader.numAddChunks = mAddChunks.Length();
- mHeader.numSubChunks = mSubChunks.Length();
- mHeader.numAddPrefixes = mAddPrefixes.Length();
- mHeader.numSubPrefixes = mSubPrefixes.Length();
- mHeader.numAddCompletes = mAddCompletes.Length();
- mHeader.numSubCompletes = mSubCompletes.Length();
- }
- nsresult
- HashStore::ReadChunkNumbers()
- {
- if (!mInputStream || AlreadyReadChunkNumbers()) {
- return NS_OK;
- }
- nsCOMPtr<nsISeekableStream> seekable = do_QueryInterface(mInputStream);
- nsresult rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET,
- sizeof(Header));
- NS_ENSURE_SUCCESS(rv, rv);
- rv = mAddChunks.Read(mInputStream, mHeader.numAddChunks);
- NS_ENSURE_SUCCESS(rv, rv);
- NS_ASSERTION(mAddChunks.Length() == mHeader.numAddChunks, "Read the right amount of add chunks.");
- rv = mSubChunks.Read(mInputStream, mHeader.numSubChunks);
- NS_ENSURE_SUCCESS(rv, rv);
- NS_ASSERTION(mSubChunks.Length() == mHeader.numSubChunks, "Read the right amount of sub chunks.");
- return NS_OK;
- }
- nsresult
- HashStore::ReadHashes()
- {
- if (!mInputStream) {
- // BeginUpdate has been called but Open hasn't initialized mInputStream,
- // because the existing HashStore is empty.
- return NS_OK;
- }
- nsCOMPtr<nsISeekableStream> seekable = do_QueryInterface(mInputStream);
- uint32_t offset = sizeof(Header);
- offset += (mHeader.numAddChunks + mHeader.numSubChunks) * sizeof(uint32_t);
- nsresult rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET, offset);
- NS_ENSURE_SUCCESS(rv, rv);
- rv = ReadAddPrefixes();
- NS_ENSURE_SUCCESS(rv, rv);
- rv = ReadSubPrefixes();
- NS_ENSURE_SUCCESS(rv, rv);
- // If completions was read before, then we are done here.
- if (AlreadyReadCompletions()) {
- return NS_OK;
- }
- rv = ReadTArray(mInputStream, &mAddCompletes, mHeader.numAddCompletes);
- NS_ENSURE_SUCCESS(rv, rv);
- rv = ReadTArray(mInputStream, &mSubCompletes, mHeader.numSubCompletes);
- NS_ENSURE_SUCCESS(rv, rv);
- return NS_OK;
- }
- nsresult
- HashStore::ReadCompletions()
- {
- if (!mInputStream || AlreadyReadCompletions()) {
- return NS_OK;
- }
- nsCOMPtr<nsIFile> storeFile;
- nsresult rv = mStoreDirectory->Clone(getter_AddRefs(storeFile));
- NS_ENSURE_SUCCESS(rv, rv);
- rv = storeFile->AppendNative(mTableName + NS_LITERAL_CSTRING(STORE_SUFFIX));
- NS_ENSURE_SUCCESS(rv, rv);
- uint32_t offset = mFileSize -
- sizeof(struct AddComplete) * mHeader.numAddCompletes -
- sizeof(struct SubComplete) * mHeader.numSubCompletes -
- nsCheckSummedOutputStream::CHECKSUM_SIZE;
- nsCOMPtr<nsISeekableStream> seekable = do_QueryInterface(mInputStream);
- rv = seekable->Seek(nsISeekableStream::NS_SEEK_SET, offset);
- NS_ENSURE_SUCCESS(rv, rv);
- rv = ReadTArray(mInputStream, &mAddCompletes, mHeader.numAddCompletes);
- NS_ENSURE_SUCCESS(rv, rv);
- rv = ReadTArray(mInputStream, &mSubCompletes, mHeader.numSubCompletes);
- NS_ENSURE_SUCCESS(rv, rv);
- return NS_OK;
- }
- nsresult
- HashStore::PrepareForUpdate()
- {
- nsresult rv = CheckChecksum(mFileSize);
- SUCCESS_OR_RESET(rv);
- rv = ReadChunkNumbers();
- SUCCESS_OR_RESET(rv);
- rv = ReadHashes();
- SUCCESS_OR_RESET(rv);
- return NS_OK;
- }
- nsresult
- HashStore::BeginUpdate()
- {
- // Check wether the file is corrupted and read the rest of the store
- // in memory.
- nsresult rv = PrepareForUpdate();
- NS_ENSURE_SUCCESS(rv, rv);
- // Close input stream, won't be needed any more and
- // we will rewrite ourselves.
- if (mInputStream) {
- rv = mInputStream->Close();
- NS_ENSURE_SUCCESS(rv, rv);
- }
- mInUpdate = true;
- return NS_OK;
- }
- template<class T>
- static nsresult
- Merge(ChunkSet* aStoreChunks,
- FallibleTArray<T>* aStorePrefixes,
- ChunkSet& aUpdateChunks,
- FallibleTArray<T>& aUpdatePrefixes,
- bool aAllowMerging = false)
- {
- EntrySort(aUpdatePrefixes);
- T* updateIter = aUpdatePrefixes.Elements();
- T* updateEnd = aUpdatePrefixes.Elements() + aUpdatePrefixes.Length();
- T* storeIter = aStorePrefixes->Elements();
- T* storeEnd = aStorePrefixes->Elements() + aStorePrefixes->Length();
- // use a separate array so we can keep the iterators valid
- // if the nsTArray grows
- nsTArray<T> adds;
- for (; updateIter != updateEnd; updateIter++) {
- // skip this chunk if we already have it, unless we're
- // merging completions, in which case we'll always already
- // have the chunk from the original prefix
- if (aStoreChunks->Has(updateIter->Chunk()))
- if (!aAllowMerging)
- continue;
- // XXX: binary search for insertion point might be faster in common
- // case?
- while (storeIter < storeEnd && (storeIter->Compare(*updateIter) < 0)) {
- // skip forward to matching element (or not...)
- storeIter++;
- }
- // no match, add
- if (storeIter == storeEnd
- || storeIter->Compare(*updateIter) != 0) {
- if (!adds.AppendElement(*updateIter))
- return NS_ERROR_OUT_OF_MEMORY;
- }
- }
- // Chunks can be empty, but we should still report we have them
- // to make the chunkranges continuous.
- aStoreChunks->Merge(aUpdateChunks);
- if (!aStorePrefixes->AppendElements(adds, fallible))
- return NS_ERROR_OUT_OF_MEMORY;
- EntrySort(*aStorePrefixes);
- return NS_OK;
- }
- nsresult
- HashStore::ApplyUpdate(TableUpdate &aUpdate)
- {
- auto updateV2 = TableUpdate::Cast<TableUpdateV2>(&aUpdate);
- NS_ENSURE_TRUE(updateV2, NS_ERROR_FAILURE);
- TableUpdateV2& update = *updateV2;
- nsresult rv = mAddExpirations.Merge(update.AddExpirations());
- NS_ENSURE_SUCCESS(rv, rv);
- rv = mSubExpirations.Merge(update.SubExpirations());
- NS_ENSURE_SUCCESS(rv, rv);
- rv = Expire();
- NS_ENSURE_SUCCESS(rv, rv);
- rv = Merge(&mAddChunks, &mAddPrefixes,
- update.AddChunks(), update.AddPrefixes());
- NS_ENSURE_SUCCESS(rv, rv);
- rv = Merge(&mAddChunks, &mAddCompletes,
- update.AddChunks(), update.AddCompletes(), true);
- NS_ENSURE_SUCCESS(rv, rv);
- rv = Merge(&mSubChunks, &mSubPrefixes,
- update.SubChunks(), update.SubPrefixes());
- NS_ENSURE_SUCCESS(rv, rv);
- rv = Merge(&mSubChunks, &mSubCompletes,
- update.SubChunks(), update.SubCompletes(), true);
- NS_ENSURE_SUCCESS(rv, rv);
- return NS_OK;
- }
- nsresult
- HashStore::Rebuild()
- {
- NS_ASSERTION(mInUpdate, "Must be in update to rebuild.");
- nsresult rv = ProcessSubs();
- NS_ENSURE_SUCCESS(rv, rv);
- UpdateHeader();
- return NS_OK;
- }
- void
- HashStore::ClearCompletes()
- {
- NS_ASSERTION(mInUpdate, "Must be in update to clear completes.");
- mAddCompletes.Clear();
- mSubCompletes.Clear();
- UpdateHeader();
- }
- template<class T>
- static void
- ExpireEntries(FallibleTArray<T>* aEntries, ChunkSet& aExpirations)
- {
- T* addIter = aEntries->Elements();
- T* end = aEntries->Elements() + aEntries->Length();
- for (T *iter = addIter; iter != end; iter++) {
- if (!aExpirations.Has(iter->Chunk())) {
- *addIter = *iter;
- addIter++;
- }
- }
- aEntries->TruncateLength(addIter - aEntries->Elements());
- }
- nsresult
- HashStore::Expire()
- {
- ExpireEntries(&mAddPrefixes, mAddExpirations);
- ExpireEntries(&mAddCompletes, mAddExpirations);
- ExpireEntries(&mSubPrefixes, mSubExpirations);
- ExpireEntries(&mSubCompletes, mSubExpirations);
- mAddChunks.Remove(mAddExpirations);
- mSubChunks.Remove(mSubExpirations);
- mAddExpirations.Clear();
- mSubExpirations.Clear();
- return NS_OK;
- }
- template<class T>
- nsresult DeflateWriteTArray(nsIOutputStream* aStream, nsTArray<T>& aIn)
- {
- uLongf insize = aIn.Length() * sizeof(T);
- uLongf outsize = compressBound(insize);
- FallibleTArray<char> outBuff;
- if (!outBuff.SetLength(outsize, fallible)) {
- return NS_ERROR_OUT_OF_MEMORY;
- }
- int zerr = compress(reinterpret_cast<Bytef*>(outBuff.Elements()),
- &outsize,
- reinterpret_cast<const Bytef*>(aIn.Elements()),
- insize);
- if (zerr != Z_OK) {
- return NS_ERROR_FAILURE;
- }
- LOG(("DeflateWriteTArray: %d in %d out", insize, outsize));
- outBuff.TruncateLength(outsize);
- // Length of compressed data stream
- uint32_t dataLen = outBuff.Length();
- uint32_t written;
- nsresult rv = aStream->Write(reinterpret_cast<char*>(&dataLen), sizeof(dataLen), &written);
- NS_ENSURE_SUCCESS(rv, rv);
- NS_ASSERTION(written == sizeof(dataLen), "Error writing deflate length");
- // Store to stream
- rv = WriteTArray(aStream, outBuff);
- NS_ENSURE_SUCCESS(rv, rv);
- return NS_OK;
- }
- template<class T>
- nsresult InflateReadTArray(nsIInputStream* aStream, FallibleTArray<T>* aOut,
- uint32_t aExpectedSize)
- {
- uint32_t inLen;
- uint32_t read;
- nsresult rv = aStream->Read(reinterpret_cast<char*>(&inLen), sizeof(inLen), &read);
- NS_ENSURE_SUCCESS(rv, rv);
- NS_ASSERTION(read == sizeof(inLen), "Error reading inflate length");
- FallibleTArray<char> inBuff;
- if (!inBuff.SetLength(inLen, fallible)) {
- return NS_ERROR_OUT_OF_MEMORY;
- }
- rv = ReadTArray(aStream, &inBuff, inLen);
- NS_ENSURE_SUCCESS(rv, rv);
- uLongf insize = inLen;
- uLongf outsize = aExpectedSize * sizeof(T);
- if (!aOut->SetLength(aExpectedSize, fallible)) {
- return NS_ERROR_OUT_OF_MEMORY;
- }
- int zerr = uncompress(reinterpret_cast<Bytef*>(aOut->Elements()),
- &outsize,
- reinterpret_cast<const Bytef*>(inBuff.Elements()),
- insize);
- if (zerr != Z_OK) {
- return NS_ERROR_FAILURE;
- }
- LOG(("InflateReadTArray: %d in %d out", insize, outsize));
- NS_ASSERTION(outsize == aExpectedSize * sizeof(T), "Decompression size mismatch");
- return NS_OK;
- }
- static nsresult
- ByteSliceWrite(nsIOutputStream* aOut, nsTArray<uint32_t>& aData)
- {
- nsTArray<uint8_t> slice;
- uint32_t count = aData.Length();
- // Only process one slice at a time to avoid using too much memory.
- if (!slice.SetLength(count, fallible)) {
- return NS_ERROR_OUT_OF_MEMORY;
- }
- // Process slice 1.
- for (uint32_t i = 0; i < count; i++) {
- slice[i] = (aData[i] >> 24);
- }
- nsresult rv = DeflateWriteTArray(aOut, slice);
- NS_ENSURE_SUCCESS(rv, rv);
- // Process slice 2.
- for (uint32_t i = 0; i < count; i++) {
- slice[i] = ((aData[i] >> 16) & 0xFF);
- }
- rv = DeflateWriteTArray(aOut, slice);
- NS_ENSURE_SUCCESS(rv, rv);
- // Process slice 3.
- for (uint32_t i = 0; i < count; i++) {
- slice[i] = ((aData[i] >> 8) & 0xFF);
- }
- rv = DeflateWriteTArray(aOut, slice);
- NS_ENSURE_SUCCESS(rv, rv);
- // Process slice 4.
- for (uint32_t i = 0; i < count; i++) {
- slice[i] = (aData[i] & 0xFF);
- }
- // The LSB slice is generally uncompressible, don't bother
- // compressing it.
- rv = WriteTArray(aOut, slice);
- NS_ENSURE_SUCCESS(rv, rv);
- return NS_OK;
- }
- static nsresult
- ByteSliceRead(nsIInputStream* aInStream, FallibleTArray<uint32_t>* aData, uint32_t count)
- {
- FallibleTArray<uint8_t> slice1;
- FallibleTArray<uint8_t> slice2;
- FallibleTArray<uint8_t> slice3;
- FallibleTArray<uint8_t> slice4;
- nsresult rv = InflateReadTArray(aInStream, &slice1, count);
- NS_ENSURE_SUCCESS(rv, rv);
- rv = InflateReadTArray(aInStream, &slice2, count);
- NS_ENSURE_SUCCESS(rv, rv);
- rv = InflateReadTArray(aInStream, &slice3, count);
- NS_ENSURE_SUCCESS(rv, rv);
- rv = ReadTArray(aInStream, &slice4, count);
- NS_ENSURE_SUCCESS(rv, rv);
- if (!aData->SetCapacity(count, fallible)) {
- return NS_ERROR_OUT_OF_MEMORY;
- }
- for (uint32_t i = 0; i < count; i++) {
- aData->AppendElement((slice1[i] << 24) |
- (slice2[i] << 16) |
- (slice3[i] << 8) |
- (slice4[i]),
- fallible);
- }
- return NS_OK;
- }
- nsresult
- HashStore::ReadAddPrefixes()
- {
- FallibleTArray<uint32_t> chunks;
- uint32_t count = mHeader.numAddPrefixes;
- nsresult rv = ByteSliceRead(mInputStream, &chunks, count);
- NS_ENSURE_SUCCESS(rv, rv);
- if (!mAddPrefixes.SetCapacity(count, fallible)) {
- return NS_ERROR_OUT_OF_MEMORY;
- }
- for (uint32_t i = 0; i < count; i++) {
- AddPrefix *add = mAddPrefixes.AppendElement(fallible);
- add->prefix.FromUint32(0);
- add->addChunk = chunks[i];
- }
- return NS_OK;
- }
- nsresult
- HashStore::ReadSubPrefixes()
- {
- FallibleTArray<uint32_t> addchunks;
- FallibleTArray<uint32_t> subchunks;
- FallibleTArray<uint32_t> prefixes;
- uint32_t count = mHeader.numSubPrefixes;
- nsresult rv = ByteSliceRead(mInputStream, &addchunks, count);
- NS_ENSURE_SUCCESS(rv, rv);
- rv = ByteSliceRead(mInputStream, &subchunks, count);
- NS_ENSURE_SUCCESS(rv, rv);
- rv = ByteSliceRead(mInputStream, &prefixes, count);
- NS_ENSURE_SUCCESS(rv, rv);
- if (!mSubPrefixes.SetCapacity(count, fallible)) {
- return NS_ERROR_OUT_OF_MEMORY;
- }
- for (uint32_t i = 0; i < count; i++) {
- SubPrefix *sub = mSubPrefixes.AppendElement(fallible);
- sub->addChunk = addchunks[i];
- sub->prefix.FromUint32(prefixes[i]);
- sub->subChunk = subchunks[i];
- }
- return NS_OK;
- }
- // Split up PrefixArray back into the constituents
- nsresult
- HashStore::WriteAddPrefixes(nsIOutputStream* aOut)
- {
- nsTArray<uint32_t> chunks;
- uint32_t count = mAddPrefixes.Length();
- if (!chunks.SetCapacity(count, fallible)) {
- return NS_ERROR_OUT_OF_MEMORY;
- }
- for (uint32_t i = 0; i < count; i++) {
- chunks.AppendElement(mAddPrefixes[i].Chunk());
- }
- nsresult rv = ByteSliceWrite(aOut, chunks);
- NS_ENSURE_SUCCESS(rv, rv);
- return NS_OK;
- }
- nsresult
- HashStore::WriteSubPrefixes(nsIOutputStream* aOut)
- {
- nsTArray<uint32_t> addchunks;
- nsTArray<uint32_t> subchunks;
- nsTArray<uint32_t> prefixes;
- uint32_t count = mSubPrefixes.Length();
- addchunks.SetCapacity(count);
- subchunks.SetCapacity(count);
- prefixes.SetCapacity(count);
- for (uint32_t i = 0; i < count; i++) {
- addchunks.AppendElement(mSubPrefixes[i].AddChunk());
- prefixes.AppendElement(mSubPrefixes[i].PrefixHash().ToUint32());
- subchunks.AppendElement(mSubPrefixes[i].Chunk());
- }
- nsresult rv = ByteSliceWrite(aOut, addchunks);
- NS_ENSURE_SUCCESS(rv, rv);
- rv = ByteSliceWrite(aOut, subchunks);
- NS_ENSURE_SUCCESS(rv, rv);
- rv = ByteSliceWrite(aOut, prefixes);
- NS_ENSURE_SUCCESS(rv, rv);
- return NS_OK;
- }
- nsresult
- HashStore::WriteFile()
- {
- NS_ASSERTION(mInUpdate, "Must be in update to write database.");
- if (nsUrlClassifierDBService::ShutdownHasStarted()) {
- return NS_ERROR_ABORT;
- }
- nsCOMPtr<nsIFile> storeFile;
- nsresult rv = mStoreDirectory->Clone(getter_AddRefs(storeFile));
- NS_ENSURE_SUCCESS(rv, rv);
- rv = storeFile->AppendNative(mTableName + NS_LITERAL_CSTRING(".sbstore"));
- NS_ENSURE_SUCCESS(rv, rv);
- nsCOMPtr<nsIOutputStream> out;
- rv = NS_NewCheckSummedOutputStream(getter_AddRefs(out), storeFile);
- NS_ENSURE_SUCCESS(rv, rv);
- uint32_t written;
- rv = out->Write(reinterpret_cast<char*>(&mHeader), sizeof(mHeader), &written);
- NS_ENSURE_SUCCESS(rv, rv);
- // Write chunk numbers.
- rv = mAddChunks.Write(out);
- NS_ENSURE_SUCCESS(rv, rv);
- rv = mSubChunks.Write(out);
- NS_ENSURE_SUCCESS(rv, rv);
- // Write hashes.
- rv = WriteAddPrefixes(out);
- NS_ENSURE_SUCCESS(rv, rv);
- rv = WriteSubPrefixes(out);
- NS_ENSURE_SUCCESS(rv, rv);
- rv = WriteTArray(out, mAddCompletes);
- NS_ENSURE_SUCCESS(rv, rv);
- rv = WriteTArray(out, mSubCompletes);
- NS_ENSURE_SUCCESS(rv, rv);
- nsCOMPtr<nsISafeOutputStream> safeOut = do_QueryInterface(out, &rv);
- NS_ENSURE_SUCCESS(rv, rv);
- rv = safeOut->Finish();
- NS_ENSURE_SUCCESS(rv, rv);
- return NS_OK;
- }
- template <class T>
- static void
- Erase(FallibleTArray<T>* array, T* iterStart, T* iterEnd)
- {
- uint32_t start = iterStart - array->Elements();
- uint32_t count = iterEnd - iterStart;
- if (count > 0) {
- array->RemoveElementsAt(start, count);
- }
- }
- // Find items matching between |subs| and |adds|, and remove them,
- // recording the item from |adds| in |adds_removed|. To minimize
- // copies, the inputs are processing in parallel, so |subs| and |adds|
- // should be compatibly ordered (either by SBAddPrefixLess or
- // SBAddPrefixHashLess).
- //
- // |predAS| provides add < sub, |predSA| provides sub < add, for the
- // tightest compare appropriate (see calls in SBProcessSubs).
- template<class TSub, class TAdd>
- static void
- KnockoutSubs(FallibleTArray<TSub>* aSubs, FallibleTArray<TAdd>* aAdds)
- {
- // Keep a pair of output iterators for writing kept items. Due to
- // deletions, these may lag the main iterators. Using erase() on
- // individual items would result in O(N^2) copies. Using a list
- // would work around that, at double or triple the memory cost.
- TAdd* addOut = aAdds->Elements();
- TAdd* addIter = aAdds->Elements();
- TSub* subOut = aSubs->Elements();
- TSub* subIter = aSubs->Elements();
- TAdd* addEnd = addIter + aAdds->Length();
- TSub* subEnd = subIter + aSubs->Length();
- while (addIter != addEnd && subIter != subEnd) {
- // additer compare, so it compares on add chunk
- int32_t cmp = addIter->Compare(*subIter);
- if (cmp > 0) {
- // If |*sub_iter| < |*add_iter|, retain the sub.
- *subOut = *subIter;
- ++subOut;
- ++subIter;
- } else if (cmp < 0) {
- // If |*add_iter| < |*sub_iter|, retain the add.
- *addOut = *addIter;
- ++addOut;
- ++addIter;
- } else {
- // Drop equal items
- ++addIter;
- ++subIter;
- }
- }
- Erase(aAdds, addOut, addIter);
- Erase(aSubs, subOut, subIter);
- }
- // Remove items in |removes| from |fullHashes|. |fullHashes| and
- // |removes| should be ordered by SBAddPrefix component.
- template <class T>
- static void
- RemoveMatchingPrefixes(const SubPrefixArray& aSubs, FallibleTArray<T>* aFullHashes)
- {
- // Where to store kept items.
- T* out = aFullHashes->Elements();
- T* hashIter = out;
- T* hashEnd = aFullHashes->Elements() + aFullHashes->Length();
- SubPrefix const * removeIter = aSubs.Elements();
- SubPrefix const * removeEnd = aSubs.Elements() + aSubs.Length();
- while (hashIter != hashEnd && removeIter != removeEnd) {
- int32_t cmp = removeIter->CompareAlt(*hashIter);
- if (cmp > 0) {
- // Keep items less than |*removeIter|.
- *out = *hashIter;
- ++out;
- ++hashIter;
- } else if (cmp < 0) {
- // No hit for |*removeIter|, bump it forward.
- ++removeIter;
- } else {
- // Drop equal items, there may be multiple hits.
- do {
- ++hashIter;
- } while (hashIter != hashEnd &&
- !(removeIter->CompareAlt(*hashIter) < 0));
- ++removeIter;
- }
- }
- Erase(aFullHashes, out, hashIter);
- }
- static void
- RemoveDeadSubPrefixes(SubPrefixArray& aSubs, ChunkSet& aAddChunks)
- {
- SubPrefix * subIter = aSubs.Elements();
- SubPrefix * subEnd = aSubs.Elements() + aSubs.Length();
- for (SubPrefix * iter = subIter; iter != subEnd; iter++) {
- bool hasChunk = aAddChunks.Has(iter->AddChunk());
- // Keep the subprefix if the chunk it refers to is one
- // we haven't seen it yet.
- if (!hasChunk) {
- *subIter = *iter;
- subIter++;
- }
- }
- LOG(("Removed %u dead SubPrefix entries.", subEnd - subIter));
- aSubs.TruncateLength(subIter - aSubs.Elements());
- }
- #ifdef DEBUG
- template <class T>
- static void EnsureSorted(FallibleTArray<T>* aArray)
- {
- T* start = aArray->Elements();
- T* end = aArray->Elements() + aArray->Length();
- T* iter = start;
- T* previous = start;
- while (iter != end) {
- previous = iter;
- ++iter;
- if (iter != end) {
- MOZ_ASSERT(iter->Compare(*previous) >= 0);
- }
- }
- return;
- }
- #endif
- nsresult
- HashStore::ProcessSubs()
- {
- #ifdef DEBUG
- EnsureSorted(&mAddPrefixes);
- EnsureSorted(&mSubPrefixes);
- EnsureSorted(&mAddCompletes);
- EnsureSorted(&mSubCompletes);
- LOG(("All databases seem to have a consistent sort order."));
- #endif
- RemoveMatchingPrefixes(mSubPrefixes, &mAddCompletes);
- RemoveMatchingPrefixes(mSubPrefixes, &mSubCompletes);
- // Remove any remaining subbed prefixes from both addprefixes
- // and addcompletes.
- KnockoutSubs(&mSubPrefixes, &mAddPrefixes);
- KnockoutSubs(&mSubCompletes, &mAddCompletes);
- // Remove any remaining subprefixes referring to addchunks that
- // we have (and hence have been processed above).
- RemoveDeadSubPrefixes(mSubPrefixes, mAddChunks);
- #ifdef DEBUG
- EnsureSorted(&mAddPrefixes);
- EnsureSorted(&mSubPrefixes);
- EnsureSorted(&mAddCompletes);
- EnsureSorted(&mSubCompletes);
- LOG(("All databases seem to have a consistent sort order."));
- #endif
- return NS_OK;
- }
- nsresult
- HashStore::AugmentAdds(const nsTArray<uint32_t>& aPrefixes)
- {
- uint32_t cnt = aPrefixes.Length();
- if (cnt != mAddPrefixes.Length()) {
- LOG(("Amount of prefixes in cache not consistent with store (%d vs %d)",
- aPrefixes.Length(), mAddPrefixes.Length()));
- return NS_ERROR_FAILURE;
- }
- for (uint32_t i = 0; i < cnt; i++) {
- mAddPrefixes[i].prefix.FromUint32(aPrefixes[i]);
- }
- return NS_OK;
- }
- ChunkSet&
- HashStore::AddChunks()
- {
- ReadChunkNumbers();
- return mAddChunks;
- }
- ChunkSet&
- HashStore::SubChunks()
- {
- ReadChunkNumbers();
- return mSubChunks;
- }
- AddCompleteArray&
- HashStore::AddCompletes()
- {
- ReadCompletions();
- return mAddCompletes;
- }
- SubCompleteArray&
- HashStore::SubCompletes()
- {
- ReadCompletions();
- return mSubCompletes;
- }
- bool
- HashStore::AlreadyReadChunkNumbers()
- {
- // If there are chunks but chunk set not yet contains any data
- // Then we haven't read chunk numbers.
- if ((mHeader.numAddChunks != 0 && mAddChunks.Length() == 0) ||
- (mHeader.numSubChunks != 0 && mSubChunks.Length() == 0)) {
- return false;
- }
- return true;
- }
- bool
- HashStore::AlreadyReadCompletions()
- {
- // If there are completions but completion set not yet contains any data
- // Then we haven't read completions.
- if ((mHeader.numAddCompletes != 0 && mAddCompletes.Length() == 0) ||
- (mHeader.numSubCompletes != 0 && mSubCompletes.Length() == 0)) {
- return false;
- }
- return true;
- }
- } // namespace safebrowsing
- } // namespace mozilla
|