/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "LookupCache.h" #include "HashStore.h" #include "mozilla/ArrayUtils.h" #include "mozilla/Telemetry.h" #include "mozilla/Logging.h" #include "nsNetUtil.h" #include "prprf.h" #include "Classifier.h" #include "nsUrlClassifierInfo.h" // We act as the main entry point for all the real lookups, // so note that those are not done to the actual HashStore. // The latter solely exists to store the data needed to handle // the updates from the protocol. // This module provides a front for PrefixSet, mUpdateCompletions, // and mGetHashCache, which together contain everything needed to // provide a classification as long as the data is up to date. // PrefixSet stores and provides lookups for 4-byte prefixes. // mUpdateCompletions contains 32-byte completions which were // contained in updates. They are retrieved from HashStore/.sbtore // on startup. // mGetHashCache contains 32-byte completions which were // returned from the gethash server. They are not serialized, // only cached until the next update. #define V2_CACHE_DURATION_SEC (15 * 60) // MOZ_LOG=UrlClassifierDbService:5 extern mozilla::LazyLogModule gUrlClassifierDbServiceLog; #define LOG(args) \ MOZ_LOG(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug, args) #define LOG_ENABLED() \ MOZ_LOG_TEST(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug) namespace mozilla { namespace safebrowsing { const uint32_t LookupCache::MAX_BUFFER_SIZE = 64 * 1024; const int CacheResultV2::VER = CacheResult::V2; const int CacheResultV4::VER = CacheResult::V4; const int LookupCacheV2::VER = 2; static void CStringToHexString(const nsACString& aIn, nsACString& aOut) { static const char* const lut = "0123456789ABCDEF"; size_t len = aIn.Length(); MOZ_ASSERT(len <= COMPLETE_SIZE); aOut.SetCapacity(2 * len); for (size_t i = 0; i < aIn.Length(); ++i) { const char c = static_cast(aIn[i]); aOut.Append(lut[(c >> 4) & 0x0F]); aOut.Append(lut[c & 15]); } } LookupCache::LookupCache(const nsACString& aTableName, const nsACString& aProvider, nsCOMPtr& aRootStoreDir) : mPrimed(false), mTableName(aTableName), mProvider(aProvider), mRootStoreDirectory(aRootStoreDir) { UpdateRootDirHandle(mRootStoreDirectory); } nsresult LookupCache::Open() { LOG(("Loading PrefixSet for %s", mTableName.get())); nsresult rv = LoadPrefixSet(); NS_ENSURE_SUCCESS(rv, rv); return NS_OK; } nsresult LookupCache::UpdateRootDirHandle( nsCOMPtr& aNewRootStoreDirectory) { nsresult rv; if (aNewRootStoreDirectory != mRootStoreDirectory) { rv = aNewRootStoreDirectory->Clone(getter_AddRefs(mRootStoreDirectory)); NS_ENSURE_SUCCESS(rv, rv); } rv = Classifier::GetPrivateStoreDirectory(mRootStoreDirectory, mTableName, mProvider, getter_AddRefs(mStoreDirectory)); if (NS_FAILED(rv)) { LOG(("Failed to get private store directory for %s", mTableName.get())); mStoreDirectory = mRootStoreDirectory; } if (LOG_ENABLED()) { nsString path; mStoreDirectory->GetPath(path); LOG(("Private store directory for %s is %s", mTableName.get(), NS_ConvertUTF16toUTF8(path).get())); } return rv; } nsresult LookupCache::WriteFile() { if (nsUrlClassifierDBService::ShutdownHasStarted()) { return NS_ERROR_ABORT; } nsCOMPtr psFile; nsresult rv = mStoreDirectory->Clone(getter_AddRefs(psFile)); NS_ENSURE_SUCCESS(rv, rv); rv = psFile->AppendNative(mTableName + GetPrefixSetSuffix()); NS_ENSURE_SUCCESS(rv, rv); rv = StoreToFile(psFile); NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "failed to store the prefixset"); return NS_OK; } nsresult LookupCache::CheckCache(const Completion& aCompletion, bool* aHas, bool* aConfirmed) { // Shouldn't call this function if prefix is not in the database. MOZ_ASSERT(*aHas); *aConfirmed = false; uint32_t prefix = aCompletion.ToUint32(); CachedFullHashResponse* fullHashResponse = mFullHashCache.Get(prefix); if (!fullHashResponse) { return NS_OK; } int64_t nowSec = PR_Now() / PR_USEC_PER_SEC; int64_t expiryTimeSec; FullHashExpiryCache& fullHashes = fullHashResponse->fullHashes; nsDependentCSubstring completion( reinterpret_cast(aCompletion.buf), COMPLETE_SIZE); // Check if we can find the fullhash in positive cache if (fullHashes.Get(completion, &expiryTimeSec)) { if (nowSec <= expiryTimeSec) { // Url is NOT safe. *aConfirmed = true; LOG(("Found a valid fullhash in the positive cache")); } else { // Trigger a gethash request in this case(aConfirmed is false). LOG(("Found an expired fullhash in the positive cache")); // Remove fullhash entry from the cache when the negative cache // is also expired because whether or not the fullhash is cached // locally, we will need to consult the server next time we // lookup this hash. We may as well remove it from our cache. if (fullHashResponse->negativeCacheExpirySec < expiryTimeSec) { fullHashes.Remove(completion); if (fullHashes.Count() == 0 && fullHashResponse->negativeCacheExpirySec < nowSec) { mFullHashCache.Remove(prefix); } } } return NS_OK; } // Check negative cache. if (fullHashResponse->negativeCacheExpirySec >= nowSec) { // Url is safe. LOG(("Found a valid prefix in the negative cache")); *aHas = false; } else { LOG(("Found an expired prefix in the negative cache")); if (fullHashes.Count() == 0) { mFullHashCache.Remove(prefix); } } return NS_OK; } // This function remove cache entries whose negative cache time is expired. // It is possible that a cache entry whose positive cache time is not yet // expired but still being removed after calling this API. Right now we call // this on every update. void LookupCache::InvalidateExpiredCacheEntries() { int64_t nowSec = PR_Now() / PR_USEC_PER_SEC; for (auto iter = mFullHashCache.Iter(); !iter.Done(); iter.Next()) { CachedFullHashResponse* response = iter.UserData(); if (response->negativeCacheExpirySec < nowSec) { iter.Remove(); } } } void LookupCache::CopyFullHashCache(const LookupCache* aSource) { if (!aSource) { return; } CopyClassHashTable(aSource->mFullHashCache, mFullHashCache); } void LookupCache::ClearCache() { mFullHashCache.Clear(); } void LookupCache::ClearAll() { ClearCache(); ClearPrefixes(); mPrimed = false; } void LookupCache::GetCacheInfo(nsIUrlClassifierCacheInfo** aCache) const { MOZ_ASSERT(aCache); RefPtr info = new nsUrlClassifierCacheInfo; info->table = mTableName; for (auto iter = mFullHashCache.ConstIter(); !iter.Done(); iter.Next()) { RefPtr entry = new nsUrlClassifierCacheEntry; // Set prefix of the cache entry. nsAutoCString prefix(reinterpret_cast(&iter.Key()), PREFIX_SIZE); CStringToHexString(prefix, entry->prefix); // Set expiry of the cache entry. CachedFullHashResponse* response = iter.UserData(); entry->expirySec = response->negativeCacheExpirySec; // Set positive cache. FullHashExpiryCache& fullHashes = response->fullHashes; for (auto iter2 = fullHashes.ConstIter(); !iter2.Done(); iter2.Next()) { RefPtr match = new nsUrlClassifierPositiveCacheEntry; // Set fullhash of positive cache entry. CStringToHexString(iter2.Key(), match->fullhash); // Set expiry of positive cache entry. match->expirySec = iter2.Data(); entry->matches.AppendElement( static_cast(match)); } info->entries.AppendElement( static_cast(entry)); } info.forget(aCache); } /* static */ bool LookupCache::IsCanonicalizedIP(const nsACString& aHost) { // The canonicalization process will have left IP addresses in dotted // decimal with no surprises. uint32_t i1, i2, i3, i4; char c; if (PR_sscanf(PromiseFlatCString(aHost).get(), "%u.%u.%u.%u%c", &i1, &i2, &i3, &i4, &c) == 4) { return (i1 <= 0xFF && i2 <= 0xFF && i3 <= 0xFF && i4 <= 0xFF); } return false; } /* static */ nsresult LookupCache::GetLookupFragments(const nsACString& aSpec, nsTArray* aFragments) { aFragments->Clear(); nsACString::const_iterator begin, end, iter; aSpec.BeginReading(begin); aSpec.EndReading(end); iter = begin; if (!FindCharInReadable('/', iter, end)) { return NS_OK; } const nsACString& host = Substring(begin, iter++); nsAutoCString path; path.Assign(Substring(iter, end)); /** * From the protocol doc: * For the hostname, the client will try at most 5 different strings. They * are: * a) The exact hostname of the url * b) The 4 hostnames formed by starting with the last 5 components and * successivly removing the leading component. The top-level component * can be skipped. This is not done if the hostname is a numerical IP. */ nsTArray hosts; hosts.AppendElement(host); if (!IsCanonicalizedIP(host)) { host.BeginReading(begin); host.EndReading(end); int numHostComponents = 0; while (RFindInReadable(NS_LITERAL_CSTRING("."), begin, end) && numHostComponents < MAX_HOST_COMPONENTS) { // don't bother checking toplevel domains if (++numHostComponents >= 2) { host.EndReading(iter); hosts.AppendElement(Substring(end, iter)); } end = begin; host.BeginReading(begin); } } /** * From the protocol doc: * For the path, the client will also try at most 6 different strings. * They are: * a) the exact path of the url, including query parameters * b) the exact path of the url, without query parameters * c) the 4 paths formed by starting at the root (/) and * successively appending path components, including a trailing * slash. This behavior should only extend up to the next-to-last * path component, that is, a trailing slash should never be * appended that was not present in the original url. */ nsTArray paths; nsAutoCString pathToAdd; path.BeginReading(begin); path.EndReading(end); iter = begin; if (FindCharInReadable('?', iter, end)) { pathToAdd = Substring(begin, iter); paths.AppendElement(pathToAdd); end = iter; } int numPathComponents = 1; iter = begin; while (FindCharInReadable('/', iter, end) && numPathComponents < MAX_PATH_COMPONENTS) { iter++; pathToAdd.Assign(Substring(begin, iter)); paths.AppendElement(pathToAdd); numPathComponents++; } // If we haven't already done so, add the full path if (!pathToAdd.Equals(path)) { paths.AppendElement(path); } // Check an empty path (for whole-domain blacklist entries) paths.AppendElement(EmptyCString()); for (uint32_t hostIndex = 0; hostIndex < hosts.Length(); hostIndex++) { for (uint32_t pathIndex = 0; pathIndex < paths.Length(); pathIndex++) { nsCString key; key.Assign(hosts[hostIndex]); key.Append('/'); key.Append(paths[pathIndex]); aFragments->AppendElement(key); } } return NS_OK; } /* static */ nsresult LookupCache::GetHostKeys(const nsACString& aSpec, nsTArray* aHostKeys) { nsACString::const_iterator begin, end, iter; aSpec.BeginReading(begin); aSpec.EndReading(end); iter = begin; if (!FindCharInReadable('/', iter, end)) { return NS_OK; } const nsACString& host = Substring(begin, iter); if (IsCanonicalizedIP(host)) { nsCString* key = aHostKeys->AppendElement(); if (!key) return NS_ERROR_OUT_OF_MEMORY; key->Assign(host); key->AppendLiteral("/"); return NS_OK; } nsTArray hostComponents; ParseString(PromiseFlatCString(host), '.', hostComponents); if (hostComponents.Length() < 2) { // no host or toplevel host, this won't match anything in the db return NS_OK; } // First check with two domain components int32_t last = int32_t(hostComponents.Length()) - 1; nsCString* lookupHost = aHostKeys->AppendElement(); if (!lookupHost) return NS_ERROR_OUT_OF_MEMORY; lookupHost->Assign(hostComponents[last - 1]); lookupHost->AppendLiteral("."); lookupHost->Append(hostComponents[last]); lookupHost->AppendLiteral("/"); // Now check with three domain components if (hostComponents.Length() > 2) { nsCString* lookupHost2 = aHostKeys->AppendElement(); if (!lookupHost2) return NS_ERROR_OUT_OF_MEMORY; lookupHost2->Assign(hostComponents[last - 2]); lookupHost2->AppendLiteral("."); lookupHost2->Append(*lookupHost); } return NS_OK; } nsresult LookupCache::LoadPrefixSet() { nsCOMPtr psFile; nsresult rv = mStoreDirectory->Clone(getter_AddRefs(psFile)); NS_ENSURE_SUCCESS(rv, rv); rv = psFile->AppendNative(mTableName + GetPrefixSetSuffix()); NS_ENSURE_SUCCESS(rv, rv); bool exists; rv = psFile->Exists(&exists); NS_ENSURE_SUCCESS(rv, rv); if (exists) { LOG(("stored PrefixSet exists, loading from disk")); rv = LoadFromFile(psFile); if (NS_FAILED(rv)) { return rv; } mPrimed = true; } else { // The only scenario we load the old .pset file is when we haven't received // a SafeBrowsng update before. After receiving an update, new .vlpset will // be stored while old .pset will be removed. if (NS_SUCCEEDED(LoadLegacyFile())) { mPrimed = true; } else { LOG(("no (usable) stored PrefixSet found")); } } #ifdef DEBUG if (mPrimed) { uint32_t size = SizeOfPrefixSet(); LOG(("SB tree done, size = %d bytes\n", size)); } #endif return NS_OK; } #if defined(DEBUG) static nsCString GetFormattedTimeString(int64_t aCurTimeSec) { PRExplodedTime pret; PR_ExplodeTime(aCurTimeSec * PR_USEC_PER_SEC, PR_GMTParameters, &pret); return nsPrintfCString("%04d-%02d-%02d %02d:%02d:%02d UTC", pret.tm_year, pret.tm_month + 1, pret.tm_mday, pret.tm_hour, pret.tm_min, pret.tm_sec); } void LookupCache::DumpCache() const { if (!LOG_ENABLED()) { return; } for (auto iter = mFullHashCache.ConstIter(); !iter.Done(); iter.Next()) { CachedFullHashResponse* response = iter.UserData(); nsAutoCString prefix; CStringToHexString( nsCString(reinterpret_cast(&iter.Key()), PREFIX_SIZE), prefix); LOG(("Cache prefix(%s): %s, Expiry: %s", mTableName.get(), prefix.get(), GetFormattedTimeString(response->negativeCacheExpirySec).get())); FullHashExpiryCache& fullHashes = response->fullHashes; for (auto iter2 = fullHashes.ConstIter(); !iter2.Done(); iter2.Next()) { nsAutoCString fullhash; CStringToHexString(iter2.Key(), fullhash); LOG((" - %s, Expiry: %s", fullhash.get(), GetFormattedTimeString(iter2.Data()).get())); } } } #endif nsresult LookupCacheV2::Init() { mPrefixSet = new nsUrlClassifierPrefixSet(); nsresult rv = mPrefixSet->Init(mTableName); NS_ENSURE_SUCCESS(rv, rv); return NS_OK; } nsresult LookupCacheV2::Open() { nsresult rv = LookupCache::Open(); NS_ENSURE_SUCCESS(rv, rv); LOG(("Reading Completions")); rv = ReadCompletions(); NS_ENSURE_SUCCESS(rv, rv); return NS_OK; } void LookupCacheV2::ClearAll() { LookupCache::ClearAll(); mUpdateCompletions.Clear(); } nsresult LookupCacheV2::Has(const Completion& aCompletion, bool* aHas, uint32_t* aMatchLength, bool* aConfirmed) { *aHas = *aConfirmed = false; *aMatchLength = 0; uint32_t prefix = aCompletion.ToUint32(); bool found; nsresult rv = mPrefixSet->Contains(prefix, &found); NS_ENSURE_SUCCESS(rv, rv); if (found) { *aHas = true; *aMatchLength = PREFIX_SIZE; } else if (mUpdateCompletions.ContainsSorted(aCompletion)) { // Completions is found in database, confirm the result *aHas = true; *aMatchLength = COMPLETE_SIZE; *aConfirmed = true; } if (*aHas && !(*aConfirmed)) { rv = CheckCache(aCompletion, aHas, aConfirmed); } return rv; } bool LookupCacheV2::IsEmpty() const { bool isEmpty; mPrefixSet->IsEmpty(&isEmpty); return isEmpty; } nsresult LookupCacheV2::Build(AddPrefixArray& aAddPrefixes, AddCompleteArray& aAddCompletes) { Telemetry::Accumulate(Telemetry::URLCLASSIFIER_LC_COMPLETIONS, static_cast(aAddCompletes.Length())); mUpdateCompletions.Clear(); if (!mUpdateCompletions.SetCapacity(aAddCompletes.Length(), fallible)) { return NS_ERROR_OUT_OF_MEMORY; } for (uint32_t i = 0; i < aAddCompletes.Length(); i++) { mUpdateCompletions.AppendElement(aAddCompletes[i].CompleteHash()); } aAddCompletes.Clear(); mUpdateCompletions.Sort(); Telemetry::Accumulate(Telemetry::URLCLASSIFIER_LC_PREFIXES, static_cast(aAddPrefixes.Length())); nsresult rv = ConstructPrefixSet(aAddPrefixes); NS_ENSURE_SUCCESS(rv, rv); return NS_OK; } nsresult LookupCacheV2::GetPrefixes(FallibleTArray& aAddPrefixes) { if (!mPrimed) { // This can happen if its a new table, so no error. LOG(("GetPrefixes from empty LookupCache")); return NS_OK; } return mPrefixSet->GetPrefixesNative(aAddPrefixes); } void LookupCacheV2::AddGethashResultToCache( const AddCompleteArray& aAddCompletes, const MissPrefixArray& aMissPrefixes, int64_t aExpirySec) { int64_t defaultExpirySec = PR_Now() / PR_USEC_PER_SEC + V2_CACHE_DURATION_SEC; if (aExpirySec != 0) { defaultExpirySec = aExpirySec; } for (const AddComplete& add : aAddCompletes) { nsDependentCSubstring fullhash( reinterpret_cast(add.CompleteHash().buf), COMPLETE_SIZE); CachedFullHashResponse* response = mFullHashCache.LookupOrAdd(add.ToUint32()); response->negativeCacheExpirySec = defaultExpirySec; FullHashExpiryCache& fullHashes = response->fullHashes; fullHashes.Put(fullhash, defaultExpirySec); } for (const Prefix& prefix : aMissPrefixes) { CachedFullHashResponse* response = mFullHashCache.LookupOrAdd(prefix.ToUint32()); response->negativeCacheExpirySec = defaultExpirySec; } } nsresult LookupCacheV2::ReadCompletions() { HashStore store(mTableName, mProvider, mRootStoreDirectory); nsresult rv = store.Open(); NS_ENSURE_SUCCESS(rv, rv); mUpdateCompletions.Clear(); const AddCompleteArray& addComplete = store.AddCompletes(); if (!mUpdateCompletions.SetCapacity(addComplete.Length(), fallible)) { return NS_ERROR_OUT_OF_MEMORY; } for (uint32_t i = 0; i < addComplete.Length(); i++) { mUpdateCompletions.AppendElement(addComplete[i].complete); } return NS_OK; } nsresult LookupCacheV2::ClearPrefixes() { return mPrefixSet->SetPrefixes(nullptr, 0); } nsresult LookupCacheV2::StoreToFile(nsCOMPtr& aFile) { nsCOMPtr localOutFile; nsresult rv = NS_NewLocalFileOutputStream(getter_AddRefs(localOutFile), aFile, PR_WRONLY | PR_TRUNCATE | PR_CREATE_FILE); NS_ENSURE_SUCCESS(rv, rv); uint32_t fileSize; // Preallocate the file storage { nsCOMPtr fos(do_QueryInterface(localOutFile)); Telemetry::AutoTimer timer; fileSize = mPrefixSet->CalculatePreallocateSize(); // Ignore failure, the preallocation is a hint and we write out the entire // file later on Unused << fos->Preallocate(fileSize); } // Convert to buffered stream nsCOMPtr out; rv = NS_NewBufferedOutputStream(getter_AddRefs(out), localOutFile.forget(), std::min(fileSize, MAX_BUFFER_SIZE)); NS_ENSURE_SUCCESS(rv, rv); rv = mPrefixSet->WritePrefixes(out); NS_ENSURE_SUCCESS(rv, rv); LOG(("[%s] Storing PrefixSet successful", mTableName.get())); return NS_OK; } nsresult LookupCacheV2::LoadLegacyFile() { return NS_ERROR_NOT_IMPLEMENTED; } nsresult LookupCacheV2::LoadFromFile(nsCOMPtr& aFile) { Telemetry::AutoTimer timer; nsCOMPtr localInFile; nsresult rv = NS_NewLocalFileInputStream(getter_AddRefs(localInFile), aFile, PR_RDONLY | nsIFile::OS_READAHEAD); NS_ENSURE_SUCCESS(rv, rv); // Calculate how big the file is, make sure our read buffer isn't bigger // than the file itself which is just wasting memory. int64_t fileSize; rv = aFile->GetFileSize(&fileSize); NS_ENSURE_SUCCESS(rv, rv); if (fileSize < 0 || fileSize > UINT32_MAX) { return NS_ERROR_FAILURE; } uint32_t bufferSize = std::min(static_cast(fileSize), MAX_BUFFER_SIZE); // Convert to buffered stream nsCOMPtr in; rv = NS_NewBufferedInputStream(getter_AddRefs(in), localInFile.forget(), bufferSize); NS_ENSURE_SUCCESS(rv, rv); rv = mPrefixSet->LoadPrefixes(in); NS_ENSURE_SUCCESS(rv, rv); mPrimed = true; LOG(("[%s] Loading PrefixSet successful", mTableName.get())); return NS_OK; } size_t LookupCacheV2::SizeOfPrefixSet() const { return mPrefixSet->SizeOfIncludingThis(moz_malloc_size_of); } nsCString LookupCacheV2::GetPrefixSetSuffix() const { return NS_LITERAL_CSTRING(".pset"); } #ifdef DEBUG template static void EnsureSorted(T* aArray) { typename T::elem_type* start = aArray->Elements(); typename T::elem_type* end = aArray->Elements() + aArray->Length(); typename T::elem_type* iter = start; typename T::elem_type* previous = start; while (iter != end) { previous = iter; ++iter; if (iter != end) { MOZ_ASSERT(*previous <= *iter); } } return; } #endif nsresult LookupCacheV2::ConstructPrefixSet(AddPrefixArray& aAddPrefixes) { Telemetry::AutoTimer timer; nsTArray array; if (!array.SetCapacity(aAddPrefixes.Length(), fallible)) { return NS_ERROR_OUT_OF_MEMORY; } for (uint32_t i = 0; i < aAddPrefixes.Length(); i++) { array.AppendElement(aAddPrefixes[i].PrefixHash().ToUint32()); } aAddPrefixes.Clear(); #ifdef DEBUG // PrefixSet requires sorted order EnsureSorted(&array); #endif // construct new one, replace old entries nsresult rv = mPrefixSet->SetPrefixes(array.Elements(), array.Length()); NS_ENSURE_SUCCESS(rv, rv); #ifdef DEBUG uint32_t size; size = mPrefixSet->SizeOfIncludingThis(moz_malloc_size_of); LOG(("SB tree done, size = %d bytes\n", size)); #endif mPrimed = true; return NS_OK; } #if defined(DEBUG) void LookupCacheV2::DumpCompletions() const { if (!LOG_ENABLED()) return; for (uint32_t i = 0; i < mUpdateCompletions.Length(); i++) { nsAutoCString str; mUpdateCompletions[i].ToHexString(str); LOG(("Update: %s", str.get())); } } #endif } // namespace safebrowsing } // namespace mozilla