fune/toolkit/components/url-classifier/LookupCache.cpp
Iris Hsiao 3d85c0330d Backed out 4 changesets (bug 1311935) for causing assertion crash by developer's request
Backed out changeset 27e624cd9479 (bug 1311935)
Backed out changeset 4c0381ab0990 (bug 1311935)
Backed out changeset 73587838ef16 (bug 1311935)
Backed out changeset a5a6c0f79733 (bug 1311935)
2017-04-11 11:04:54 +08:00

607 lines
15 KiB
C++

//* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "LookupCache.h"
#include "HashStore.h"
#include "nsISeekableStream.h"
#include "mozilla/Telemetry.h"
#include "mozilla/Logging.h"
#include "nsNetUtil.h"
#include "prprf.h"
#include "Classifier.h"
// We act as the main entry point for all the real lookups,
// so note that those are not done to the actual HashStore.
// The latter solely exists to store the data needed to handle
// the updates from the protocol.
// This module provides a front for PrefixSet, mUpdateCompletions,
// and mGetHashCache, which together contain everything needed to
// provide a classification as long as the data is up to date.
// PrefixSet stores and provides lookups for 4-byte prefixes.
// mUpdateCompletions contains 32-byte completions which were
// contained in updates. They are retrieved from HashStore/.sbstore
// on startup.
// mGetHashCache contains 32-byte completions which were
// returned from the gethash server. They are not serialized,
// only cached until the next update.
// Name of the persistent PrefixSet storage
#define PREFIXSET_SUFFIX ".pset"
// MOZ_LOG=UrlClassifierDbService:5
extern mozilla::LazyLogModule gUrlClassifierDbServiceLog;
#define LOG(args) MOZ_LOG(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug, args)
#define LOG_ENABLED() MOZ_LOG_TEST(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug)
namespace mozilla {
namespace safebrowsing {
// Version constant for LookupCacheV2.
// NOTE(review): presumably the list/protocol version this cache variant
// implements -- confirm against the declaration in LookupCache.h.
const int LookupCacheV2::VER = 2;
// Creates an unprimed cache for the table |aTableName| served by |aProvider|,
// rooted at |aRootStoreDir|. The per-table store directory is resolved right
// away through UpdateRootDirHandle(); its result is intentionally not checked
// here.
LookupCache::LookupCache(const nsACString& aTableName,
                         const nsACString& aProvider,
                         nsIFile* aRootStoreDir)
  : mPrimed(false),
    mTableName(aTableName),
    mProvider(aProvider),
    mRootStoreDirectory(aRootStoreDir)
{
  // mRootStoreDirectory already holds aRootStoreDir, so this only resolves
  // mStoreDirectory; no clone of the root handle happens on this path.
  UpdateRootDirHandle(aRootStoreDir);
}
// Opens the cache by loading the persisted PrefixSet, if any.
// Returns the failure code from LoadPrefixSet(), or NS_OK on success.
nsresult
LookupCache::Open()
{
  LOG(("Loading PrefixSet"));

  nsresult rv;
  rv = LoadPrefixSet();
  NS_ENSURE_SUCCESS(rv, rv);

  return NS_OK;
}
// Points this cache at |aNewRootStoreDirectory| and re-resolves the private
// (per-table/provider) store directory beneath it.
//
// If the private directory cannot be obtained, mStoreDirectory falls back to
// the root directory, and the failure code is still returned to the caller.
nsresult
LookupCache::UpdateRootDirHandle(nsIFile* aNewRootStoreDirectory)
{
  // Only take a fresh clone when a different handle was passed in.
  if (aNewRootStoreDirectory != mRootStoreDirectory) {
    nsresult rv =
      aNewRootStoreDirectory->Clone(getter_AddRefs(mRootStoreDirectory));
    NS_ENSURE_SUCCESS(rv, rv);
  }

  nsresult rv =
    Classifier::GetPrivateStoreDirectory(mRootStoreDirectory,
                                         mTableName,
                                         mProvider,
                                         getter_AddRefs(mStoreDirectory));
  if (NS_FAILED(rv)) {
    LOG(("Failed to get private store directory for %s", mTableName.get()));
    // Fall back to storing directly in the root directory.
    mStoreDirectory = mRootStoreDirectory;
  }

  if (LOG_ENABLED()) {
    nsString path;
    mStoreDirectory->GetPath(path);
    LOG(("Private store directory for %s is %s", mTableName.get(),
         NS_ConvertUTF16toUTF8(path).get()));
  }

  return rv;
}
// Merges the complete hashes in |aAddCompletes| into mGetHashCache, skipping
// any hash that is already cached.
//
// mGetHashCache is looked up with BinaryIndexOf() (here and in Has()), so it
// must stay sorted at all times. Each new hash is therefore inserted at its
// sorted position instead of being appended and sorted afterwards: appending
// inside the loop would leave the array unsorted while it is still being
// binary-searched, which makes the duplicate check unreliable.
//
// Always returns NS_OK.
nsresult
LookupCache::AddCompletionsToCache(AddCompleteArray& aAddCompletes)
{
  for (uint32_t i = 0; i < aAddCompletes.Length(); i++) {
    const Completion& hash = aAddCompletes[i].CompleteHash();
    if (mGetHashCache.BinaryIndexOf(hash) == mGetHashCache.NoIndex) {
      // Keeps the array sorted, so the next BinaryIndexOf() stays valid.
      mGetHashCache.InsertElementSorted(hash);
    }
  }
  return NS_OK;
}
#if defined(DEBUG)
// Debug-only helper: logs every completion currently held in mGetHashCache
// as a hex string. Does nothing when debug logging is disabled.
void
LookupCache::DumpCache()
{
  if (LOG_ENABLED()) {
    for (auto& completion : mGetHashCache) {
      nsAutoCString str;
      completion.ToHexString(str);
      LOG(("Caches: %s", str.get()));
    }
  }
}
#endif
// Persists the prefix set to "<table name>.pset" inside mStoreDirectory.
//
// Returns NS_ERROR_ABORT if shutdown has started, or the failure code if the
// target file handle cannot be built. A failure of the store itself only
// triggers a warning; NS_OK is still returned in that case.
nsresult
LookupCache::WriteFile()
{
  const bool shuttingDown = nsUrlClassifierDBService::ShutdownHasStarted();
  if (shuttingDown) {
    return NS_ERROR_ABORT;
  }

  // Build <store dir>/<table name><PREFIXSET_SUFFIX>.
  nsCOMPtr<nsIFile> psFile;
  nsresult rv = mStoreDirectory->Clone(getter_AddRefs(psFile));
  NS_ENSURE_SUCCESS(rv, rv);

  rv = psFile->AppendNative(mTableName + NS_LITERAL_CSTRING(PREFIXSET_SUFFIX));
  NS_ENSURE_SUCCESS(rv, rv);

  // Best effort: warn on failure but do not propagate it.
  rv = StoreToFile(psFile);
  NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "failed to store the prefixset");

  return NS_OK;
}
// Resets the cache to its unprimed state: drops the gethash cache, clears the
// stored prefixes, and marks the cache as not primed.
void
LookupCache::ClearAll()
{
ClearCache();
ClearPrefixes();
// With no prefixes loaded the cache no longer counts as primed.
mPrimed = false;
}
// Drops every completion held in mGetHashCache. The prefix set is untouched.
void
LookupCache::ClearCache()
{
mGetHashCache.Clear();
}
/* static */ bool
LookupCache::IsCanonicalizedIP(const nsACString& aHost)
{
// The canonicalization process will have left IP addresses in dotted
// decimal with no surprises.
uint32_t i1, i2, i3, i4;
char c;
if (PR_sscanf(PromiseFlatCString(aHost).get(), "%u.%u.%u.%u%c",
&i1, &i2, &i3, &i4, &c) == 4) {
return (i1 <= 0xFF && i2 <= 0xFF && i3 <= 0xFF && i4 <= 0xFF);
}
return false;
}
// Builds the list of "host/path" lookup keys for |aSpec| and stores them in
// |aFragments|, which is cleared first.
//
// |aSpec| is split at its first '/': the text before it is treated as the
// host, the text after it as the path. If |aSpec| contains no '/',
// |aFragments| is left empty. Every collected host variant is combined with
// every collected path variant.
//
// Always returns NS_OK.
/* static */ nsresult
LookupCache::GetLookupFragments(const nsACString& aSpec,
nsTArray<nsCString>* aFragments)
{
aFragments->Clear();
nsACString::const_iterator begin, end, iter;
aSpec.BeginReading(begin);
aSpec.EndReading(end);
iter = begin;
// Without a '/' there is no host/path split, so no fragments are produced.
if (!FindCharInReadable('/', iter, end)) {
return NS_OK;
}
// host is [start of spec, first '/'); the post-increment leaves iter just
// past that '/', so path below does not include the leading slash.
const nsCSubstring& host = Substring(begin, iter++);
nsAutoCString path;
path.Assign(Substring(iter, end));
/**
* From the protocol doc:
* For the hostname, the client will try at most 5 different strings. They
* are:
* a) The exact hostname of the url
* b) The 4 hostnames formed by starting with the last 5 components and
* successively removing the leading component. The top-level component
* can be skipped. This is not done if the hostname is a numerical IP.
*/
nsTArray<nsCString> hosts;
hosts.AppendElement(host);
if (!IsCanonicalizedIP(host)) {
host.BeginReading(begin);
host.EndReading(end);
int numHostComponents = 0;
// Walk the '.' separators from right to left.
// NOTE(review): this relies on RFindInReadable narrowing [begin, end) to the
// match it found -- confirm against nsReadableUtils.
while (RFindInReadable(NS_LITERAL_CSTRING("."), begin, end) &&
numHostComponents < MAX_HOST_COMPONENTS) {
// don't bother checking toplevel domains
if (++numHostComponents >= 2) {
// Append everything from just after the matched '.' to the end of the host.
host.EndReading(iter);
hosts.AppendElement(Substring(end, iter));
}
// Continue searching in the part of the host left of this match.
end = begin;
host.BeginReading(begin);
}
}
/**
* From the protocol doc:
* For the path, the client will also try at most 6 different strings.
* They are:
* a) the exact path of the url, including query parameters
* b) the exact path of the url, without query parameters
* c) the 4 paths formed by starting at the root (/) and
* successively appending path components, including a trailing
* slash. This behavior should only extend up to the next-to-last
* path component, that is, a trailing slash should never be
* appended that was not present in the original url.
*/
nsTArray<nsCString> paths;
nsAutoCString pathToAdd;
path.BeginReading(begin);
path.EndReading(end);
iter = begin;
// If there is a query string, record the path without it and make the
// prefix scan below stop at the '?'.
if (FindCharInReadable('?', iter, end)) {
pathToAdd = Substring(begin, iter);
paths.AppendElement(pathToAdd);
end = iter;
}
int numPathComponents = 1;
iter = begin;
// Collect each prefix of the path that ends in '/', up to the component limit.
while (FindCharInReadable('/', iter, end) &&
numPathComponents < MAX_PATH_COMPONENTS) {
// Step past the '/' so the prefix includes the trailing slash.
iter++;
pathToAdd.Assign(Substring(begin, iter));
paths.AppendElement(pathToAdd);
numPathComponents++;
}
// If we haven't already done so, add the full path
if (!pathToAdd.Equals(path)) {
paths.AppendElement(path);
}
// Check an empty path (for whole-domain blacklist entries)
paths.AppendElement(EmptyCString());
// Emit the cross product of hosts and paths as "<host>/<path>" keys.
for (uint32_t hostIndex = 0; hostIndex < hosts.Length(); hostIndex++) {
for (uint32_t pathIndex = 0; pathIndex < paths.Length(); pathIndex++) {
nsCString key;
key.Assign(hosts[hostIndex]);
key.Append('/');
key.Append(paths[pathIndex]);
LOG(("Checking fragment %s", key.get()));
aFragments->AppendElement(key);
}
}
return NS_OK;
}
// Appends the host keys derived from |aSpec| to |aHostKeys|. Existing
// entries in |aHostKeys| are left in place.
//
// The host is the part of |aSpec| before its first '/'. Output:
//  - no '/' in |aSpec|: nothing is appended;
//  - host is a canonicalized IP: "<ip>/" is appended;
//  - host has fewer than two dot-separated components: nothing is appended;
//  - otherwise "<last two components>/" is appended, followed by
//    "<last three components>/" if the host has more than two components.
//
// Returns NS_OK, or NS_ERROR_OUT_OF_MEMORY if an element could not be
// appended.
/* static */ nsresult
LookupCache::GetHostKeys(const nsACString& aSpec,
                         nsTArray<nsCString>* aHostKeys)
{
  nsACString::const_iterator begin, end, iter;
  aSpec.BeginReading(begin);
  aSpec.EndReading(end);
  iter = begin;
  if (!FindCharInReadable('/', iter, end)) {
    return NS_OK;
  }

  const nsCSubstring& host = Substring(begin, iter);

  if (IsCanonicalizedIP(host)) {
    nsCString* key = aHostKeys->AppendElement();
    if (!key) {
      return NS_ERROR_OUT_OF_MEMORY;
    }
    key->Assign(host);
    key->Append("/");
    return NS_OK;
  }

  nsTArray<nsCString> hostComponents;
  ParseString(PromiseFlatCString(host), '.', hostComponents);

  if (hostComponents.Length() < 2) {
    // no host or toplevel host, this won't match anything in the db
    return NS_OK;
  }

  // First check with two domain components.
  //
  // The key is built in a local string rather than through a pointer into
  // *aHostKeys: the second AppendElement() below may reallocate the array's
  // storage, which would leave such a pointer dangling when the
  // three-component key is derived from the two-component one.
  const int32_t last = int32_t(hostComponents.Length()) - 1;
  nsAutoCString twoComponentKey;
  twoComponentKey.Assign(hostComponents[last - 1]);
  twoComponentKey.Append(".");
  twoComponentKey.Append(hostComponents[last]);
  twoComponentKey.Append("/");

  nsCString* lookupHost = aHostKeys->AppendElement();
  if (!lookupHost) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  lookupHost->Assign(twoComponentKey);

  // Now check with three domain components
  if (hostComponents.Length() > 2) {
    nsCString* lookupHost2 = aHostKeys->AppendElement();
    if (!lookupHost2) {
      return NS_ERROR_OUT_OF_MEMORY;
    }
    lookupHost2->Assign(hostComponents[last - 2]);
    lookupHost2->Append(".");
    lookupHost2->Append(twoComponentKey);
  }

  return NS_OK;
}
// Loads the prefix set from "<table name>.pset" in mStoreDirectory when that
// file exists, marking the cache as primed on success.
//
// A missing file is not an error: the cache simply stays unprimed. Returns
// the failure code if the file handle cannot be built, the existence check
// fails, or loading the file fails; NS_OK otherwise.
nsresult
LookupCache::LoadPrefixSet()
{
  nsCOMPtr<nsIFile> psFile;
  nsresult rv = mStoreDirectory->Clone(getter_AddRefs(psFile));
  NS_ENSURE_SUCCESS(rv, rv);

  rv = psFile->AppendNative(mTableName + NS_LITERAL_CSTRING(PREFIXSET_SUFFIX));
  NS_ENSURE_SUCCESS(rv, rv);

  bool exists;
  rv = psFile->Exists(&exists);
  NS_ENSURE_SUCCESS(rv, rv);

  if (!exists) {
    LOG(("no (usable) stored PrefixSet found"));
  } else {
    LOG(("stored PrefixSet exists, loading from disk"));
    rv = LoadFromFile(psFile);
    if (NS_FAILED(rv)) {
      return rv;
    }
    mPrimed = true;
  }

#ifdef DEBUG
  // Report the in-memory footprint once something has been loaded.
  if (mPrimed) {
    uint32_t size = SizeOfPrefixSet();
    LOG(("SB tree done, size = %d bytes\n", size));
  }
#endif

  return NS_OK;
}
// Allocates the backing nsUrlClassifierPrefixSet and initializes it with the
// table name. Returns the prefix set's failure code, or NS_OK.
nsresult
LookupCacheV2::Init()
{
  mPrefixSet = new nsUrlClassifierPrefixSet();

  nsresult rv;
  rv = mPrefixSet->Init(mTableName);
  NS_ENSURE_SUCCESS(rv, rv);

  return NS_OK;
}
// Opens the V2 cache: first the base-class state (the prefix set), then the
// completions read from the HashStore. Stops at the first failure and
// returns its code; returns NS_OK when both steps succeed.
nsresult
LookupCacheV2::Open()
{
  nsresult rv;

  rv = LookupCache::Open();
  NS_ENSURE_SUCCESS(rv, rv);

  LOG(("Reading Completions"));

  rv = ReadCompletions();
  NS_ENSURE_SUCCESS(rv, rv);

  return NS_OK;
}
// Clears everything the base class clears and additionally drops the
// completions that were collected from updates.
void
LookupCacheV2::ClearAll()
{
LookupCache::ClearAll();
mUpdateCompletions.Clear();
}
// Checks whether |aCompletion| is known to this table.
//
// Out-parameters:
//   aHas         - true if either the 4-byte prefix or the full completion
//                  matched.
//   aMatchLength - 0 for no match, PREFIX_SIZE for a prefix-only match,
//                  COMPLETE_SIZE when the full completion matched.
//   aFromCache   - true only when the full completion was found in
//                  mGetHashCache or mUpdateCompletions.
//
// Returns the failure code from the prefix set lookup, or NS_OK.
nsresult
LookupCacheV2::Has(const Completion& aCompletion,
                   bool* aHas, uint32_t* aMatchLength,
                   bool* aFromCache)
{
  // Start from "nothing matched".
  *aFromCache = false;
  *aHas = false;
  *aMatchLength = 0;

  const uint32_t prefix = aCompletion.ToUint32();

  bool found;
  nsresult rv = mPrefixSet->Contains(prefix, &found);
  NS_ENSURE_SUCCESS(rv, rv);

  LOG(("Probe in %s: %X, found %d", mTableName.get(), prefix, found));

  if (found) {
    *aHas = true;
    *aMatchLength = PREFIX_SIZE;
  }

  // A full-length match overrides whatever the prefix lookup reported.
  const bool completeMatch =
    mGetHashCache.BinaryIndexOf(aCompletion) != nsTArray<Completion>::NoIndex ||
    mUpdateCompletions.BinaryIndexOf(aCompletion) != nsTArray<Completion>::NoIndex;
  if (completeMatch) {
    LOG(("Complete in %s", mTableName.get()));
    *aFromCache = true;
    *aHas = true;
    *aMatchLength = COMPLETE_SIZE;
  }

  return NS_OK;
}
// Sets |*aConfirmed| according to how fresh this table is.
//
// The entry is confirmed only if |aTableFreshness| has a timestamp for this
// table and fewer than |aFreshnessGuarantee| seconds have passed since it.
// |aEntry| itself is not inspected.
void
LookupCacheV2::IsHashEntryConfirmed(const Completion& aEntry,
                                    const TableFreshnessMap& aTableFreshness,
                                    uint32_t aFreshnessGuarantee,
                                    bool* aConfirmed)
{
  int64_t age; // in seconds
  if (!aTableFreshness.Get(mTableName, &age)) {
    // No freshness record for this table: cannot confirm.
    *aConfirmed = false;
    return;
  }

  int64_t now = (PR_Now() / PR_USEC_PER_SEC);
  MOZ_ASSERT(age <= now);

  // A completion counts as confirmed only while its table is up-to-date.
  *aConfirmed = (now - age) < aFreshnessGuarantee;
}
// Reports whether the underlying prefix set holds no prefixes. The result
// code of the prefix set call is not examined.
bool
LookupCacheV2::IsEmpty()
{
  bool empty;
  mPrefixSet->IsEmpty(&empty);
  return empty;
}
// Rebuilds the cache from a complete set of add-prefixes and add-completes.
//
// mUpdateCompletions is replaced by the sorted complete hashes taken from
// |aAddCompletes|, and the prefix set is reconstructed from |aAddPrefixes|.
// Both input arrays are emptied in the process. The sizes of both inputs are
// reported to telemetry.
//
// Returns the failure code from ConstructPrefixSet(), or NS_OK; the cache is
// marked primed on success.
nsresult
LookupCacheV2::Build(AddPrefixArray& aAddPrefixes,
                     AddCompleteArray& aAddCompletes)
{
  const uint32_t completionCount =
    static_cast<uint32_t>(aAddCompletes.Length());
  Telemetry::Accumulate(Telemetry::URLCLASSIFIER_LC_COMPLETIONS,
                        completionCount);

  mUpdateCompletions.Clear();
  mUpdateCompletions.SetCapacity(aAddCompletes.Length());
  for (auto& addComplete : aAddCompletes) {
    mUpdateCompletions.AppendElement(addComplete.CompleteHash());
  }
  aAddCompletes.Clear();
  // Has() binary-searches this array, so it has to be sorted.
  mUpdateCompletions.Sort();

  const uint32_t prefixCount = static_cast<uint32_t>(aAddPrefixes.Length());
  Telemetry::Accumulate(Telemetry::URLCLASSIFIER_LC_PREFIXES, prefixCount);

  nsresult rv = ConstructPrefixSet(aAddPrefixes);
  NS_ENSURE_SUCCESS(rv, rv);

  mPrimed = true;
  return NS_OK;
}
// Retrieves the stored prefixes into |aAddPrefixes| via the prefix set.
//
// When the cache is not primed (e.g. a brand-new table) the call succeeds
// without touching |aAddPrefixes|.
nsresult
LookupCacheV2::GetPrefixes(FallibleTArray<uint32_t>& aAddPrefixes)
{
  if (mPrimed) {
    return mPrefixSet->GetPrefixesNative(aAddPrefixes);
  }

  // This can happen if it's a new table, so no error.
  LOG(("GetPrefixes from empty LookupCache"));
  return NS_OK;
}
// Replaces mUpdateCompletions with the add-completes stored in this table's
// HashStore. The entries are copied in the order the store returns them.
//
// Returns the failure code from opening the store, or NS_OK.
nsresult
LookupCacheV2::ReadCompletions()
{
  HashStore store(mTableName, mProvider, mRootStoreDirectory);

  nsresult rv = store.Open();
  NS_ENSURE_SUCCESS(rv, rv);

  mUpdateCompletions.Clear();

  const AddCompleteArray& storedCompletes = store.AddCompletes();
  for (const auto& entry : storedCompletes) {
    mUpdateCompletions.AppendElement(entry.complete);
  }

  return NS_OK;
}
// Empties the prefix set by installing a zero-length prefix list.
// Returns whatever the prefix set reports.
nsresult
LookupCacheV2::ClearPrefixes()
{
  nsresult rv = mPrefixSet->SetPrefixes(nullptr, 0);
  return rv;
}
// Writes the prefix set to |aFile|; the work is delegated to the prefix set
// and its result code is returned unchanged.
nsresult
LookupCacheV2::StoreToFile(nsIFile* aFile)
{
  nsresult rv = mPrefixSet->StoreToFile(aFile);
  return rv;
}
// Reads the prefix set from |aFile|; the work is delegated to the prefix set
// and its result code is returned unchanged.
nsresult
LookupCacheV2::LoadFromFile(nsIFile* aFile)
{
  nsresult rv = mPrefixSet->LoadFromFile(aFile);
  return rv;
}
// Returns the size in bytes of the prefix set as measured with
// moz_malloc_size_of, including the prefix set object itself.
size_t
LookupCacheV2::SizeOfPrefixSet()
{
  const size_t bytes = mPrefixSet->SizeOfIncludingThis(moz_malloc_size_of);
  return bytes;
}
#ifdef DEBUG
// Debug-only check that |aArray| is sorted in non-decreasing order. Asserts
// on the first adjacent pair that is out of order; arrays with fewer than
// two elements trivially pass.
template <class T>
static void EnsureSorted(T* aArray)
{
  typedef typename T::elem_type Elem;

  Elem* previous = aArray->Elements();
  Elem* const end = previous + aArray->Length();
  if (previous == end) {
    return;
  }

  // Compare each element with its immediate predecessor.
  for (Elem* iter = previous + 1; iter != end; previous = iter, ++iter) {
    MOZ_ASSERT(*previous <= *iter);
  }
}
#endif
// Replaces the contents of the prefix set with the 4-byte prefixes taken
// from |aAddPrefixes|, which is emptied in the process. The construction
// time is recorded through a telemetry timer.
//
// The prefixes are passed on in the order given; debug builds assert that
// this order is sorted, as the prefix set requires.
//
// Returns NS_ERROR_OUT_OF_MEMORY if the temporary array cannot be allocated,
// the failure code from the prefix set, or NS_OK (marking the cache primed).
nsresult
LookupCacheV2::ConstructPrefixSet(AddPrefixArray& aAddPrefixes)
{
  Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_PS_CONSTRUCT_TIME> timer;

  // Reserve everything up front so the allocation failure is reported here.
  nsTArray<uint32_t> array;
  if (!array.SetCapacity(aAddPrefixes.Length(), fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  for (auto& addPrefix : aAddPrefixes) {
    array.AppendElement(addPrefix.PrefixHash().ToUint32());
  }
  aAddPrefixes.Clear();

#ifdef DEBUG
  // PrefixSet requires sorted order
  EnsureSorted(&array);
#endif

  // construct new one, replace old entries
  nsresult rv = mPrefixSet->SetPrefixes(array.Elements(), array.Length());
  NS_ENSURE_SUCCESS(rv, rv);

#ifdef DEBUG
  uint32_t size = mPrefixSet->SizeOfIncludingThis(moz_malloc_size_of);
  LOG(("SB tree done, size = %d bytes\n", size));
#endif

  mPrimed = true;
  return NS_OK;
}
#if defined(DEBUG)
// Debug-only helper: logs every completion held in mUpdateCompletions as a
// hex string. Does nothing when debug logging is disabled.
void
LookupCacheV2::DumpCompletions()
{
  if (LOG_ENABLED()) {
    for (auto& completion : mUpdateCompletions) {
      nsAutoCString str;
      completion.ToHexString(str);
      LOG(("Update: %s", str.get()));
    }
  }
}
#endif
} // namespace safebrowsing
} // namespace mozilla