fune/toolkit/components/url-classifier/LookupCache.cpp
dimi 127351f6ef Bug 1311935 - P2. Process fullHashes.find response. r=francois
This patch includes following changes:

1. nsUrlClassifierHashCompleter.js
   nsUrlClassifierHashCompleter.idl
   - Add completionV4 interface for hashCompleter to pass response data to
     DB service.
   - Process response data includes negative cache duration, matched full
     hashes and cache duration for each match. Full matches are passed through
     nsIFullHashMatch interface.
   - Change _requests.responses from array contains matched fullhashes to
     dictionary so that it can store additional information likes negative cache
     duration.
2. nsUrlClassifierDBService.cpp
   - Implement CompletionV4 interface, store response data to CacheResultV4
     object. Expired duration to expired time is handled here.
   - Add CacheResultToTableUpdate function to convert V2 & V4 cache result
     to TableUpdate object.
3. LookupCache.h
   - Extend CacheResult to CacheResultV2 and CacheResultV4 so we can store
     response data in CompletionV2 and CompletionV4.
4. HashStore.h
   - Add API and member variable in TableUpdateV4 to store response data.
     TableUpdate object is used by DB service to pass update data or gethash
     response to Classifier, so we need to extend TableUpdateV4 to be able
     to store fullHashes.find response.
6. Entry.h
   - Define the structure about how we cache fullHashes.find response.

MozReview-Commit-ID: FV4yAl2SAc6

--HG--
extra : rebase_source : 02676ded9bc0580c24b4c906199a4abaf004e296
2017-04-11 11:50:48 +08:00

610 lines
15 KiB
C++

//* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "LookupCache.h"
#include "HashStore.h"
#include "nsISeekableStream.h"
#include "mozilla/Telemetry.h"
#include "mozilla/Logging.h"
#include "nsNetUtil.h"
#include "prprf.h"
#include "Classifier.h"
// We act as the main entry point for all the real lookups,
// so note that those are not done to the actual HashStore.
// The latter solely exists to store the data needed to handle
// the updates from the protocol.
// This module provides a front for PrefixSet, mUpdateCompletions,
// and mGetHashCache, which together contain everything needed to
// provide a classification as long as the data is up to date.
// PrefixSet stores and provides lookups for 4-byte prefixes.
// mUpdateCompletions contains 32-byte completions which were
// contained in updates. They are retrieved from HashStore/.sbtore
// on startup.
// mGetHashCache contains 32-byte completions which were
// returned from the gethash server. They are not serialized,
// only cached until the next update.
// Name of the persistent PrefixSet storage
#define PREFIXSET_SUFFIX ".pset"
// MOZ_LOG=UrlClassifierDbService:5
extern mozilla::LazyLogModule gUrlClassifierDbServiceLog;
#define LOG(args) MOZ_LOG(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug, args)
#define LOG_ENABLED() MOZ_LOG_TEST(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug)
namespace mozilla {
namespace safebrowsing {
const int CacheResultV2::VER = CacheResult::V2;
const int CacheResultV4::VER = CacheResult::V4;
const int LookupCacheV2::VER = 2;
LookupCache::LookupCache(const nsACString& aTableName,
const nsACString& aProvider,
nsIFile* aRootStoreDir)
: mPrimed(false)
, mTableName(aTableName)
, mProvider(aProvider)
, mRootStoreDirectory(aRootStoreDir)
{
UpdateRootDirHandle(mRootStoreDirectory);
}
nsresult
LookupCache::Open()
{
LOG(("Loading PrefixSet"));
nsresult rv = LoadPrefixSet();
NS_ENSURE_SUCCESS(rv, rv);
return NS_OK;
}
nsresult
LookupCache::UpdateRootDirHandle(nsIFile* aNewRootStoreDirectory)
{
nsresult rv;
if (aNewRootStoreDirectory != mRootStoreDirectory) {
rv = aNewRootStoreDirectory->Clone(getter_AddRefs(mRootStoreDirectory));
NS_ENSURE_SUCCESS(rv, rv);
}
rv = Classifier::GetPrivateStoreDirectory(mRootStoreDirectory,
mTableName,
mProvider,
getter_AddRefs(mStoreDirectory));
if (NS_FAILED(rv)) {
LOG(("Failed to get private store directory for %s", mTableName.get()));
mStoreDirectory = mRootStoreDirectory;
}
if (LOG_ENABLED()) {
nsString path;
mStoreDirectory->GetPath(path);
LOG(("Private store directory for %s is %s", mTableName.get(),
NS_ConvertUTF16toUTF8(path).get()));
}
return rv;
}
nsresult
LookupCache::AddCompletionsToCache(AddCompleteArray& aAddCompletes)
{
for (uint32_t i = 0; i < aAddCompletes.Length(); i++) {
if (mGetHashCache.BinaryIndexOf(aAddCompletes[i].CompleteHash()) == mGetHashCache.NoIndex) {
mGetHashCache.AppendElement(aAddCompletes[i].CompleteHash());
}
}
mGetHashCache.Sort();
return NS_OK;
}
#if defined(DEBUG)
void
LookupCache::DumpCache()
{
if (!LOG_ENABLED())
return;
for (uint32_t i = 0; i < mGetHashCache.Length(); i++) {
nsAutoCString str;
mGetHashCache[i].ToHexString(str);
LOG(("Caches: %s", str.get()));
}
}
#endif
nsresult
LookupCache::WriteFile()
{
if (nsUrlClassifierDBService::ShutdownHasStarted()) {
return NS_ERROR_ABORT;
}
nsCOMPtr<nsIFile> psFile;
nsresult rv = mStoreDirectory->Clone(getter_AddRefs(psFile));
NS_ENSURE_SUCCESS(rv, rv);
rv = psFile->AppendNative(mTableName + NS_LITERAL_CSTRING(PREFIXSET_SUFFIX));
NS_ENSURE_SUCCESS(rv, rv);
rv = StoreToFile(psFile);
NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "failed to store the prefixset");
return NS_OK;
}
void
LookupCache::ClearAll()
{
ClearCache();
ClearPrefixes();
mPrimed = false;
}
void
LookupCache::ClearCache()
{
mGetHashCache.Clear();
}
/* static */ bool
LookupCache::IsCanonicalizedIP(const nsACString& aHost)
{
// The canonicalization process will have left IP addresses in dotted
// decimal with no surprises.
uint32_t i1, i2, i3, i4;
char c;
if (PR_sscanf(PromiseFlatCString(aHost).get(), "%u.%u.%u.%u%c",
&i1, &i2, &i3, &i4, &c) == 4) {
return (i1 <= 0xFF && i2 <= 0xFF && i3 <= 0xFF && i4 <= 0xFF);
}
return false;
}
/* static */ nsresult
LookupCache::GetLookupFragments(const nsACString& aSpec,
nsTArray<nsCString>* aFragments)
{
aFragments->Clear();
nsACString::const_iterator begin, end, iter;
aSpec.BeginReading(begin);
aSpec.EndReading(end);
iter = begin;
if (!FindCharInReadable('/', iter, end)) {
return NS_OK;
}
const nsCSubstring& host = Substring(begin, iter++);
nsAutoCString path;
path.Assign(Substring(iter, end));
/**
* From the protocol doc:
* For the hostname, the client will try at most 5 different strings. They
* are:
* a) The exact hostname of the url
* b) The 4 hostnames formed by starting with the last 5 components and
* successivly removing the leading component. The top-level component
* can be skipped. This is not done if the hostname is a numerical IP.
*/
nsTArray<nsCString> hosts;
hosts.AppendElement(host);
if (!IsCanonicalizedIP(host)) {
host.BeginReading(begin);
host.EndReading(end);
int numHostComponents = 0;
while (RFindInReadable(NS_LITERAL_CSTRING("."), begin, end) &&
numHostComponents < MAX_HOST_COMPONENTS) {
// don't bother checking toplevel domains
if (++numHostComponents >= 2) {
host.EndReading(iter);
hosts.AppendElement(Substring(end, iter));
}
end = begin;
host.BeginReading(begin);
}
}
/**
* From the protocol doc:
* For the path, the client will also try at most 6 different strings.
* They are:
* a) the exact path of the url, including query parameters
* b) the exact path of the url, without query parameters
* c) the 4 paths formed by starting at the root (/) and
* successively appending path components, including a trailing
* slash. This behavior should only extend up to the next-to-last
* path component, that is, a trailing slash should never be
* appended that was not present in the original url.
*/
nsTArray<nsCString> paths;
nsAutoCString pathToAdd;
path.BeginReading(begin);
path.EndReading(end);
iter = begin;
if (FindCharInReadable('?', iter, end)) {
pathToAdd = Substring(begin, iter);
paths.AppendElement(pathToAdd);
end = iter;
}
int numPathComponents = 1;
iter = begin;
while (FindCharInReadable('/', iter, end) &&
numPathComponents < MAX_PATH_COMPONENTS) {
iter++;
pathToAdd.Assign(Substring(begin, iter));
paths.AppendElement(pathToAdd);
numPathComponents++;
}
// If we haven't already done so, add the full path
if (!pathToAdd.Equals(path)) {
paths.AppendElement(path);
}
// Check an empty path (for whole-domain blacklist entries)
paths.AppendElement(EmptyCString());
for (uint32_t hostIndex = 0; hostIndex < hosts.Length(); hostIndex++) {
for (uint32_t pathIndex = 0; pathIndex < paths.Length(); pathIndex++) {
nsCString key;
key.Assign(hosts[hostIndex]);
key.Append('/');
key.Append(paths[pathIndex]);
LOG(("Checking fragment %s", key.get()));
aFragments->AppendElement(key);
}
}
return NS_OK;
}
/* static */ nsresult
LookupCache::GetHostKeys(const nsACString& aSpec,
nsTArray<nsCString>* aHostKeys)
{
nsACString::const_iterator begin, end, iter;
aSpec.BeginReading(begin);
aSpec.EndReading(end);
iter = begin;
if (!FindCharInReadable('/', iter, end)) {
return NS_OK;
}
const nsCSubstring& host = Substring(begin, iter);
if (IsCanonicalizedIP(host)) {
nsCString *key = aHostKeys->AppendElement();
if (!key)
return NS_ERROR_OUT_OF_MEMORY;
key->Assign(host);
key->Append("/");
return NS_OK;
}
nsTArray<nsCString> hostComponents;
ParseString(PromiseFlatCString(host), '.', hostComponents);
if (hostComponents.Length() < 2) {
// no host or toplevel host, this won't match anything in the db
return NS_OK;
}
// First check with two domain components
int32_t last = int32_t(hostComponents.Length()) - 1;
nsCString *lookupHost = aHostKeys->AppendElement();
if (!lookupHost)
return NS_ERROR_OUT_OF_MEMORY;
lookupHost->Assign(hostComponents[last - 1]);
lookupHost->Append(".");
lookupHost->Append(hostComponents[last]);
lookupHost->Append("/");
// Now check with three domain components
if (hostComponents.Length() > 2) {
nsCString *lookupHost2 = aHostKeys->AppendElement();
if (!lookupHost2)
return NS_ERROR_OUT_OF_MEMORY;
lookupHost2->Assign(hostComponents[last - 2]);
lookupHost2->Append(".");
lookupHost2->Append(*lookupHost);
}
return NS_OK;
}
nsresult
LookupCache::LoadPrefixSet()
{
nsCOMPtr<nsIFile> psFile;
nsresult rv = mStoreDirectory->Clone(getter_AddRefs(psFile));
NS_ENSURE_SUCCESS(rv, rv);
rv = psFile->AppendNative(mTableName + NS_LITERAL_CSTRING(PREFIXSET_SUFFIX));
NS_ENSURE_SUCCESS(rv, rv);
bool exists;
rv = psFile->Exists(&exists);
NS_ENSURE_SUCCESS(rv, rv);
if (exists) {
LOG(("stored PrefixSet exists, loading from disk"));
rv = LoadFromFile(psFile);
if (NS_FAILED(rv)) {
return rv;
}
mPrimed = true;
} else {
LOG(("no (usable) stored PrefixSet found"));
}
#ifdef DEBUG
if (mPrimed) {
uint32_t size = SizeOfPrefixSet();
LOG(("SB tree done, size = %d bytes\n", size));
}
#endif
return NS_OK;
}
nsresult
LookupCacheV2::Init()
{
mPrefixSet = new nsUrlClassifierPrefixSet();
nsresult rv = mPrefixSet->Init(mTableName);
NS_ENSURE_SUCCESS(rv, rv);
return NS_OK;
}
nsresult
LookupCacheV2::Open()
{
nsresult rv = LookupCache::Open();
NS_ENSURE_SUCCESS(rv, rv);
LOG(("Reading Completions"));
rv = ReadCompletions();
NS_ENSURE_SUCCESS(rv, rv);
return NS_OK;
}
void
LookupCacheV2::ClearAll()
{
LookupCache::ClearAll();
mUpdateCompletions.Clear();
}
nsresult
LookupCacheV2::Has(const Completion& aCompletion,
bool* aHas, uint32_t* aMatchLength,
bool* aFromCache)
{
*aHas = *aFromCache = false;
*aMatchLength = 0;
uint32_t prefix = aCompletion.ToUint32();
bool found;
nsresult rv = mPrefixSet->Contains(prefix, &found);
NS_ENSURE_SUCCESS(rv, rv);
LOG(("Probe in %s: %X, found %d", mTableName.get(), prefix, found));
if (found) {
*aHas = true;
*aMatchLength = PREFIX_SIZE;
}
if ((mGetHashCache.BinaryIndexOf(aCompletion) != nsTArray<Completion>::NoIndex) ||
(mUpdateCompletions.BinaryIndexOf(aCompletion) != nsTArray<Completion>::NoIndex)) {
LOG(("Complete in %s", mTableName.get()));
*aFromCache = true;
*aHas = true;
*aMatchLength = COMPLETE_SIZE;
}
return NS_OK;
}
void
LookupCacheV2::IsHashEntryConfirmed(const Completion& aEntry,
const TableFreshnessMap& aTableFreshness,
uint32_t aFreshnessGuarantee,
bool* aConfirmed)
{
int64_t age; // in seconds
bool found = aTableFreshness.Get(mTableName, &age);
if (!found) {
*aConfirmed = false;
} else {
int64_t now = (PR_Now() / PR_USEC_PER_SEC);
MOZ_ASSERT(age <= now);
// Considered completion as unsafe if its table is up-to-date.
*aConfirmed = (now - age) < aFreshnessGuarantee;
}
}
bool
LookupCacheV2::IsEmpty()
{
bool isEmpty;
mPrefixSet->IsEmpty(&isEmpty);
return isEmpty;
}
nsresult
LookupCacheV2::Build(AddPrefixArray& aAddPrefixes,
AddCompleteArray& aAddCompletes)
{
Telemetry::Accumulate(Telemetry::URLCLASSIFIER_LC_COMPLETIONS,
static_cast<uint32_t>(aAddCompletes.Length()));
mUpdateCompletions.Clear();
mUpdateCompletions.SetCapacity(aAddCompletes.Length());
for (uint32_t i = 0; i < aAddCompletes.Length(); i++) {
mUpdateCompletions.AppendElement(aAddCompletes[i].CompleteHash());
}
aAddCompletes.Clear();
mUpdateCompletions.Sort();
Telemetry::Accumulate(Telemetry::URLCLASSIFIER_LC_PREFIXES,
static_cast<uint32_t>(aAddPrefixes.Length()));
nsresult rv = ConstructPrefixSet(aAddPrefixes);
NS_ENSURE_SUCCESS(rv, rv);
mPrimed = true;
return NS_OK;
}
nsresult
LookupCacheV2::GetPrefixes(FallibleTArray<uint32_t>& aAddPrefixes)
{
if (!mPrimed) {
// This can happen if its a new table, so no error.
LOG(("GetPrefixes from empty LookupCache"));
return NS_OK;
}
return mPrefixSet->GetPrefixesNative(aAddPrefixes);
}
nsresult
LookupCacheV2::ReadCompletions()
{
HashStore store(mTableName, mProvider, mRootStoreDirectory);
nsresult rv = store.Open();
NS_ENSURE_SUCCESS(rv, rv);
mUpdateCompletions.Clear();
const AddCompleteArray& addComplete = store.AddCompletes();
for (uint32_t i = 0; i < addComplete.Length(); i++) {
mUpdateCompletions.AppendElement(addComplete[i].complete);
}
return NS_OK;
}
nsresult
LookupCacheV2::ClearPrefixes()
{
return mPrefixSet->SetPrefixes(nullptr, 0);
}
nsresult
LookupCacheV2::StoreToFile(nsIFile* aFile)
{
return mPrefixSet->StoreToFile(aFile);
}
nsresult
LookupCacheV2::LoadFromFile(nsIFile* aFile)
{
return mPrefixSet->LoadFromFile(aFile);
}
size_t
LookupCacheV2::SizeOfPrefixSet()
{
return mPrefixSet->SizeOfIncludingThis(moz_malloc_size_of);
}
#ifdef DEBUG
template <class T>
static void EnsureSorted(T* aArray)
{
typename T::elem_type* start = aArray->Elements();
typename T::elem_type* end = aArray->Elements() + aArray->Length();
typename T::elem_type* iter = start;
typename T::elem_type* previous = start;
while (iter != end) {
previous = iter;
++iter;
if (iter != end) {
MOZ_ASSERT(*previous <= *iter);
}
}
return;
}
#endif
nsresult
LookupCacheV2::ConstructPrefixSet(AddPrefixArray& aAddPrefixes)
{
Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_PS_CONSTRUCT_TIME> timer;
nsTArray<uint32_t> array;
if (!array.SetCapacity(aAddPrefixes.Length(), fallible)) {
return NS_ERROR_OUT_OF_MEMORY;
}
for (uint32_t i = 0; i < aAddPrefixes.Length(); i++) {
array.AppendElement(aAddPrefixes[i].PrefixHash().ToUint32());
}
aAddPrefixes.Clear();
#ifdef DEBUG
// PrefixSet requires sorted order
EnsureSorted(&array);
#endif
// construct new one, replace old entries
nsresult rv = mPrefixSet->SetPrefixes(array.Elements(), array.Length());
NS_ENSURE_SUCCESS(rv, rv);
#ifdef DEBUG
uint32_t size;
size = mPrefixSet->SizeOfIncludingThis(moz_malloc_size_of);
LOG(("SB tree done, size = %d bytes\n", size));
#endif
mPrimed = true;
return NS_OK;
}
#if defined(DEBUG)
void
LookupCacheV2::DumpCompletions()
{
if (!LOG_ENABLED())
return;
for (uint32_t i = 0; i < mUpdateCompletions.Length(); i++) {
nsAutoCString str;
mUpdateCompletions[i].ToHexString(str);
LOG(("Update: %s", str.get()));
}
}
#endif
} // namespace safebrowsing
} // namespace mozilla