//* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "LookupCache.h"
#include "HashStore.h"
#include "nsISeekableStream.h"
#include "mozilla/Telemetry.h"
#include "mozilla/Logging.h"
#include "nsNetUtil.h"
#include "prprf.h"
#include "Classifier.h"

// We act as the main entry point for all the real lookups,
// so note that those are not done against the actual HashStore.
// The latter solely exists to store the data needed to handle
// the updates from the protocol.

// This module provides a front for PrefixSet, mUpdateCompletions,
// and mGetHashCache, which together contain everything needed to
// provide a classification as long as the data is up to date.

// PrefixSet stores and provides lookups for 4-byte prefixes.
// mUpdateCompletions contains 32-byte completions which were
// contained in updates. They are retrieved from HashStore/.sbstore
// on startup.
// mGetHashCache contains 32-byte completions which were
// returned from the gethash server. They are not serialized,
// only cached until the next update.
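//
// A minimal usage sketch (illustrative only; "test-phish-simple",
// rootStoreDir and someCompletion are placeholders):
//
//   LookupCache cache(NS_LITERAL_CSTRING("test-phish-simple"), rootStoreDir);
//   if (NS_SUCCEEDED(cache.Init()) && NS_SUCCEEDED(cache.Open())) {
//     bool has, complete;
//     cache.Has(someCompletion, &has, &complete);
//     // has: the 4-byte prefix (or a full completion) matched;
//     // complete: a 32-byte completion from an update or the gethash
//     // cache matched.
//   }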

// Name of the persistent PrefixSet storage
#define PREFIXSET_SUFFIX ".pset"

// MOZ_LOG=UrlClassifierDbService:5
extern mozilla::LazyLogModule gUrlClassifierDbServiceLog;
#define LOG(args) MOZ_LOG(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug, args)
#define LOG_ENABLED() MOZ_LOG_TEST(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug)

namespace mozilla {
namespace safebrowsing {

LookupCache::LookupCache(const nsACString& aTableName, nsIFile* aRootStoreDir)
  : mPrimed(false)
  , mTableName(aTableName)
  , mRootStoreDirectory(aRootStoreDir)
{
  UpdateRootDirHandle(mRootStoreDirectory);
}

nsresult
LookupCache::Init()
{
  mPrefixSet = new nsUrlClassifierPrefixSet();
  nsresult rv = mPrefixSet->Init(mTableName);
  NS_ENSURE_SUCCESS(rv, rv);

  return NS_OK;
}

LookupCache::~LookupCache()
{
}

nsresult
LookupCache::Open()
{
  LOG(("Reading Completions"));
  nsresult rv = ReadCompletions();
  NS_ENSURE_SUCCESS(rv, rv);

  LOG(("Loading PrefixSet"));
  rv = LoadPrefixSet();
  NS_ENSURE_SUCCESS(rv, rv);

  return NS_OK;
}

nsresult
LookupCache::UpdateRootDirHandle(nsIFile* aNewRootStoreDirectory)
{
  nsresult rv;

  if (aNewRootStoreDirectory != mRootStoreDirectory) {
    rv = aNewRootStoreDirectory->Clone(getter_AddRefs(mRootStoreDirectory));
    NS_ENSURE_SUCCESS(rv, rv);
  }

  rv = Classifier::GetPrivateStoreDirectory(mRootStoreDirectory,
                                            mTableName,
                                            getter_AddRefs(mStoreDirectory));

  if (NS_FAILED(rv)) {
    LOG(("Failed to get private store directory for %s", mTableName.get()));
    mStoreDirectory = mRootStoreDirectory;
  }

  if (LOG_ENABLED()) {
    nsString path;
    mStoreDirectory->GetPath(path);
    LOG(("Private store directory for %s is %s", mTableName.get(),
         NS_ConvertUTF16toUTF8(path).get()));
  }

  return rv;
}

nsresult
LookupCache::Reset()
{
  LOG(("LookupCache resetting"));

  nsCOMPtr<nsIFile> prefixsetFile;
  nsresult rv = mStoreDirectory->Clone(getter_AddRefs(prefixsetFile));
  NS_ENSURE_SUCCESS(rv, rv);

  rv = prefixsetFile->AppendNative(mTableName + NS_LITERAL_CSTRING(PREFIXSET_SUFFIX));
  NS_ENSURE_SUCCESS(rv, rv);

  rv = prefixsetFile->Remove(false);
  NS_ENSURE_SUCCESS(rv, rv);

  ClearAll();

  return NS_OK;
}

nsresult
LookupCache::Build(AddPrefixArray& aAddPrefixes,
                   AddCompleteArray& aAddCompletes)
{
  Telemetry::Accumulate(Telemetry::URLCLASSIFIER_LC_COMPLETIONS,
                        static_cast<uint32_t>(aAddCompletes.Length()));

  mUpdateCompletions.Clear();
  mUpdateCompletions.SetCapacity(aAddCompletes.Length());
  for (uint32_t i = 0; i < aAddCompletes.Length(); i++) {
    mUpdateCompletions.AppendElement(aAddCompletes[i].CompleteHash());
  }
  aAddCompletes.Clear();
  mUpdateCompletions.Sort();

  Telemetry::Accumulate(Telemetry::URLCLASSIFIER_LC_PREFIXES,
                        static_cast<uint32_t>(aAddPrefixes.Length()));

  nsresult rv = ConstructPrefixSet(aAddPrefixes);
  NS_ENSURE_SUCCESS(rv, rv);
  mPrimed = true;

  return NS_OK;
}

nsresult
LookupCache::AddCompletionsToCache(AddCompleteArray& aAddCompletes)
{
  for (uint32_t i = 0; i < aAddCompletes.Length(); i++) {
    if (mGetHashCache.BinaryIndexOf(aAddCompletes[i].CompleteHash()) == mGetHashCache.NoIndex) {
      mGetHashCache.AppendElement(aAddCompletes[i].CompleteHash());
    }
  }
  mGetHashCache.Sort();

  return NS_OK;
}

#if defined(DEBUG)
void
LookupCache::DumpCache()
{
  if (!LOG_ENABLED())
    return;

  for (uint32_t i = 0; i < mGetHashCache.Length(); i++) {
    nsAutoCString str;
    mGetHashCache[i].ToHexString(str);
    LOG(("Caches: %s", str.get()));
  }
}

void
LookupCache::Dump()
{
  if (!LOG_ENABLED())
    return;

  for (uint32_t i = 0; i < mUpdateCompletions.Length(); i++) {
    nsAutoCString str;
    mUpdateCompletions[i].ToHexString(str);
    LOG(("Update: %s", str.get()));
  }
}
#endif

nsresult
LookupCache::Has(const Completion& aCompletion,
                 bool* aHas, bool* aComplete)
{
  *aHas = *aComplete = false;

  uint32_t prefix = aCompletion.ToUint32();

  bool found;
  nsresult rv = mPrefixSet->Contains(prefix, &found);
  NS_ENSURE_SUCCESS(rv, rv);

  LOG(("Probe in %s: %X, found %d", mTableName.get(), prefix, found));

  if (found) {
    *aHas = true;
  }

  // TODO: In the future we may need to distinguish completions found in the
  // cache from those found in updates.
  if ((mGetHashCache.BinaryIndexOf(aCompletion) != nsTArray<Completion>::NoIndex) ||
      (mUpdateCompletions.BinaryIndexOf(aCompletion) != nsTArray<Completion>::NoIndex)) {
    LOG(("Complete in %s", mTableName.get()));
    *aComplete = true;
    *aHas = true;
  }

  return NS_OK;
}

nsresult
LookupCache::WriteFile()
{
  nsCOMPtr<nsIFile> psFile;
  nsresult rv = mStoreDirectory->Clone(getter_AddRefs(psFile));
  NS_ENSURE_SUCCESS(rv, rv);

  rv = psFile->AppendNative(mTableName + NS_LITERAL_CSTRING(PREFIXSET_SUFFIX));
  NS_ENSURE_SUCCESS(rv, rv);

  rv = mPrefixSet->StoreToFile(psFile);
  NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "failed to store the prefixset");

  return NS_OK;
}

void
LookupCache::ClearAll()
{
  ClearCache();
  ClearUpdatedCompletions();
  mPrefixSet->SetPrefixes(nullptr, 0);
  mPrimed = false;
}

void
LookupCache::ClearUpdatedCompletions()
{
  mUpdateCompletions.Clear();
}

void
LookupCache::ClearCache()
{
  mGetHashCache.Clear();
}

nsresult
LookupCache::ReadCompletions()
{
  HashStore store(mTableName, mRootStoreDirectory);

  nsresult rv = store.Open();
  NS_ENSURE_SUCCESS(rv, rv);

  mUpdateCompletions.Clear();

  const AddCompleteArray& addComplete = store.AddCompletes();
  for (uint32_t i = 0; i < addComplete.Length(); i++) {
    mUpdateCompletions.AppendElement(addComplete[i].complete);
  }

  return NS_OK;
}

/* static */ bool
LookupCache::IsCanonicalizedIP(const nsACString& aHost)
{
  // The canonicalization process will have left IP addresses in dotted
  // decimal with no surprises.
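  // For example, "192.168.1.1" is accepted, while "1.2.3.4.5" is not: the
  // trailing "%c" conversion below consumes the extra component, so the
  // match count is no longer exactly 4.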
  uint32_t i1, i2, i3, i4;
  char c;
  if (PR_sscanf(PromiseFlatCString(aHost).get(), "%u.%u.%u.%u%c",
                &i1, &i2, &i3, &i4, &c) == 4) {
    return (i1 <= 0xFF && i2 <= 0xFF && i3 <= 0xFF && i4 <= 0xFF);
  }

  return false;
}

/* static */ nsresult
LookupCache::GetLookupFragments(const nsACString& aSpec,
                                nsTArray<nsCString>* aFragments)
{
  aFragments->Clear();

  nsACString::const_iterator begin, end, iter;
  aSpec.BeginReading(begin);
  aSpec.EndReading(end);

  iter = begin;
  if (!FindCharInReadable('/', iter, end)) {
    return NS_OK;
  }

  const nsCSubstring& host = Substring(begin, iter++);
  nsAutoCString path;
  path.Assign(Substring(iter, end));

  /**
   * From the protocol doc:
   * For the hostname, the client will try at most 5 different strings. They
   * are:
   * a) The exact hostname of the url
   * b) The 4 hostnames formed by starting with the last 5 components and
   *    successively removing the leading component. The top-level component
   *    can be skipped. This is not done if the hostname is a numerical IP.
   */
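  // For example, with MAX_HOST_COMPONENTS == 5, the host "a.b.c.d.e.f.g"
  // yields the candidates a.b.c.d.e.f.g, c.d.e.f.g, d.e.f.g, e.f.g and f.g.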
  nsTArray<nsCString> hosts;
  hosts.AppendElement(host);

  if (!IsCanonicalizedIP(host)) {
    host.BeginReading(begin);
    host.EndReading(end);
    int numHostComponents = 0;
    while (RFindInReadable(NS_LITERAL_CSTRING("."), begin, end) &&
           numHostComponents < MAX_HOST_COMPONENTS) {
      // don't bother checking toplevel domains
      if (++numHostComponents >= 2) {
        host.EndReading(iter);
        hosts.AppendElement(Substring(end, iter));
      }
      end = begin;
      host.BeginReading(begin);
    }
  }

  /**
   * From the protocol doc:
   * For the path, the client will also try at most 6 different strings.
   * They are:
   * a) the exact path of the url, including query parameters
   * b) the exact path of the url, without query parameters
   * c) the 4 paths formed by starting at the root (/) and
   *    successively appending path components, including a trailing
   *    slash. This behavior should only extend up to the next-to-last
   *    path component, that is, a trailing slash should never be
   *    appended that was not present in the original url.
   */
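  // For example, the path "1/2.html?param=1" yields the candidates
  // "1/2.html?param=1", "1/2.html", "1/" and "" (the empty path covers
  // whole-domain entries), which are then combined with each host candidate.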
  nsTArray<nsCString> paths;
  nsAutoCString pathToAdd;

  path.BeginReading(begin);
  path.EndReading(end);
  iter = begin;
  if (FindCharInReadable('?', iter, end)) {
    pathToAdd = Substring(begin, iter);
    paths.AppendElement(pathToAdd);
    end = iter;
  }

  int numPathComponents = 1;
  iter = begin;
  while (FindCharInReadable('/', iter, end) &&
         numPathComponents < MAX_PATH_COMPONENTS) {
    iter++;
    pathToAdd.Assign(Substring(begin, iter));
    paths.AppendElement(pathToAdd);
    numPathComponents++;
  }

  // If we haven't already done so, add the full path
  if (!pathToAdd.Equals(path)) {
    paths.AppendElement(path);
  }
  // Check an empty path (for whole-domain blacklist entries)
  paths.AppendElement(EmptyCString());

  for (uint32_t hostIndex = 0; hostIndex < hosts.Length(); hostIndex++) {
    for (uint32_t pathIndex = 0; pathIndex < paths.Length(); pathIndex++) {
      nsCString key;
      key.Assign(hosts[hostIndex]);
      key.Append('/');
      key.Append(paths[pathIndex]);
      LOG(("Checking fragment %s", key.get()));

      aFragments->AppendElement(key);
    }
  }

  return NS_OK;
}

/* static */ nsresult
LookupCache::GetHostKeys(const nsACString& aSpec,
                         nsTArray<nsCString>* aHostKeys)
{
  nsACString::const_iterator begin, end, iter;
  aSpec.BeginReading(begin);
  aSpec.EndReading(end);

  iter = begin;
  if (!FindCharInReadable('/', iter, end)) {
    return NS_OK;
  }

  const nsCSubstring& host = Substring(begin, iter);

  if (IsCanonicalizedIP(host)) {
    nsCString *key = aHostKeys->AppendElement();
    if (!key)
      return NS_ERROR_OUT_OF_MEMORY;

    key->Assign(host);
    key->Append("/");
    return NS_OK;
  }

  nsTArray<nsCString> hostComponents;
  ParseString(PromiseFlatCString(host), '.', hostComponents);

  if (hostComponents.Length() < 2) {
    // no host or toplevel host, this won't match anything in the db
    return NS_OK;
  }
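
  // For example, the host "a.b.c.d" produces the keys "c.d/" and "b.c.d/".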

  // First check with two domain components
  int32_t last = int32_t(hostComponents.Length()) - 1;
  nsCString *lookupHost = aHostKeys->AppendElement();
  if (!lookupHost)
    return NS_ERROR_OUT_OF_MEMORY;

  lookupHost->Assign(hostComponents[last - 1]);
  lookupHost->Append(".");
  lookupHost->Append(hostComponents[last]);
  lookupHost->Append("/");

  // Now check with three domain components
  if (hostComponents.Length() > 2) {
    nsCString *lookupHost2 = aHostKeys->AppendElement();
    if (!lookupHost2)
      return NS_ERROR_OUT_OF_MEMORY;
    lookupHost2->Assign(hostComponents[last - 2]);
    lookupHost2->Append(".");
    lookupHost2->Append(*lookupHost);
  }

  return NS_OK;
}

bool LookupCache::IsPrimed()
{
  return mPrimed;
}

#ifdef DEBUG
template <class T>
static void EnsureSorted(T* aArray)
{
  typename T::elem_type* start = aArray->Elements();
  typename T::elem_type* end = aArray->Elements() + aArray->Length();
  typename T::elem_type* iter = start;
  typename T::elem_type* previous = start;

  while (iter != end) {
    previous = iter;
    ++iter;
    if (iter != end) {
      MOZ_ASSERT(*previous <= *iter);
    }
  }
  return;
}
#endif

nsresult
LookupCache::ConstructPrefixSet(AddPrefixArray& aAddPrefixes)
{
  Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_PS_CONSTRUCT_TIME> timer;

  nsTArray<uint32_t> array;
  if (!array.SetCapacity(aAddPrefixes.Length(), fallible)) {
    return NS_ERROR_OUT_OF_MEMORY;
  }

  for (uint32_t i = 0; i < aAddPrefixes.Length(); i++) {
    array.AppendElement(aAddPrefixes[i].PrefixHash().ToUint32());
  }
  aAddPrefixes.Clear();

#ifdef DEBUG
  // PrefixSet requires sorted order
  EnsureSorted(&array);
#endif

  // construct new one, replace old entries
  nsresult rv = mPrefixSet->SetPrefixes(array.Elements(), array.Length());
  NS_ENSURE_SUCCESS(rv, rv);

#ifdef DEBUG
  uint32_t size;
  size = mPrefixSet->SizeOfIncludingThis(moz_malloc_size_of);
  LOG(("SB tree done, size = %d bytes\n", size));
#endif

  mPrimed = true;

  return NS_OK;
}

nsresult
LookupCache::LoadPrefixSet()
{
  nsCOMPtr<nsIFile> psFile;
  nsresult rv = mStoreDirectory->Clone(getter_AddRefs(psFile));
  NS_ENSURE_SUCCESS(rv, rv);

  rv = psFile->AppendNative(mTableName + NS_LITERAL_CSTRING(PREFIXSET_SUFFIX));
  NS_ENSURE_SUCCESS(rv, rv);

  bool exists;
  rv = psFile->Exists(&exists);
  NS_ENSURE_SUCCESS(rv, rv);

  if (exists) {
    LOG(("stored PrefixSet exists, loading from disk"));
    rv = mPrefixSet->LoadFromFile(psFile);
    if (NS_FAILED(rv)) {
      if (rv == NS_ERROR_FILE_CORRUPTED) {
        Reset();
      }
      return rv;
    }
    mPrimed = true;
  } else {
    LOG(("no (usable) stored PrefixSet found"));
  }

#ifdef DEBUG
  if (mPrimed) {
    uint32_t size = mPrefixSet->SizeOfIncludingThis(moz_malloc_size_of);
    LOG(("SB tree done, size = %d bytes\n", size));
  }
#endif

  return NS_OK;
}

nsresult
LookupCache::GetPrefixes(FallibleTArray<uint32_t>& aAddPrefixes)
{
  if (!mPrimed) {
    // This can happen if it's a new table, so no error.
    LOG(("GetPrefixes from empty LookupCache"));
    return NS_OK;
  }
  return mPrefixSet->GetPrefixesNative(aAddPrefixes);
}

} // namespace safebrowsing
} // namespace mozilla