forked from mirrors/gecko-dev
This simplifies the logic around clearing the prefix set and also adds the clearing of the mIndexDeltasChecksum which should have been done as part of 3a00711bb0e6. Additionally, the checks for whether or not the prefix set is empty include some sanity-checking asserts. Finally, mTotalPrefixes could be out of sync with mIndexPrefixes and mIndexDeltas if LoadPrefixes() or MakePrefixSet() fail so we now only update it once all elements have been added successfully. There is now a release assert to catch grossly out-of-sync (or corrupt) values of mTotalPrefixes. MozReview-Commit-ID: BSbyD2dGsUY Differential Revision: https://phabricator.services.mozilla.com/D2062 --HG-- extra : moz-landing-system : lando
462 lines
14 KiB
C++
462 lines
14 KiB
C++
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
|
|
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
#include "VariableLengthPrefixSet.h"
|
|
#include "nsUrlClassifierPrefixSet.h"
|
|
#include "nsPrintfCString.h"
|
|
#include "nsThreadUtils.h"
|
|
#include "mozilla/EndianUtils.h"
|
|
#include "mozilla/Telemetry.h"
|
|
#include "mozilla/Unused.h"
|
|
#include <algorithm>
|
|
|
|
// MOZ_LOG=UrlClassifierPrefixSet:5
|
|
static mozilla::LazyLogModule gUrlClassifierPrefixSetLog("UrlClassifierPrefixSet");
|
|
#define LOG(args) MOZ_LOG(gUrlClassifierPrefixSetLog, mozilla::LogLevel::Debug, args)
|
|
#define LOG_ENABLED() MOZ_LOG_TEST(gUrlClassifierPrefixSetLog, mozilla::LogLevel::Debug)
|
|
|
|
namespace mozilla {
|
|
namespace safebrowsing {
|
|
|
|
#define PREFIX_SIZE_FIXED 4
|
|
|
|
NS_IMPL_ISUPPORTS(VariableLengthPrefixSet, nsIMemoryReporter)
|
|
|
|
// Definition required due to std::max<>()
|
|
const uint32_t VariableLengthPrefixSet::MAX_BUFFER_SIZE;
|
|
|
|
// This class will process prefix size between 4~32. But for 4 bytes prefixes,
|
|
// they will be passed to nsUrlClassifierPrefixSet because of better optimization.
|
|
VariableLengthPrefixSet::VariableLengthPrefixSet()
|
|
: mLock("VariableLengthPrefixSet.mLock")
|
|
, mFixedPrefixSet(new nsUrlClassifierPrefixSet)
|
|
{
|
|
}
|
|
|
|
nsresult
|
|
VariableLengthPrefixSet::Init(const nsACString& aName)
|
|
{
|
|
mMemoryReportPath =
|
|
nsPrintfCString(
|
|
"explicit/storage/prefix-set/%s",
|
|
(!aName.IsEmpty() ? PromiseFlatCString(aName).get() : "?!")
|
|
);
|
|
|
|
RegisterWeakMemoryReporter(this);
|
|
|
|
return mFixedPrefixSet->Init(aName);
|
|
}
|
|
|
|
VariableLengthPrefixSet::~VariableLengthPrefixSet()
|
|
{
|
|
UnregisterWeakMemoryReporter(this);
|
|
}
|
|
|
|
nsresult
|
|
VariableLengthPrefixSet::SetPrefixes(const PrefixStringMap& aPrefixMap)
|
|
{
|
|
MutexAutoLock lock(mLock);
|
|
|
|
// Prefix size should not less than 4-bytes or greater than 32-bytes
|
|
for (auto iter = aPrefixMap.ConstIter(); !iter.Done(); iter.Next()) {
|
|
if (iter.Key() < PREFIX_SIZE_FIXED ||
|
|
iter.Key() > COMPLETE_SIZE) {
|
|
return NS_ERROR_FAILURE;
|
|
}
|
|
}
|
|
|
|
// Clear old prefixSet before setting new one.
|
|
mFixedPrefixSet->SetPrefixes(nullptr, 0);
|
|
mVLPrefixSet.Clear();
|
|
|
|
// 4-bytes prefixes are handled by nsUrlClassifierPrefixSet.
|
|
nsCString* prefixes = aPrefixMap.Get(PREFIX_SIZE_FIXED);
|
|
if (prefixes) {
|
|
NS_ENSURE_TRUE(prefixes->Length() % PREFIX_SIZE_FIXED == 0, NS_ERROR_FAILURE);
|
|
|
|
uint32_t numPrefixes = prefixes->Length() / PREFIX_SIZE_FIXED;
|
|
|
|
#if MOZ_BIG_ENDIAN
|
|
const uint32_t* arrayPtr = reinterpret_cast<const uint32_t*>(prefixes->BeginReading());
|
|
#else
|
|
FallibleTArray<uint32_t> array;
|
|
// Prefixes are lexicographically-sorted, so the interger array
|
|
// passed to nsUrlClassifierPrefixSet should also follow the same order.
|
|
// To make sure of that, we convert char array to integer with Big-Endian
|
|
// instead of casting to integer directly.
|
|
if (!array.SetCapacity(numPrefixes, fallible)) {
|
|
return NS_ERROR_OUT_OF_MEMORY;
|
|
}
|
|
|
|
const char* begin = prefixes->BeginReading();
|
|
const char* end = prefixes->EndReading();
|
|
|
|
while (begin != end) {
|
|
array.AppendElement(BigEndian::readUint32(begin), fallible);
|
|
begin += sizeof(uint32_t);
|
|
}
|
|
MOZ_ASSERT(array.Length() == numPrefixes);
|
|
|
|
const uint32_t* arrayPtr = array.Elements();
|
|
#endif
|
|
|
|
nsresult rv = mFixedPrefixSet->SetPrefixes(arrayPtr, numPrefixes);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
}
|
|
|
|
// 5~32 bytes prefixes are stored in mVLPrefixSet.
|
|
for (auto iter = aPrefixMap.ConstIter(); !iter.Done(); iter.Next()) {
|
|
// Skip 4bytes prefixes because it is already stored in mFixedPrefixSet.
|
|
if (iter.Key() == PREFIX_SIZE_FIXED) {
|
|
continue;
|
|
}
|
|
|
|
mVLPrefixSet.Put(iter.Key(), new nsCString(*iter.Data()));
|
|
}
|
|
|
|
return NS_OK;
|
|
}
|
|
|
|
nsresult
|
|
VariableLengthPrefixSet::GetPrefixes(PrefixStringMap& aPrefixMap)
|
|
{
|
|
MutexAutoLock lock(mLock);
|
|
|
|
// 4-bytes prefixes are handled by nsUrlClassifierPrefixSet.
|
|
FallibleTArray<uint32_t> array;
|
|
nsresult rv = mFixedPrefixSet->GetPrefixesNative(array);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
|
|
size_t count = array.Length();
|
|
if (count) {
|
|
nsCString* prefixes = new nsCString();
|
|
if (!prefixes->SetLength(PREFIX_SIZE_FIXED * count, fallible)) {
|
|
return NS_ERROR_OUT_OF_MEMORY;
|
|
}
|
|
|
|
// Writing integer array to character array
|
|
uint32_t* begin = reinterpret_cast<uint32_t*>(prefixes->BeginWriting());
|
|
for (uint32_t i = 0; i < count; i++) {
|
|
begin[i] = NativeEndian::swapToBigEndian(array[i]);
|
|
}
|
|
|
|
aPrefixMap.Put(PREFIX_SIZE_FIXED, prefixes);
|
|
}
|
|
|
|
// Copy variable-length prefix set
|
|
for (auto iter = mVLPrefixSet.ConstIter(); !iter.Done(); iter.Next()) {
|
|
aPrefixMap.Put(iter.Key(), new nsCString(*iter.Data()));
|
|
}
|
|
|
|
return NS_OK;
|
|
}
|
|
|
|
nsresult
|
|
VariableLengthPrefixSet::GetFixedLengthPrefixes(FallibleTArray<uint32_t>& aPrefixes)
|
|
{
|
|
return mFixedPrefixSet->GetPrefixesNative(aPrefixes);
|
|
}
|
|
|
|
// It should never be the case that more than one hash prefixes match a given
|
|
// full hash. However, if that happens, this method returns any one of them.
|
|
// It does not guarantee which one of those will be returned.
|
|
nsresult
|
|
VariableLengthPrefixSet::Matches(const nsACString& aFullHash,
|
|
uint32_t* aLength) const
|
|
{
|
|
MutexAutoLock lock(mLock);
|
|
|
|
// Only allow full-length hash to check if match any of the prefix
|
|
MOZ_ASSERT(aFullHash.Length() == COMPLETE_SIZE);
|
|
NS_ENSURE_ARG_POINTER(aLength);
|
|
|
|
*aLength = 0;
|
|
|
|
// Check if it matches 4-bytes prefixSet first
|
|
const uint32_t* hash = reinterpret_cast<const uint32_t*>(aFullHash.BeginReading());
|
|
uint32_t value = BigEndian::readUint32(hash);
|
|
|
|
bool found = false;
|
|
nsresult rv = mFixedPrefixSet->Contains(value, &found);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
|
|
if (found) {
|
|
*aLength = PREFIX_SIZE_FIXED;
|
|
return NS_OK;
|
|
}
|
|
|
|
for (auto iter = mVLPrefixSet.ConstIter(); !iter.Done(); iter.Next()) {
|
|
if (BinarySearch(aFullHash, *iter.Data(), iter.Key())) {
|
|
*aLength = iter.Key();
|
|
MOZ_ASSERT(*aLength > 4);
|
|
return NS_OK;
|
|
}
|
|
}
|
|
|
|
return NS_OK;
|
|
}
|
|
|
|
nsresult
|
|
VariableLengthPrefixSet::IsEmpty(bool* aEmpty) const
|
|
{
|
|
MutexAutoLock lock(mLock);
|
|
|
|
NS_ENSURE_ARG_POINTER(aEmpty);
|
|
|
|
mFixedPrefixSet->IsEmpty(aEmpty);
|
|
*aEmpty = *aEmpty && mVLPrefixSet.IsEmpty();
|
|
|
|
return NS_OK;
|
|
}
|
|
|
|
nsresult
|
|
VariableLengthPrefixSet::LoadFromFile(nsCOMPtr<nsIFile>& aFile)
|
|
{
|
|
MutexAutoLock lock(mLock);
|
|
|
|
NS_ENSURE_ARG_POINTER(aFile);
|
|
|
|
Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_VLPS_FILELOAD_TIME> timer;
|
|
|
|
nsCOMPtr<nsIInputStream> localInFile;
|
|
nsresult rv = NS_NewLocalFileInputStream(getter_AddRefs(localInFile), aFile,
|
|
PR_RDONLY | nsIFile::OS_READAHEAD);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
|
|
// Calculate how big the file is, make sure our read buffer isn't bigger
|
|
// than the file itself which is just wasting memory.
|
|
int64_t fileSize;
|
|
rv = aFile->GetFileSize(&fileSize);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
|
|
if (fileSize < 0 || fileSize > UINT32_MAX) {
|
|
return NS_ERROR_FAILURE;
|
|
}
|
|
|
|
uint32_t bufferSize = std::min<uint32_t>(static_cast<uint32_t>(fileSize),
|
|
MAX_BUFFER_SIZE);
|
|
|
|
// Convert to buffered stream
|
|
nsCOMPtr<nsIInputStream> in;
|
|
rv = NS_NewBufferedInputStream(getter_AddRefs(in), localInFile.forget(),
|
|
bufferSize);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
|
|
rv = mFixedPrefixSet->LoadPrefixes(in);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
|
|
rv = LoadPrefixes(in);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
|
|
return NS_OK;;
|
|
}
|
|
|
|
nsresult
|
|
VariableLengthPrefixSet::StoreToFile(nsCOMPtr<nsIFile>& aFile) const
|
|
{
|
|
NS_ENSURE_ARG_POINTER(aFile);
|
|
|
|
MutexAutoLock lock(mLock);
|
|
|
|
nsCOMPtr<nsIOutputStream> localOutFile;
|
|
nsresult rv = NS_NewLocalFileOutputStream(getter_AddRefs(localOutFile), aFile,
|
|
PR_WRONLY | PR_TRUNCATE | PR_CREATE_FILE);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
|
|
uint32_t fileSize = 0;
|
|
// Preallocate the file storage
|
|
{
|
|
nsCOMPtr<nsIFileOutputStream> fos(do_QueryInterface(localOutFile));
|
|
Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_VLPS_FALLOCATE_TIME> timer;
|
|
|
|
fileSize += mFixedPrefixSet->CalculatePreallocateSize();
|
|
fileSize += CalculatePreallocateSize();
|
|
|
|
Unused << fos->Preallocate(fileSize);
|
|
}
|
|
|
|
// Convert to buffered stream
|
|
nsCOMPtr<nsIOutputStream> out;
|
|
rv = NS_NewBufferedOutputStream(getter_AddRefs(out), localOutFile.forget(),
|
|
std::min(fileSize, MAX_BUFFER_SIZE));
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
|
|
rv = mFixedPrefixSet->WritePrefixes(out);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
|
|
rv = WritePrefixes(out);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
|
|
return NS_OK;
|
|
}
|
|
|
|
nsresult
|
|
VariableLengthPrefixSet::LoadPrefixes(nsCOMPtr<nsIInputStream>& in)
|
|
{
|
|
uint32_t magic;
|
|
uint32_t read;
|
|
|
|
nsresult rv = in->Read(reinterpret_cast<char*>(&magic), sizeof(uint32_t), &read);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
NS_ENSURE_TRUE(read == sizeof(uint32_t), NS_ERROR_FAILURE);
|
|
|
|
if (magic != PREFIXSET_VERSION_MAGIC) {
|
|
LOG(("Version magic mismatch, not loading"));
|
|
return NS_ERROR_FILE_CORRUPTED;
|
|
}
|
|
|
|
mVLPrefixSet.Clear();
|
|
|
|
uint32_t count;
|
|
rv = in->Read(reinterpret_cast<char*>(&count), sizeof(uint32_t), &read);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
NS_ENSURE_TRUE(read == sizeof(uint32_t), NS_ERROR_FAILURE);
|
|
|
|
for(;count > 0; count--) {
|
|
uint8_t prefixSize;
|
|
rv = in->Read(reinterpret_cast<char*>(&prefixSize), sizeof(uint8_t), &read);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
NS_ENSURE_TRUE(read == sizeof(uint8_t), NS_ERROR_FAILURE);
|
|
|
|
if (prefixSize < PREFIX_SIZE || prefixSize > COMPLETE_SIZE) {
|
|
return NS_ERROR_FILE_CORRUPTED;
|
|
}
|
|
|
|
uint32_t stringLength;
|
|
rv = in->Read(reinterpret_cast<char*>(&stringLength), sizeof(uint32_t), &read);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
NS_ENSURE_TRUE(read == sizeof(uint32_t), NS_ERROR_FAILURE);
|
|
|
|
nsCString* vlPrefixes = new nsCString();
|
|
if (!vlPrefixes->SetLength(stringLength, fallible)) {
|
|
return NS_ERROR_OUT_OF_MEMORY;
|
|
}
|
|
|
|
rv = in->Read(reinterpret_cast<char*>(vlPrefixes->BeginWriting()), stringLength, &read);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
NS_ENSURE_TRUE(read == stringLength, NS_ERROR_FAILURE);
|
|
|
|
mVLPrefixSet.Put(prefixSize, vlPrefixes);
|
|
}
|
|
|
|
return NS_OK;
|
|
}
|
|
|
|
uint32_t
|
|
VariableLengthPrefixSet::CalculatePreallocateSize() const
|
|
{
|
|
uint32_t fileSize = 0;
|
|
|
|
// Store how many prefix string.
|
|
fileSize += sizeof(uint32_t);
|
|
|
|
for (auto iter = mVLPrefixSet.ConstIter(); !iter.Done(); iter.Next()) {
|
|
// Store prefix size, prefix string length, and prefix string.
|
|
fileSize += sizeof(uint8_t);
|
|
fileSize += sizeof(uint32_t);
|
|
fileSize += iter.Data()->Length();
|
|
}
|
|
return fileSize;
|
|
}
|
|
|
|
nsresult
|
|
VariableLengthPrefixSet::WritePrefixes(nsCOMPtr<nsIOutputStream>& out) const
|
|
{
|
|
uint32_t written;
|
|
uint32_t writelen = sizeof(uint32_t);
|
|
uint32_t magic = PREFIXSET_VERSION_MAGIC;
|
|
nsresult rv = out->Write(reinterpret_cast<char*>(&magic), writelen, &written);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
|
|
|
|
uint32_t count = mVLPrefixSet.Count();
|
|
rv = out->Write(reinterpret_cast<char*>(&count), writelen, &written);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
|
|
|
|
// Store PrefixSize, Length of Prefix String and then Prefix String
|
|
for (auto iter = mVLPrefixSet.ConstIter(); !iter.Done(); iter.Next()) {
|
|
const nsCString& vlPrefixes = *iter.Data();
|
|
|
|
uint8_t prefixSize = iter.Key();
|
|
writelen = sizeof(uint8_t);
|
|
rv = out->Write(reinterpret_cast<char*>(&prefixSize), writelen, &written);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
|
|
|
|
uint32_t stringLength = vlPrefixes.Length();
|
|
writelen = sizeof(uint32_t);
|
|
rv = out->Write(reinterpret_cast<char*>(&stringLength), writelen, &written);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
|
|
|
|
rv = out->Write(const_cast<char*>(vlPrefixes.BeginReading()),
|
|
stringLength, &written);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
NS_ENSURE_TRUE(stringLength == written, NS_ERROR_FAILURE);
|
|
}
|
|
|
|
return NS_OK;
|
|
}
|
|
|
|
bool
|
|
VariableLengthPrefixSet::BinarySearch(const nsACString& aFullHash,
|
|
const nsACString& aPrefixes,
|
|
uint32_t aPrefixSize) const
|
|
{
|
|
const char* fullhash = aFullHash.BeginReading();
|
|
const char* prefixes = aPrefixes.BeginReading();
|
|
int32_t begin = 0, end = aPrefixes.Length() / aPrefixSize;
|
|
|
|
while (end > begin) {
|
|
int32_t mid = (begin + end) >> 1;
|
|
int cmp = memcmp(fullhash, prefixes + mid*aPrefixSize, aPrefixSize);
|
|
if (cmp < 0) {
|
|
end = mid;
|
|
} else if (cmp > 0) {
|
|
begin = mid + 1;
|
|
} else {
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
MOZ_DEFINE_MALLOC_SIZE_OF(UrlClassifierMallocSizeOf)
|
|
|
|
NS_IMETHODIMP
|
|
VariableLengthPrefixSet::CollectReports(nsIHandleReportCallback* aHandleReport,
|
|
nsISupports* aData, bool aAnonymize)
|
|
{
|
|
MOZ_ASSERT(NS_IsMainThread());
|
|
|
|
size_t amount = SizeOfIncludingThis(UrlClassifierMallocSizeOf);
|
|
|
|
return aHandleReport->Callback(
|
|
EmptyCString(), mMemoryReportPath, KIND_HEAP, UNITS_BYTES, amount,
|
|
NS_LITERAL_CSTRING("Memory used by the variable-length prefix set for a URL classifier."),
|
|
aData);
|
|
}
|
|
|
|
size_t
|
|
VariableLengthPrefixSet::SizeOfIncludingThis(mozilla::MallocSizeOf aMallocSizeOf) const
|
|
{
|
|
MutexAutoLock lock(mLock);
|
|
|
|
size_t n = 0;
|
|
n += aMallocSizeOf(this);
|
|
n += mFixedPrefixSet->SizeOfIncludingThis(moz_malloc_size_of) - aMallocSizeOf(mFixedPrefixSet);
|
|
|
|
n += mVLPrefixSet.ShallowSizeOfExcludingThis(aMallocSizeOf);
|
|
for (auto iter = mVLPrefixSet.ConstIter(); !iter.Done(); iter.Next()) {
|
|
n += iter.Data()->SizeOfExcludingThisIfUnshared(aMallocSizeOf);
|
|
}
|
|
|
|
return n;
|
|
}
|
|
|
|
} // namespace safebrowsing
|
|
} // namespace mozilla
|