forked from mirrors/gecko-dev
Backed out 3 changesets (bug 1743322) for causing multiple failures CLOSED TREE
Backed out changeset fa443c853d2d Backed out changeset d299fa6358f7 (bug 1743322) Backed out changeset 25647d613534 (bug 1743322)
This commit is contained in:
parent
568b6a87f9
commit
70bc66e002
1 changed files with 90 additions and 107 deletions
|
|
@ -492,7 +492,7 @@ static size_t gPageSize;
|
|||
# define END_GLOBALS
|
||||
# define DEFINE_GLOBAL(type) static const type
|
||||
# define GLOBAL_LOG2 LOG2
|
||||
# define GLOBAL_ASSERT_HELPER1(x) static_assert(x, #x)
|
||||
# define GLOBAL_ASSERT_HELPER1(x) static_assert(x, # x)
|
||||
# define GLOBAL_ASSERT_HELPER2(x, y) static_assert(x, y)
|
||||
# define GLOBAL_ASSERT(...) \
|
||||
MACRO_CALL( \
|
||||
|
|
@ -772,75 +772,6 @@ class SizeClass {
|
|||
size_t mSize;
|
||||
};
|
||||
|
||||
// Fast division
|
||||
//
|
||||
// During deallocation we want to divide by the size class. This class
|
||||
// provides a routine and sets up a constant as follows.
|
||||
//
|
||||
// To divide by a number D that is not a power of two we multiply by (2^17 /
|
||||
// D) and then right shift by 17 positions.
|
||||
//
|
||||
// X / D
|
||||
//
|
||||
// becomes
|
||||
//
|
||||
// (X * Inv) >> SIZE_INV_SHIFT
|
||||
//
|
||||
// Where Inv is calculated during the FastDivisor constructor as:
|
||||
//
|
||||
// Inv = 2^SIZE_INV_SHIFT / D
|
||||
//
|
||||
template <typename T>
|
||||
class FastDivisor {
|
||||
private:
|
||||
// The shift amount is chosen to minimise the size of inv while
|
||||
// working for divisors up to 65536 in steps of 16. I arrived at 17
|
||||
// experimentally. I wanted a low number to minimise the range of inv
|
||||
// so it can fit in a uint16_t, 16 didn't work but 17 worked perfectly.
|
||||
//
|
||||
// We'd need to increase this if we allocated memory on smaller boundaries
|
||||
// than 16.
|
||||
static const unsigned divide_inv_shift = 17;
|
||||
|
||||
// We can fit the inverted divisor in 16 bits.
|
||||
T inv;
|
||||
|
||||
public:
|
||||
// Needed so mBins can be constructed.
|
||||
FastDivisor() : inv(0) {}
|
||||
|
||||
FastDivisor(unsigned div, unsigned max) {
|
||||
MOZ_ASSERT(div <= max);
|
||||
|
||||
// divide_inv_shift is large enough.
|
||||
MOZ_ASSERT((1U << divide_inv_shift) >= div);
|
||||
|
||||
unsigned inv_ = ((1U << divide_inv_shift) / div) + 1;
|
||||
|
||||
// Make sure that max * inv does not overflow.
|
||||
MOZ_DIAGNOSTIC_ASSERT(max < UINT_MAX / inv_);
|
||||
|
||||
MOZ_ASSERT(inv_ <= std::numeric_limits<T>::max());
|
||||
inv = static_cast<T>(inv_);
|
||||
|
||||
// Initialisation made inv non-zero.
|
||||
MOZ_ASSERT(inv);
|
||||
}
|
||||
|
||||
// Note that this always occurs in unsigned regardless of inv's type. That
|
||||
// is, inv is zero-extended before the operation.
|
||||
inline unsigned divide(unsigned num) const {
|
||||
// Check that inv was initialised.
|
||||
MOZ_ASSERT(inv);
|
||||
return (num * inv) >> divide_inv_shift;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
unsigned inline operator/(unsigned num, FastDivisor<T> divisor) {
|
||||
return divisor.divide(num);
|
||||
}
|
||||
|
||||
// ***************************************************************************
|
||||
// Radix tree data structures.
|
||||
//
|
||||
|
|
@ -993,6 +924,9 @@ struct arena_bin_t {
|
|||
// Bin's size class.
|
||||
size_t mSizeClass;
|
||||
|
||||
// Total size of a run for this bin's size class.
|
||||
size_t mRunSize;
|
||||
|
||||
// Total number of regions in a run for this bin's size class.
|
||||
uint32_t mRunNumRegions;
|
||||
|
||||
|
|
@ -1003,14 +937,7 @@ struct arena_bin_t {
|
|||
uint32_t mRunFirstRegionOffset;
|
||||
|
||||
// Current number of runs in this bin, full or otherwise.
|
||||
uint32_t mNumRuns;
|
||||
|
||||
// A constant for fast division by size class. This value is 16 bits wide so
|
||||
// it is placed last.
|
||||
FastDivisor<uint16_t> mSizeDivisor;
|
||||
|
||||
// Total number of pages in a run for this bin's size class.
|
||||
uint8_t mRunSizePages;
|
||||
unsigned long mNumRuns;
|
||||
|
||||
// Amount of overhead runs are allowed to have.
|
||||
static constexpr double kRunOverhead = 1.6_percent;
|
||||
|
|
@ -1035,17 +962,6 @@ struct arena_bin_t {
|
|||
inline void Init(SizeClass aSizeClass);
|
||||
};
|
||||
|
||||
// We try to keep the above structure aligned with common cache lines sizes,
|
||||
// often that's 64 bytes on x86 and ARM, we don't make assumptions for other
|
||||
// architectures.
|
||||
#if defined(__x86_64__) || defined(__aarch64__)
|
||||
// On 64bit platforms this structure is often 48 bytes
|
||||
// long, which means every other array element will be properly aligned.
|
||||
static_assert(sizeof(arena_bin_t) == 48);
|
||||
#elif defined(__x86__) || defined(__arm__)
|
||||
static_assert(sizeof(arena_bin_t) == 32);
|
||||
#endif
|
||||
|
||||
struct arena_t {
|
||||
#if defined(MOZ_DIAGNOSTIC_ASSERT_ENABLED)
|
||||
uint32_t mMagic;
|
||||
|
|
@ -2432,6 +2348,68 @@ inline void* arena_t::ArenaRunRegAlloc(arena_run_t* aRun, arena_bin_t* aBin) {
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
// To divide by a number D that is not a power of two we multiply by (2^21 /
|
||||
// D) and then right shift by 21 positions.
|
||||
//
|
||||
// X / D
|
||||
//
|
||||
// becomes
|
||||
//
|
||||
// (X * size_invs[D - 3]) >> SIZE_INV_SHIFT
|
||||
//
|
||||
// Where D is d/Q and Q is a constant factor.
|
||||
template <unsigned Q, unsigned Max>
|
||||
struct FastDivide {
|
||||
static_assert(IsPowerOfTwo(Q), "q must be a power-of-two");
|
||||
|
||||
// We don't need FastDivide when dividing by a power-of-two. So when we set
|
||||
// the range (min_divisor - max_divisor inclusive) we can avoid powers-of-two.
|
||||
|
||||
// Because Q is a power of two Q*3 is the first not-power-of-two.
|
||||
static const unsigned min_divisor = Q * 3;
|
||||
static const unsigned max_divisor =
|
||||
mozilla::IsPowerOfTwo(Max) ? Max - Q : Max;
|
||||
// +1 because this range is inclusive.
|
||||
static const unsigned num_divisors = (max_divisor - min_divisor) / Q + 1;
|
||||
|
||||
static const unsigned inv_shift = 21;
|
||||
|
||||
static constexpr unsigned inv(unsigned s) {
|
||||
return ((1U << inv_shift) / (s * Q)) + 1;
|
||||
}
|
||||
|
||||
static unsigned divide(size_t num, unsigned div) {
|
||||
// clang-format off
|
||||
static const unsigned size_invs[] = {
|
||||
inv(3),
|
||||
inv(4), inv(5), inv(6), inv(7),
|
||||
inv(8), inv(9), inv(10), inv(11),
|
||||
inv(12), inv(13), inv(14), inv(15),
|
||||
inv(16), inv(17), inv(18), inv(19),
|
||||
inv(20), inv(21), inv(22), inv(23),
|
||||
inv(24), inv(25), inv(26), inv(27),
|
||||
inv(28), inv(29), inv(30), inv(31)
|
||||
};
|
||||
// clang-format on
|
||||
|
||||
// If the divisor is valid (min is below max) then the size_invs array must
|
||||
// be large enough.
|
||||
static_assert(!(min_divisor < max_divisor) ||
|
||||
num_divisors <= sizeof(size_invs) / sizeof(unsigned),
|
||||
"num_divisors does not match array size");
|
||||
|
||||
MOZ_ASSERT(div >= min_divisor);
|
||||
MOZ_ASSERT(div <= max_divisor);
|
||||
MOZ_ASSERT(div % Q == 0);
|
||||
|
||||
// If Q isn't a power of two this optimisation would be pointless, we expect
|
||||
// /Q to be reduced to a shift, but we asserted this above.
|
||||
const unsigned idx = div / Q - 3;
|
||||
MOZ_ASSERT(idx < sizeof(size_invs) / sizeof(unsigned));
|
||||
return (num * size_invs[idx]) >> inv_shift;
|
||||
}
|
||||
};
|
||||
|
||||
static inline void arena_run_reg_dalloc(arena_run_t* run, arena_bin_t* bin,
|
||||
void* ptr, size_t size) {
|
||||
unsigned diff, regind, elm, bit;
|
||||
|
|
@ -2442,11 +2420,22 @@ static inline void arena_run_reg_dalloc(arena_run_t* run, arena_bin_t* bin,
|
|||
// actual division here can reduce allocator throughput by over 20%!
|
||||
diff =
|
||||
(unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->mRunFirstRegionOffset);
|
||||
|
||||
MOZ_ASSERT(diff <=
|
||||
(static_cast<unsigned>(bin->mRunSizePages) << gPageSize2Pow));
|
||||
regind = diff / bin->mSizeDivisor;
|
||||
|
||||
if (mozilla::IsPowerOfTwo(size)) {
|
||||
regind = diff >> FloorLog2(size);
|
||||
} else {
|
||||
SizeClass sc(size);
|
||||
switch (sc.Type()) {
|
||||
case SizeClass::Quantum:
|
||||
regind = FastDivide<kQuantum, kMaxQuantumClass>::divide(diff, size);
|
||||
break;
|
||||
case SizeClass::QuantumWide:
|
||||
regind =
|
||||
FastDivide<kQuantumWide, kMaxQuantumWideClass>::divide(diff, size);
|
||||
break;
|
||||
default:
|
||||
regind = diff / size;
|
||||
}
|
||||
}
|
||||
MOZ_DIAGNOSTIC_ASSERT(diff == regind * size);
|
||||
MOZ_DIAGNOSTIC_ASSERT(regind < bin->mRunNumRegions);
|
||||
|
||||
|
|
@ -2804,11 +2793,10 @@ void arena_t::DallocRun(arena_run_t* aRun, bool aDirty) {
|
|||
MOZ_RELEASE_ASSERT(run_ind < gChunkNumPages - 1);
|
||||
if ((chunk->map[run_ind].bits & CHUNK_MAP_LARGE) != 0) {
|
||||
size = chunk->map[run_ind].bits & ~gPageSizeMask;
|
||||
run_pages = (size >> gPageSize2Pow);
|
||||
} else {
|
||||
run_pages = aRun->mBin->mRunSizePages;
|
||||
size = run_pages << gPageSize2Pow;
|
||||
size = aRun->mBin->mRunSize;
|
||||
}
|
||||
run_pages = (size >> gPageSize2Pow);
|
||||
|
||||
// Mark pages as unallocated in the chunk map.
|
||||
if (aDirty) {
|
||||
|
|
@ -2942,8 +2930,7 @@ arena_run_t* arena_t::GetNonFullBinRun(arena_bin_t* aBin) {
|
|||
// No existing runs have any space available.
|
||||
|
||||
// Allocate a new run.
|
||||
run = AllocRun(static_cast<size_t>(aBin->mRunSizePages) << gPageSize2Pow,
|
||||
false, false);
|
||||
run = AllocRun(aBin->mRunSize, false, false);
|
||||
if (!run) {
|
||||
return nullptr;
|
||||
}
|
||||
|
|
@ -2994,7 +2981,7 @@ void arena_bin_t::Init(SizeClass aSizeClass) {
|
|||
mSizeClass = aSizeClass.Size();
|
||||
mNumRuns = 0;
|
||||
|
||||
// Run size expansion loop.
|
||||
// mRunSize expansion loop.
|
||||
while (true) {
|
||||
try_nregs = ((try_run_size - kFixedHeaderSize) / mSizeClass) +
|
||||
1; // Counter-act try_nregs-- in loop.
|
||||
|
|
@ -3055,12 +3042,10 @@ void arena_bin_t::Init(SizeClass aSizeClass) {
|
|||
MOZ_ASSERT((try_mask_nelms << (LOG2(sizeof(int)) + 3)) >= try_nregs);
|
||||
|
||||
// Copy final settings.
|
||||
MOZ_ASSERT((try_run_size >> gPageSize2Pow) <= UINT8_MAX);
|
||||
mRunSizePages = static_cast<uint8_t>(try_run_size >> gPageSize2Pow);
|
||||
mRunSize = try_run_size;
|
||||
mRunNumRegions = try_nregs;
|
||||
mRunNumRegionsMask = try_mask_nelms;
|
||||
mRunFirstRegionOffset = try_reg0_offset;
|
||||
mSizeDivisor = FastDivisor<uint16_t>(aSizeClass.Size(), try_run_size);
|
||||
}
|
||||
|
||||
void* arena_t::MallocSmall(size_t aSize, bool aZero) {
|
||||
|
|
@ -4597,11 +4582,9 @@ inline void MozJemalloc::jemalloc_stats_internal(
|
|||
aBinStats[j].num_non_full_runs += num_non_full_runs;
|
||||
aBinStats[j].num_runs += bin->mNumRuns;
|
||||
aBinStats[j].bytes_unused += bin_unused;
|
||||
size_t bytes_per_run = static_cast<size_t>(bin->mRunSizePages)
|
||||
<< gPageSize2Pow;
|
||||
aBinStats[j].bytes_total +=
|
||||
bin->mNumRuns * (bytes_per_run - bin->mRunFirstRegionOffset);
|
||||
aBinStats[j].bytes_per_run = bytes_per_run;
|
||||
bin->mNumRuns * (bin->mRunSize - bin->mRunFirstRegionOffset);
|
||||
aBinStats[j].bytes_per_run = bin->mRunSize;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue