Backed out 3 changesets (bug 1743322) for causing multiple failures CLOSED TREE

Backed out changeset fa443c853d2d (bug 1743322)
Backed out changeset d299fa6358f7 (bug 1743322)
Backed out changeset 25647d613534 (bug 1743322)
Norisz Fay 2022-12-06 11:48:14 +02:00
parent 568b6a87f9
commit 70bc66e002

@@ -492,7 +492,7 @@ static size_t gPageSize;
# define END_GLOBALS
# define DEFINE_GLOBAL(type) static const type
# define GLOBAL_LOG2 LOG2
# define GLOBAL_ASSERT_HELPER1(x) static_assert(x, #x)
# define GLOBAL_ASSERT_HELPER1(x) static_assert(x, # x)
# define GLOBAL_ASSERT_HELPER2(x, y) static_assert(x, y)
# define GLOBAL_ASSERT(...) \
MACRO_CALL( \
@@ -772,75 +772,6 @@ class SizeClass {
size_t mSize;
};
// Fast division
//
// During deallocation we want to divide by the size class. This class
// provides a routine and sets up a constant as follows.
//
// To divide by a number D that is not a power of two we multiply by (2^17 /
// D) and then right shift by 17 positions.
//
// X / D
//
// becomes
//
// (X * Inv) >> SIZE_INV_SHIFT
//
// Where Inv is calculated during the FastDivisor constructor as:
//
// Inv = 2^SIZE_INV_SHIFT / D
//
template <typename T>
class FastDivisor {
private:
// The shift amount is chosen to minimise the size of inv while
// working for divisors up to 65536 in steps of 16. I arrived at 17
// experimentally. I wanted a low number to minimise the range of inv
// so it can fit in a uint16_t, 16 didn't work but 17 worked perfectly.
//
// We'd need to increase this if we allocated memory on smaller boundaries
// than 16.
static const unsigned divide_inv_shift = 17;
// We can fit the inverted divisor in 16 bits.
T inv;
public:
// Needed so mBins can be constructed.
FastDivisor() : inv(0) {}
FastDivisor(unsigned div, unsigned max) {
MOZ_ASSERT(div <= max);
// divide_inv_shift is large enough.
MOZ_ASSERT((1U << divide_inv_shift) >= div);
unsigned inv_ = ((1U << divide_inv_shift) / div) + 1;
// Make sure that max * inv does not overflow.
MOZ_DIAGNOSTIC_ASSERT(max < UINT_MAX / inv_);
MOZ_ASSERT(inv_ <= std::numeric_limits<T>::max());
inv = static_cast<T>(inv_);
// Initialisation made inv non-zero.
MOZ_ASSERT(inv);
}
// Note that this always occurs in unsigned regardless of inv's type. That
// is, inv is zero-extended before the operation.
inline unsigned divide(unsigned num) const {
// Check that inv was initialised.
MOZ_ASSERT(inv);
return (num * inv) >> divide_inv_shift;
}
};
template <typename T>
unsigned inline operator/(unsigned num, FastDivisor<T> divisor) {
return divisor.divide(num);
}
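A minimal standalone sketch of the multiply-and-shift division described above (the divisor, run size and names here are illustrative, not values from the patch): it checks that (x * inv) >> 17 reproduces x / d for every region start inside a run, which matches the diagnostic assertion diff == regind * size in arena_run_reg_dalloc below.

#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  const unsigned shift = 17;   // divide_inv_shift above
  const unsigned d = 48;       // an illustrative non-power-of-two size class
  const unsigned max = 16384;  // an illustrative run size (a few pages)
  const uint16_t inv = static_cast<uint16_t>(((1U << shift) / d) + 1);

  // Region starts are multiples of d; each must map back to its exact index.
  for (unsigned x = 0; x <= max; x += d) {
    assert(((x * inv) >> shift) == x / d);
  }
  printf("inv = %u, all region offsets divide exactly\n", inv);
  return 0;
}
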
// ***************************************************************************
// Radix tree data structures.
//
@@ -993,6 +924,9 @@ struct arena_bin_t {
// Bin's size class.
size_t mSizeClass;
// Total size of a run for this bin's size class.
size_t mRunSize;
// Total number of regions in a run for this bin's size class.
uint32_t mRunNumRegions;
@@ -1003,14 +937,7 @@ struct arena_bin_t {
uint32_t mRunFirstRegionOffset;
// Current number of runs in this bin, full or otherwise.
uint32_t mNumRuns;
// A constant for fast division by size class. This value is 16 bits wide so
// it is placed last.
FastDivisor<uint16_t> mSizeDivisor;
// Total number of pages in a run for this bin's size class.
uint8_t mRunSizePages;
unsigned long mNumRuns;
// Amount of overhead runs are allowed to have.
static constexpr double kRunOverhead = 1.6_percent;
@@ -1035,17 +962,6 @@ struct arena_bin_t {
inline void Init(SizeClass aSizeClass);
};
// We try to keep the above structure aligned with common cache lines sizes,
// often that's 64 bytes on x86 and ARM, we don't make assumptions for other
// architectures.
#if defined(__x86_64__) || defined(__aarch64__)
// On 64bit platforms this structure is often 48 bytes
// long, which means every other array element will be properly aligned.
static_assert(sizeof(arena_bin_t) == 48);
#elif defined(__x86__) || defined(__arm__)
static_assert(sizeof(arena_bin_t) == 32);
#endif
struct arena_t {
#if defined(MOZ_DIAGNOSTIC_ASSERT_ENABLED)
uint32_t mMagic;
@@ -2432,6 +2348,68 @@ inline void* arena_t::ArenaRunRegAlloc(arena_run_t* aRun, arena_bin_t* aBin) {
return nullptr;
}
// To divide by a number D that is not a power of two we multiply by (2^21 /
// D) and then right shift by 21 positions.
//
// X / D
//
// becomes
//
// (X * size_invs[D - 3]) >> SIZE_INV_SHIFT
//
// Where D is d/Q and Q is a constant factor.
template <unsigned Q, unsigned Max>
struct FastDivide {
static_assert(IsPowerOfTwo(Q), "q must be a power-of-two");
// We don't need FastDivide when dividing by a power-of-two. So when we set
// the range (min_divisor - max_divisor inclusive) we can avoid powers-of-two.
// Because Q is a power of two Q*3 is the first not-power-of-two.
static const unsigned min_divisor = Q * 3;
static const unsigned max_divisor =
mozilla::IsPowerOfTwo(Max) ? Max - Q : Max;
// +1 because this range is inclusive.
static const unsigned num_divisors = (max_divisor - min_divisor) / Q + 1;
static const unsigned inv_shift = 21;
static constexpr unsigned inv(unsigned s) {
return ((1U << inv_shift) / (s * Q)) + 1;
}
static unsigned divide(size_t num, unsigned div) {
// clang-format off
static const unsigned size_invs[] = {
inv(3),
inv(4), inv(5), inv(6), inv(7),
inv(8), inv(9), inv(10), inv(11),
inv(12), inv(13), inv(14), inv(15),
inv(16), inv(17), inv(18), inv(19),
inv(20), inv(21), inv(22), inv(23),
inv(24), inv(25), inv(26), inv(27),
inv(28), inv(29), inv(30), inv(31)
};
// clang-format on
// If the divisor is valid (min is below max) then the size_invs array must
// be large enough.
static_assert(!(min_divisor < max_divisor) ||
num_divisors <= sizeof(size_invs) / sizeof(unsigned),
"num_divisors does not match array size");
MOZ_ASSERT(div >= min_divisor);
MOZ_ASSERT(div <= max_divisor);
MOZ_ASSERT(div % Q == 0);
// If Q isn't a power of two this optimisation would be pointless, we expect
// /Q to be reduced to a shift, but we asserted this above.
const unsigned idx = div / Q - 3;
MOZ_ASSERT(idx < sizeof(size_invs) / sizeof(unsigned));
return (num * size_invs[idx]) >> inv_shift;
}
};
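The reciprocal-table trick restored in the FastDivide struct above can be checked in isolation with a hedged sketch (Q and the ranges here are assumptions standing in for kQuantum/kMaxQuantumClass, not the real constants):

#include <cassert>
#include <cstddef>

constexpr unsigned Q = 16;          // illustrative quantum spacing
constexpr unsigned inv_shift = 21;  // matches the shift in FastDivide above

constexpr unsigned inv(unsigned s) { return ((1U << inv_shift) / (s * Q)) + 1; }

int main() {
  // For each quantum-spaced divisor d = 3*Q .. 31*Q (the table above covers
  // inv(3) .. inv(31)), multiples of d inside a run-sized range divide
  // exactly via multiply-and-shift.
  for (unsigned s = 3; s <= 31; s++) {
    const unsigned d = s * Q;
    for (size_t x = 0; x <= 65536; x += d) {
      assert(((x * inv(s)) >> inv_shift) == x / d);
    }
  }
  return 0;
}
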
static inline void arena_run_reg_dalloc(arena_run_t* run, arena_bin_t* bin,
void* ptr, size_t size) {
unsigned diff, regind, elm, bit;
@@ -2442,11 +2420,22 @@ static inline void arena_run_reg_dalloc(arena_run_t* run, arena_bin_t* bin,
// actual division here can reduce allocator throughput by over 20%!
diff =
(unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->mRunFirstRegionOffset);
MOZ_ASSERT(diff <=
(static_cast<unsigned>(bin->mRunSizePages) << gPageSize2Pow));
regind = diff / bin->mSizeDivisor;
if (mozilla::IsPowerOfTwo(size)) {
regind = diff >> FloorLog2(size);
} else {
SizeClass sc(size);
switch (sc.Type()) {
case SizeClass::Quantum:
regind = FastDivide<kQuantum, kMaxQuantumClass>::divide(diff, size);
break;
case SizeClass::QuantumWide:
regind =
FastDivide<kQuantumWide, kMaxQuantumWideClass>::divide(diff, size);
break;
default:
regind = diff / size;
}
}
MOZ_DIAGNOSTIC_ASSERT(diff == regind * size);
MOZ_DIAGNOSTIC_ASSERT(regind < bin->mRunNumRegions);
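As a hedged illustration of what this dispatch computes (the run base, header offset and the FloorLog2 stand-in below are made up for the example): the region index is the pointer's offset into the run's region area divided by the region size, and for power-of-two sizes the shift is exactly that division.

#include <cassert>
#include <cstdint>

// Stand-in for mozilla::FloorLog2, restricted to power-of-two inputs.
static unsigned FloorLog2Pow2(size_t v) {
  unsigned log = 0;
  while (v >>= 1) {
    log++;
  }
  return log;
}

int main() {
  const size_t size = 64;                   // an illustrative power-of-two class
  const uintptr_t run = 0x10000;            // pretend run base address
  const uintptr_t firstRegionOffset = 192;  // pretend mRunFirstRegionOffset

  for (unsigned regind = 0; regind < 100; regind++) {
    const uintptr_t ptr = run + firstRegionOffset + regind * size;
    const unsigned diff = (unsigned)(ptr - run - firstRegionOffset);
    // Power-of-two path: the shift reproduces the division exactly.
    assert((diff >> FloorLog2Pow2(size)) == diff / size);
    // Mirrors the diagnostic assertions above.
    assert(diff == (diff / size) * size);
    assert(diff / size == regind);
  }
  return 0;
}
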
@@ -2804,11 +2793,10 @@ void arena_t::DallocRun(arena_run_t* aRun, bool aDirty) {
MOZ_RELEASE_ASSERT(run_ind < gChunkNumPages - 1);
if ((chunk->map[run_ind].bits & CHUNK_MAP_LARGE) != 0) {
size = chunk->map[run_ind].bits & ~gPageSizeMask;
run_pages = (size >> gPageSize2Pow);
} else {
run_pages = aRun->mBin->mRunSizePages;
size = run_pages << gPageSize2Pow;
size = aRun->mBin->mRunSize;
}
run_pages = (size >> gPageSize2Pow);
// Mark pages as unallocated in the chunk map.
if (aDirty) {
@@ -2942,8 +2930,7 @@ arena_run_t* arena_t::GetNonFullBinRun(arena_bin_t* aBin) {
// No existing runs have any space available.
// Allocate a new run.
run = AllocRun(static_cast<size_t>(aBin->mRunSizePages) << gPageSize2Pow,
false, false);
run = AllocRun(aBin->mRunSize, false, false);
if (!run) {
return nullptr;
}
@@ -2994,7 +2981,7 @@ void arena_bin_t::Init(SizeClass aSizeClass) {
mSizeClass = aSizeClass.Size();
mNumRuns = 0;
// Run size expansion loop.
// mRunSize expansion loop.
while (true) {
try_nregs = ((try_run_size - kFixedHeaderSize) / mSizeClass) +
1; // Counter-act try_nregs-- in loop.
@@ -3055,12 +3042,10 @@ void arena_bin_t::Init(SizeClass aSizeClass) {
MOZ_ASSERT((try_mask_nelms << (LOG2(sizeof(int)) + 3)) >= try_nregs);
// Copy final settings.
MOZ_ASSERT((try_run_size >> gPageSize2Pow) <= UINT8_MAX);
mRunSizePages = static_cast<uint8_t>(try_run_size >> gPageSize2Pow);
mRunSize = try_run_size;
mRunNumRegions = try_nregs;
mRunNumRegionsMask = try_mask_nelms;
mRunFirstRegionOffset = try_reg0_offset;
mSizeDivisor = FastDivisor<uint16_t>(aSizeClass.Size(), try_run_size);
}
void* arena_t::MallocSmall(size_t aSize, bool aZero) {
@@ -4597,11 +4582,9 @@ inline void MozJemalloc::jemalloc_stats_internal(
aBinStats[j].num_non_full_runs += num_non_full_runs;
aBinStats[j].num_runs += bin->mNumRuns;
aBinStats[j].bytes_unused += bin_unused;
size_t bytes_per_run = static_cast<size_t>(bin->mRunSizePages)
<< gPageSize2Pow;
aBinStats[j].bytes_total +=
bin->mNumRuns * (bytes_per_run - bin->mRunFirstRegionOffset);
aBinStats[j].bytes_per_run = bytes_per_run;
bin->mNumRuns * (bin->mRunSize - bin->mRunFirstRegionOffset);
aBinStats[j].bytes_per_run = bin->mRunSize;
}
}
}