diff --git a/mozglue/baseprofiler/moz.build b/mozglue/baseprofiler/moz.build index 2adf12a77583..0532e8fb91d9 100644 --- a/mozglue/baseprofiler/moz.build +++ b/mozglue/baseprofiler/moz.build @@ -83,6 +83,7 @@ EXPORTS += [ EXPORTS.mozilla += [ 'public/BaseProfilerCounts.h', + 'public/leb128iterator.h', 'public/PowerOfTwo.h', ] diff --git a/mozglue/baseprofiler/public/leb128iterator.h b/mozglue/baseprofiler/public/leb128iterator.h new file mode 100644 index 000000000000..9785415a7405 --- /dev/null +++ b/mozglue/baseprofiler/public/leb128iterator.h @@ -0,0 +1,147 @@ +/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* vim: set ts=8 sts=2 et sw=2 tw=80: */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +// LEB128 utilities that can read/write unsigned LEB128 numbers from/to +// iterators. +// +// LEB128 = Little Endian Base 128, where small numbers take few bytes, but +// large numbers are still allowed, which is ideal when serializing numbers that +// are likely to be small. +// Each byte contains 7 bits from the number, starting at the "little end", the +// top bit is 0 for the last byte, 1 otherwise. +// Numbers 0-127 only take 1 byte. 128-16383 take 2 bytes. Etc. +// +// Iterators only need to provide: +// - `*it` to return a reference to the next byte to be read from or written to. +// - `++it` to advance the iterator after a byte is written. +// +// The caller must always provide sufficient space to write any number, by: +// - pre-allocating a large enough buffer, or +// - allocating more space when `++it` reaches the end and/or `*it` is invoked +// after the end, or +// - moving the underlying pointer to an appropriate location (e.g., wrapping +// around a circular buffer). 
+// The caller must also provide enough bytes to read a full value (i.e., at +// least one byte should have its top bit unset), and a type large enough to +// hold the stored value. +// +// Note: There are insufficient checks for validity! These functions are +// intended to be used together, i.e., the user should only `ReadULEB128()` from +// a sufficiently-large buffer that the same user filled with `WriteULEB128()`. +// Using with externally-sourced data (e.g., DWARF) is *not* recommended. +// +// https://en.wikipedia.org/wiki/LEB128 + +#ifndef leb128iterator_h +#define leb128iterator_h + +#include +#include +#include +#include + +namespace mozilla { + +// Number of bytes needed to represent `aValue` in unsigned LEB128. +template +constexpr uint_fast8_t ULEB128Size(T aValue) { + static_assert(!std::numeric_limits::is_signed, + "ULEB128Size only takes unsigned types"); + // We need one output byte per 7 bits of non-zero value. So we just remove + // 7 least significant bits at a time until the value becomes zero. + // Note the special case of 0, which still needs 1 output byte; this is done + // by starting the first loop before we check for 0. + uint_fast8_t size = 0; + for (;;) { + size += 1; + aValue >>= 7; + // Expecting small values, so it should be more likely that `aValue == 0`. + if (MOZ_LIKELY(aValue == 0)) { + return size; + } + } +} + +// Maximum number of bytes needed to represent any value of type `T`. +template +constexpr uint_fast8_t ULEB128MaxSize() { + return ULEB128Size(std::numeric_limits::max()); +} + +// Write `aValue` in unsigned LEB128 to `aIterator`. +// The iterator will be moved past the last byte. +template +void WriteULEB128(T aValue, It& aIterator) { + static_assert(!std::numeric_limits::is_signed, + "WriteULEB128 only takes unsigned types"); + using IteratorValue = std::remove_reference_t; + static_assert(sizeof(IteratorValue) == 1, + "WriteULEB128 expects an iterator to single bytes"); + // 0. Don't test for 0 yet, as we want to output one byte for it.
+ for (;;) { + // 1. Extract the 7 least significant bits. + const uint_fast8_t byte = aValue & 0x7Fu; + // 2. Remove them from `aValue`. + aValue >>= 7; + // 3. Write the 7 bits, and set the 8th bit if `aValue` is not 0 yet + // (meaning there will be more bytes after this one.) + // Expecting small values, so it should be more likely that `aValue == 0`. + // Note: No absolute need to force-cast to IteratorValue, because we have + // only changed the bottom 8 bits above. However the compiler could warn + // about a narrowing conversion from potentially-multibyte uint_fast8_t down + // to whatever single-byte type `*aIterator` expects, so we cast explicitly. + *aIterator = static_cast( + MOZ_LIKELY(aValue == 0) ? byte : (byte | 0x80u)); + // 4. Always advance the iterator to the next byte. + ++aIterator; + // 5. We're done if `aValue` is 0. + // Expecting small values, so it should be more likely that `aValue == 0`. + if (MOZ_LIKELY(aValue == 0)) { + return; + } + } +} + +// Read an unsigned LEB128 value from `aIterator`. +// The iterator will be moved past the last byte. +template +T ReadULEB128(It& aIterator) { + static_assert(!std::numeric_limits::is_signed, + "ReadULEB128 must return an unsigned type"); + using IteratorValue = std::remove_reference_t; + static_assert(sizeof(IteratorValue) == 1, + "ReadULEB128 expects an iterator to single bytes"); + // Incoming bits will be added to `result`... + T result = 0; + // ... starting with the least significant bits. + uint_fast8_t shift = 0; + for (;;) { + // 1. Read one byte from the iterator. + // `static_cast` just in case IteratorValue is not implicitly convertible to + // uint_fast8_t. It wouldn't matter if the sign was extended, we're only + // dealing with the bottom 8 bits below. + const uint_fast8_t byte = static_cast(*aIterator); + // 2. Always advance the iterator. + ++aIterator; + // 3. Extract the 7 bits of value, and shift them in place into `result`. + result |= static_cast(byte & 0x7fu) << shift; + // 4.
If the 8th bit is *not* set, this was the last byte. + // Expecting small values, so it should be more likely that the bit is off. + if (MOZ_LIKELY((byte & 0x80u) == 0)) { + return result; + } + // There are more bytes to read. + // 5. Next byte will contain more significant bits above the past 7. + shift += 7; + // Safety check that we're not going to shift by >= the type's bit size, + // which is Undefined Behavior in C++. + MOZ_ASSERT(shift < CHAR_BIT * sizeof(T)); + } +} + +} // namespace mozilla + +#endif // leb128iterator_h diff --git a/mozglue/tests/TestBaseProfiler.cpp b/mozglue/tests/TestBaseProfiler.cpp index 8515dfb22291..b3c5b03590e1 100644 --- a/mozglue/tests/TestBaseProfiler.cpp +++ b/mozglue/tests/TestBaseProfiler.cpp @@ -8,6 +8,7 @@ #ifdef MOZ_BASE_PROFILER +# include "mozilla/leb128iterator.h" # include "mozilla/PowerOfTwo.h" # include "mozilla/Attributes.h" @@ -150,6 +151,105 @@ void TestPowerOfTwo() { printf("TestPowerOfTwo done\n"); } +void TestLEB128() { + printf("TestLEB128...\n"); + + MOZ_RELEASE_ASSERT(ULEB128MaxSize() == 2); + MOZ_RELEASE_ASSERT(ULEB128MaxSize() == 3); + MOZ_RELEASE_ASSERT(ULEB128MaxSize() == 5); + MOZ_RELEASE_ASSERT(ULEB128MaxSize() == 10); + + struct TestDataU64 { + uint64_t mValue; + unsigned mSize; + const char* mBytes; + }; + // clang-format off + TestDataU64 tests[] = { + // Small numbers should keep their normal byte representation. + { 0u, 1, "\0" }, + { 1u, 1, "\x01" }, + + // 0111 1111 (127, or 0x7F) is the highest number that fits into a single + // LEB128 byte. It gets encoded as 0111 1111, note the most significant bit + // is off. + { 0x7Fu, 1, "\x7F" }, + + // Next number: 128, or 0x80. + // Original data representation: 1000 0000 + // Broken up into groups of 7: 1 0000000 + // Padded with 0 (msB) or 1 (lsB): 00000001 10000000 + // Byte representation: 0x01 0x80 + // Little endian order: -> 0x80 0x01 + { 0x80u, 2, "\x80\x01" }, + + // Next: 129, or 0x81 (showing that we don't lose low bits.)
+ // Original data representation: 1000 0001 + // Broken up into groups of 7: 1 0000001 + // Padded with 0 (msB) or 1 (lsB): 00000001 10000001 + // Byte representation: 0x01 0x81 + // Little endian order: -> 0x81 0x01 + { 0x81u, 2, "\x81\x01" }, + + // Highest 8-bit number: 255, or 0xFF. + // Original data representation: 1111 1111 + // Broken up into groups of 7: 1 1111111 + // Padded with 0 (msB) or 1 (lsB): 00000001 11111111 + // Byte representation: 0x01 0xFF + // Little endian order: -> 0xFF 0x01 + { 0xFFu, 2, "\xFF\x01" }, + + // Next: 256, or 0x100. + // Original data representation: 1 0000 0000 + // Broken up into groups of 7: 10 0000000 + // Padded with 0 (msB) or 1 (lsB): 00000010 10000000 + // Byte representation: 0x02 0x80 + // Little endian order: -> 0x80 0x02 + { 0x100u, 2, "\x80\x02" }, + + // Highest 32-bit number: 0xFFFFFFFF (4 bytes, all bits set). + // Original: 1111 1111 1111 1111 1111 1111 1111 1111 + // Groups: 1111 1111111 1111111 1111111 1111111 + // Padded: 00001111 11111111 11111111 11111111 11111111 + // Bytes: 0x0F 0xFF 0xFF 0xFF 0xFF + // Little Endian: -> 0xFF 0xFF 0xFF 0xFF 0x0F + { 0xFFFFFFFFu, 5, "\xFF\xFF\xFF\xFF\x0F" }, + + // Highest 64-bit number: 0xFFFFFFFFFFFFFFFF (8 bytes, all bits set). + // 64 bits, that's 9 groups of 7 bits, plus 1 (most significant) bit. + { 0xFFFFFFFFFFFFFFFFu, 10, "\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF\x01" } + }; + // clang-format on + + for (const TestDataU64& test : tests) { + MOZ_RELEASE_ASSERT(ULEB128Size(test.mValue) == test.mSize); + // Prepare a buffer that can accommodate the largest-possible LEB128. + uint8_t buffer[ULEB128MaxSize()]; + // Use a pointer into the buffer as iterator. + uint8_t* p = buffer; + // And write the LEB128. + WriteULEB128(test.mValue, p); + // Pointer (iterator) should have advanced just past the expected LEB128 + // size. + MOZ_RELEASE_ASSERT(p == buffer + test.mSize); + // Check expected bytes.
+ for (unsigned i = 0; i < test.mSize; ++i) { + MOZ_RELEASE_ASSERT(buffer[i] == uint8_t(test.mBytes[i])); + } + // Move pointer (iterator) back to start of buffer. + p = buffer; + // And read the LEB128 we wrote above. + uint64_t read = ReadULEB128(p); + // Pointer (iterator) should have also advanced just past the expected + // LEB128 size. + MOZ_RELEASE_ASSERT(p == buffer + test.mSize); + // And check the read value. + MOZ_RELEASE_ASSERT(read == test.mValue); + } + + printf("TestLEB128 done\n"); +} + // Increase the depth, to a maximum (to avoid too-deep recursion). static constexpr size_t NextDepth(size_t aDepth) { constexpr size_t MAX_DEPTH = 128; @@ -185,6 +285,7 @@ void TestProfiler() { // Test dependencies. TestPowerOfTwoMask(); TestPowerOfTwo(); + TestLEB128(); { printf("profiler_init()...\n");