forked from mirrors/gecko-dev
Bug 1749046 - Resampling input audio to the encoder sample-rate. r=chunmin
Differential Revision: https://phabricator.services.mozilla.com/D199534
This commit is contained in:
parent
fa10e30c03
commit
b1c61fbf38
4 changed files with 133 additions and 30 deletions
|
|
@ -23,6 +23,7 @@
|
|||
# include "mozilla/gfx/Rect.h"
|
||||
# include "nsString.h"
|
||||
# include "nsTArray.h"
|
||||
# include "EncoderConfig.h"
|
||||
|
||||
namespace mozilla {
|
||||
|
||||
|
|
@ -723,6 +724,9 @@ class MediaRawData final : public MediaData {
|
|||
// Currently this is only used for the media engine DRM playback.
|
||||
bool mShouldCopyCryptoToRemoteRawData = false;
|
||||
|
||||
// Config used to encode this packet.
|
||||
UniquePtr<const EncoderConfig> mConfig;
|
||||
|
||||
// It's only used when the remote decoder reconstructs the media raw data.
|
||||
CryptoSample& GetWritableCrypto() { return mCryptoInternal; }
|
||||
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@
|
|||
#include "FFmpegRuntimeLinker.h"
|
||||
#include "FFmpegLog.h"
|
||||
#include "FFmpegUtils.h"
|
||||
#include "MediaData.h"
|
||||
|
||||
#include "AudioSegment.h"
|
||||
|
||||
|
|
@ -34,6 +35,11 @@ nsCString FFmpegAudioEncoder<LIBAV_VER>::GetDescriptionName() const {
|
|||
#endif
|
||||
}
|
||||
|
||||
void FFmpegAudioEncoder<LIBAV_VER>::ResamplerDestroy::operator()(
|
||||
SpeexResamplerState* aResampler) {
|
||||
speex_resampler_destroy(aResampler);
|
||||
}
|
||||
|
||||
nsresult FFmpegAudioEncoder<LIBAV_VER>::InitSpecific() {
|
||||
MOZ_ASSERT(mTaskQueue->IsOnCurrentThread());
|
||||
|
||||
|
|
@ -46,6 +52,54 @@ nsresult FFmpegAudioEncoder<LIBAV_VER>::InitSpecific() {
|
|||
return NS_ERROR_DOM_MEDIA_NOT_SUPPORTED_ERR;
|
||||
}
|
||||
|
||||
// Find a compatible input rate for the codec, update the encoder config, and
|
||||
// note the rate at which this instance was configured.
|
||||
mInputSampleRate = AssertedCast<int>(mConfig.mSampleRate);
|
||||
if (codec->supported_samplerates) {
|
||||
// Ensure the sample-rate list is sorted, iterate and either find that the
|
||||
// sample rate is supported, or pick the same rate just above the audio
|
||||
// input sample-rate (as to not lose information). If the audio is higher
|
||||
// than the highest supported sample-rate, down-sample to the highest
|
||||
// sample-rate supported by the codec. This is the case when encoding high
|
||||
// samplerate audio to opus.
|
||||
AutoTArray<int, 16> supportedSampleRates;
|
||||
IterateZeroTerminated(codec->supported_samplerates,
|
||||
[&supportedSampleRates](int aRate) mutable {
|
||||
supportedSampleRates.AppendElement(aRate);
|
||||
});
|
||||
supportedSampleRates.Sort();
|
||||
|
||||
for (const auto& rate : supportedSampleRates) {
|
||||
if (mInputSampleRate == rate) {
|
||||
mConfig.mSampleRate = rate;
|
||||
break;
|
||||
}
|
||||
if (mInputSampleRate < rate) {
|
||||
// This rate is the smallest supported rate above the content's rate.
|
||||
mConfig.mSampleRate = rate;
|
||||
break;
|
||||
}
|
||||
if (mInputSampleRate > rate) {
|
||||
mConfig.mSampleRate = rate;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (mConfig.mSampleRate != AssertedCast<uint32_t>(mInputSampleRate)) {
|
||||
// Need to resample to targetRate
|
||||
int err;
|
||||
SpeexResamplerState* resampler = speex_resampler_init(
|
||||
mConfig.mNumberOfChannels, mInputSampleRate, mConfig.mSampleRate,
|
||||
SPEEX_RESAMPLER_QUALITY_DEFAULT, &err);
|
||||
if (!err) {
|
||||
mResampler.reset(resampler);
|
||||
} else {
|
||||
FFMPEG_LOG(
|
||||
"Error creating resampler in FFmpegAudioEncoder %dHz -> %dHz (%dch)",
|
||||
mInputSampleRate, mConfig.mSampleRate, mConfig.mNumberOfChannels);
|
||||
}
|
||||
}
|
||||
|
||||
// And now the audio-specific part
|
||||
mCodecContext->sample_rate = AssertedCast<int>(mConfig.mSampleRate);
|
||||
mCodecContext->channels = AssertedCast<int>(mConfig.mNumberOfChannels);
|
||||
|
|
@ -75,7 +129,7 @@ nsresult FFmpegAudioEncoder<LIBAV_VER>::InitSpecific() {
|
|||
mLib->av_opt_set(mCodecContext->priv_data, "vbr", "off", 0);
|
||||
}
|
||||
}
|
||||
// Override the time base: always the sample-rate in audio
|
||||
// Override the time base: always the sample-rate the encoder is running at
|
||||
mCodecContext->time_base =
|
||||
AVRational{.num = 1, .den = mCodecContext->sample_rate};
|
||||
|
||||
|
|
@ -99,8 +153,6 @@ nsresult FFmpegAudioEncoder<LIBAV_VER>::InitSpecific() {
|
|||
|
||||
Result<MediaDataEncoder::EncodedData, nsresult>
|
||||
FFmpegAudioEncoder<LIBAV_VER>::EncodeOnePacket(Span<float> aSamples,
|
||||
uint32_t aChannels,
|
||||
uint32_t aRate,
|
||||
media::TimeUnit aPts) {
|
||||
// Allocate AVFrame.
|
||||
if (!PrepareFrame()) {
|
||||
|
|
@ -108,13 +160,13 @@ FFmpegAudioEncoder<LIBAV_VER>::EncodeOnePacket(Span<float> aSamples,
|
|||
return Err(NS_ERROR_OUT_OF_MEMORY);
|
||||
}
|
||||
|
||||
uint32_t frameCount = aFrames.Length() / aChannels;
|
||||
uint32_t frameCount = aSamples.Length() / mConfig.mNumberOfChannels;
|
||||
|
||||
// This method assumes that the audio has been packetized appropriately --
|
||||
// packets smaller than the packet size are allowed when draining.
|
||||
MOZ_ASSERT(AssertedCast<int>(frameCount) <= mCodecContext->frame_size);
|
||||
|
||||
mFrame->channels = AssertedCast<int>(aChannels);
|
||||
mFrame->channels = AssertedCast<int>(mConfig.mNumberOfChannels);
|
||||
|
||||
# if LIBAVCODEC_VERSION_MAJOR >= 60
|
||||
int rv = mLib->av_channel_layout_copy(&mFrame->ch_layout,
|
||||
|
|
@ -126,16 +178,17 @@ FFmpegAudioEncoder<LIBAV_VER>::EncodeOnePacket(Span<float> aSamples,
|
|||
}
|
||||
# endif
|
||||
|
||||
mFrame->sample_rate = AssertedCast<int>(aRate);
|
||||
mFrame->sample_rate = AssertedCast<int>(mConfig.mSampleRate);
|
||||
// Not a mistake, nb_samples is per channel in ffmpeg
|
||||
mFrame->nb_samples = AssertedCast<int>(frameCount);
|
||||
// Audio is converted below if needed
|
||||
mFrame->format = mCodecContext->sample_fmt;
|
||||
// Set presentation timestamp and duration of the AVFrame.
|
||||
# if LIBAVCODEC_VERSION_MAJOR >= 59
|
||||
mFrame->time_base = AVRational{.num = 1, .den = static_cast<int>(aRate)};
|
||||
mFrame->time_base =
|
||||
AVRational{.num = 1, .den = static_cast<int>(mConfig.mSampleRate)};
|
||||
# endif
|
||||
mFrame->pts = aPts.ToTicksAtRate(aRate);
|
||||
mFrame->pts = aPts.ToTicksAtRate(mConfig.mSampleRate);
|
||||
mFrame->pkt_duration = frameCount;
|
||||
# if LIBAVCODEC_VERSION_MAJOR >= 60
|
||||
mFrame->duration = frameCount;
|
||||
|
|
@ -163,9 +216,9 @@ FFmpegAudioEncoder<LIBAV_VER>::EncodeOnePacket(Span<float> aSamples,
|
|||
aSamples.Length());
|
||||
} else {
|
||||
MOZ_ASSERT(mCodecContext->sample_fmt == AV_SAMPLE_FMT_FLTP);
|
||||
for (uint32_t i = 0; i < aChannels; i++) {
|
||||
for (uint32_t i = 0; i < mConfig.mNumberOfChannels; i++) {
|
||||
DeinterleaveAndConvertBuffer(aSamples.data(), mFrame->nb_samples,
|
||||
aChannels, mFrame->data);
|
||||
mFrame->channels, mFrame->data);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -184,14 +237,13 @@ Result<MediaDataEncoder::EncodedData, nsresult> FFmpegAudioEncoder<
|
|||
FFMPEG_LOG("Encoding %" PRIu32 " frames of audio at pts: %s",
|
||||
sample->Frames(), sample->mTime.ToString().get());
|
||||
|
||||
// TODO: Validate input -- error out of sample rate or channel count is not
|
||||
// what's expected
|
||||
|
||||
if (sample->mRate != mConfig.mSampleRate ||
|
||||
if ((!mResampler && sample->mRate != mConfig.mSampleRate) ||
|
||||
(mResampler &&
|
||||
sample->mRate != AssertedCast<uint32_t>(mInputSampleRate)) ||
|
||||
sample->mChannels != mConfig.mNumberOfChannels) {
|
||||
FFMPEG_LOG(
|
||||
"Rate or sample-rate mismatch at the input of the audio encoder, "
|
||||
"erroring out");
|
||||
"Rate or sample-rate at the inputof the encoder different from what "
|
||||
"has been configured initially, erroring out");
|
||||
return Result<MediaDataEncoder::EncodedData, nsresult>(
|
||||
NS_ERROR_DOM_ENCODING_NOT_SUPPORTED_ERR);
|
||||
}
|
||||
|
|
@ -213,10 +265,32 @@ Result<MediaDataEncoder::EncodedData, nsresult> FFmpegAudioEncoder<
|
|||
mFirstPacketPts = sample->mTime;
|
||||
}
|
||||
|
||||
Span<float> audio = sample->Data();
|
||||
|
||||
if (mResampler) {
|
||||
// Ensure that all input frames are consumed each time by oversizing the
|
||||
// output buffer.
|
||||
int bufferLengthGuess = std::ceil(2. * static_cast<float>(audio.size()) *
|
||||
mConfig.mSampleRate / mInputSampleRate);
|
||||
mTempBuffer.SetLength(bufferLengthGuess);
|
||||
uint32_t inputFrames = audio.size() / mConfig.mNumberOfChannels;
|
||||
uint32_t inputFramesProcessed = inputFrames;
|
||||
uint32_t outputFrames = bufferLengthGuess / mConfig.mNumberOfChannels;
|
||||
DebugOnly<int> rv = speex_resampler_process_interleaved_float(
|
||||
mResampler.get(), audio.data(), &inputFramesProcessed,
|
||||
mTempBuffer.Elements(), &outputFrames);
|
||||
audio = Span<float>(mTempBuffer.Elements(),
|
||||
outputFrames * mConfig.mNumberOfChannels);
|
||||
MOZ_ASSERT(inputFrames == inputFramesProcessed,
|
||||
"increate the buffer to consume all input each time");
|
||||
MOZ_ASSERT(rv == RESAMPLER_ERR_SUCCESS);
|
||||
}
|
||||
|
||||
EncodedData output;
|
||||
MediaResult rv = NS_OK;
|
||||
|
||||
mPacketizer->Input(sample->Data().data(), sample->Frames());
|
||||
mPacketizer->Input(audio.data(),
|
||||
audio.Length() / mConfig.mNumberOfChannels);
|
||||
|
||||
// Dequeue and encode each packet
|
||||
while (mPacketizer->PacketsAvailable() && rv.Code() == NS_OK) {
|
||||
|
|
@ -252,8 +326,7 @@ FFmpegAudioEncoder<LIBAV_VER>::DrainWithModernAPIs() {
|
|||
media::TimeUnit pts = mPacketizer->Drain(mTempBuffer.Elements(), written);
|
||||
auto audio =
|
||||
Span(mTempBuffer.Elements(), written * mPacketizer->ChannelCount());
|
||||
auto encodeResult = EncodeOnePacket(audio, mPacketizer->ChannelCount(),
|
||||
mConfig.mSampleRate, pts);
|
||||
auto encodeResult = EncodeOnePacket(audio, pts);
|
||||
if (encodeResult.isOk()) {
|
||||
auto array = encodeResult.unwrap();
|
||||
output.AppendElements(std::move(array));
|
||||
|
|
@ -283,6 +356,7 @@ RefPtr<MediaRawData> FFmpegAudioEncoder<LIBAV_VER>::ToMediaRawData(
|
|||
data->mTimecode = data->mTime;
|
||||
data->mDuration =
|
||||
media::TimeUnit(mCodecContext->frame_size, mConfig.mSampleRate);
|
||||
// Add here the new rate
|
||||
|
||||
// Handle encoder delay
|
||||
// Tracked in https://github.com/w3c/webcodecs/issues/626 because not quite
|
||||
|
|
@ -299,8 +373,13 @@ RefPtr<MediaRawData> FFmpegAudioEncoder<LIBAV_VER>::ToMediaRawData(
|
|||
data->mTime = mFirstPacketPts;
|
||||
}
|
||||
|
||||
if (auto r = GetExtraData(aPacket); r.isOk()) {
|
||||
data->mExtraData = r.unwrap();
|
||||
if (mPacketsDelivered++ == 0) {
|
||||
// Attach extradata, and the config (including any channel / samplerate
|
||||
// modification to fit the encoder requirements), if needed.
|
||||
if (auto r = GetExtraData(aPacket); r.isOk()) {
|
||||
data->mExtraData = r.unwrap();
|
||||
}
|
||||
data->mConfig = MakeUnique<EncoderConfig>(mConfig);
|
||||
}
|
||||
|
||||
if (data->mExtraData) {
|
||||
|
|
@ -318,13 +397,16 @@ RefPtr<MediaRawData> FFmpegAudioEncoder<LIBAV_VER>::ToMediaRawData(
|
|||
}
|
||||
|
||||
Result<already_AddRefed<MediaByteBuffer>, nsresult>
|
||||
FFmpegAudioEncoder<LIBAV_VER>::GetExtraData(AVPacket* aPacket) {
|
||||
MOZ_ASSERT(aPacket);
|
||||
|
||||
// Create extra data.
|
||||
FFmpegAudioEncoder<LIBAV_VER>::GetExtraData(AVPacket* /* aPacket */) {
|
||||
if (!mCodecContext->extradata_size) {
|
||||
return Err(NS_ERROR_NOT_AVAILABLE);
|
||||
}
|
||||
// Create extra data -- they are on the context.
|
||||
auto extraData = MakeRefPtr<MediaByteBuffer>();
|
||||
extraData->SetLength(mCodecContext->extradata_size);
|
||||
MOZ_ASSERT(extraData);
|
||||
PodCopy(extraData->Elements(), mCodecContext->extradata,
|
||||
mCodecContext->extradata_size);
|
||||
return extraData.forget();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@
|
|||
|
||||
// This must be the last header included
|
||||
#include "FFmpegLibs.h"
|
||||
#include "speex/speex_resampler.h"
|
||||
|
||||
namespace mozilla {
|
||||
|
||||
|
|
@ -34,8 +35,6 @@ class FFmpegAudioEncoder<LIBAV_VER> : public FFmpegDataEncoder<LIBAV_VER> {
|
|||
virtual nsresult InitSpecific() override;
|
||||
#if LIBAVCODEC_VERSION_MAJOR >= 58
|
||||
Result<EncodedData, nsresult> EncodeOnePacket(Span<float> aSamples,
|
||||
uint32_t aChannels,
|
||||
uint32_t aRate,
|
||||
media::TimeUnit aPts);
|
||||
Result<EncodedData, nsresult> EncodeInputWithModernAPIs(
|
||||
RefPtr<const MediaData> aSample) override;
|
||||
|
|
@ -47,11 +46,20 @@ class FFmpegAudioEncoder<LIBAV_VER> : public FFmpegDataEncoder<LIBAV_VER> {
|
|||
AVPacket* aPacket) override;
|
||||
// Most audio codecs (except PCM) require a very specific frame size.
|
||||
Maybe<TimedPacketizer<float, float>> mPacketizer;
|
||||
// A temporary buffer kept around for shuffling audio frames, if needed.
|
||||
// A temporary buffer kept around for shuffling audio frames, resampling,
|
||||
// packetization, etc.
|
||||
nsTArray<float> mTempBuffer;
|
||||
// The pts of the first packet this encoder has seen, to be able to properly
|
||||
// mark encoder delay as such.
|
||||
media::TimeUnit mFirstPacketPts{media::TimeUnit::Invalid()};
|
||||
struct ResamplerDestroy {
|
||||
void operator()(SpeexResamplerState* aResampler);
|
||||
};
|
||||
// Rate at which this instance has been configured, which might be different
|
||||
// from the rate the underlying encoder is running at.
|
||||
int mInputSampleRate = 0;
|
||||
UniquePtr<SpeexResamplerState, ResamplerDestroy> mResampler;
|
||||
uint64_t mPacketsDelivered = 0;
|
||||
};
|
||||
|
||||
} // namespace mozilla
|
||||
|
|
|
|||
|
|
@ -464,9 +464,18 @@ AudioDecoderConfigInternal AudioEncoder::EncoderConfigToDecoderConfig(
|
|||
const AudioEncoderConfigInternal& aOutputConfig) const {
|
||||
MOZ_ASSERT(aOutputConfig.mSampleRate.isSome());
|
||||
MOZ_ASSERT(aOutputConfig.mNumberOfChannels.isSome());
|
||||
uint32_t sampleRate = aOutputConfig.mSampleRate.value();
|
||||
uint32_t channelCount = aOutputConfig.mNumberOfChannels.value();
|
||||
// Check if the encoder had to modify the settings because of codec
|
||||
// constraints. e.g. FFmpegAudioEncoder can encode any sample-rate, but if the
|
||||
// codec is Opus, then it will resample the audio one of the specific rates
|
||||
// supported by the encoder.
|
||||
if (aRawData->mConfig) {
|
||||
sampleRate = aRawData->mConfig->mSampleRate;
|
||||
channelCount = aRawData->mConfig->mNumberOfChannels;
|
||||
}
|
||||
return AudioDecoderConfigInternal(
|
||||
aOutputConfig.mCodec, aOutputConfig.mSampleRate.value(),
|
||||
aOutputConfig.mNumberOfChannels.value(),
|
||||
aOutputConfig.mCodec, sampleRate, channelCount,
|
||||
aRawData->mExtraData ? Some(aRawData->mExtraData) : Nothing());
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue