forked from mirrors/gecko-dev
677 lines
24 KiB
C++
677 lines
24 KiB
C++
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
#include "AudioDecoderInputTrack.h"
|
|
|
|
#include "MediaData.h"
|
|
#include "mozilla/ScopeExit.h"
|
|
#include "mozilla/StaticPrefs_media.h"
|
|
#include "Tracing.h"
|
|
|
|
#include "RLBoxSoundTouch.h"
|
|
|
|
namespace mozilla {
|
|
|
|
extern LazyLogModule gMediaDecoderLog;
|
|
|
|
#define LOG(msg, ...) \
|
|
MOZ_LOG(gMediaDecoderLog, LogLevel::Debug, \
|
|
("AudioDecoderInputTrack=%p " msg, this, ##__VA_ARGS__))
|
|
|
|
#define LOG_M(msg, this, ...) \
|
|
MOZ_LOG(gMediaDecoderLog, LogLevel::Debug, \
|
|
("AudioDecoderInputTrack=%p " msg, this, ##__VA_ARGS__))
|
|
|
|
/* static */
|
|
AudioDecoderInputTrack* AudioDecoderInputTrack::Create(
|
|
MediaTrackGraph* aGraph, nsISerialEventTarget* aDecoderThread,
|
|
const AudioInfo& aInfo, float aPlaybackRate, float aVolume,
|
|
bool aPreservesPitch) {
|
|
MOZ_ASSERT(aGraph);
|
|
MOZ_ASSERT(aDecoderThread);
|
|
AudioDecoderInputTrack* track =
|
|
new AudioDecoderInputTrack(aDecoderThread, aGraph->GraphRate(), aInfo,
|
|
aPlaybackRate, aVolume, aPreservesPitch);
|
|
aGraph->AddTrack(track);
|
|
return track;
|
|
}
|
|
|
|
// Constructs the track with the graph rate and the decoder's initial audio
// configuration. The segment handed to ProcessedMediaTrack is owned by the
// base class; per-instance audio parameters (rate, volume, pitch handling)
// are captured from the arguments.
AudioDecoderInputTrack::AudioDecoderInputTrack(
    nsISerialEventTarget* aDecoderThread, TrackRate aGraphRate,
    const AudioInfo& aInfo, float aPlaybackRate, float aVolume,
    bool aPreservesPitch)
    : ProcessedMediaTrack(aGraphRate, MediaSegment::AUDIO, new AudioSegment()),
      mDecoderThread(aDecoderThread),
      mResamplerChannelCount(0),
      mInitialInputChannels(aInfo.mChannels),
      mInputSampleRate(aInfo.mRate),
      // The delayed scheduler runs its tasks on the decoder thread.
      mDelayedScheduler(mDecoderThread),
      mPlaybackRate(aPlaybackRate),
      mVolume(aVolume),
      mPreservesPitch(aPreservesPitch) {}
|
|
|
|
// Converts `aAudio` into `aSegment` (which must be empty on entry),
// resampling to the graph rate when the input rate differs. Returns true only
// when the resulting segment contains at least one frame.
bool AudioDecoderInputTrack::ConvertAudioDataToSegment(
    AudioData* aAudio, AudioSegment& aSegment,
    const PrincipalHandle& aPrincipalHandle) {
  AssertOnDecoderThread();
  MOZ_ASSERT(aAudio);
  MOZ_ASSERT(aSegment.IsEmpty());
  if (!aAudio->Frames()) {
    LOG("Ignore audio with zero frame");
    return false;
  }

  aAudio->EnsureAudioBuffer();
  RefPtr<SharedBuffer> buffer = aAudio->mAudioBuffer;
  // The shared buffer holds planar data: channel i begins at i * Frames().
  AudioDataValue* bufferData = static_cast<AudioDataValue*>(buffer->Data());
  AutoTArray<const AudioDataValue*, 2> channels;
  for (uint32_t i = 0; i < aAudio->mChannels; ++i) {
    channels.AppendElement(bufferData + i * aAudio->Frames());
  }
  aSegment.AppendFrames(buffer.forget(), channels, aAudio->Frames(),
                        aPrincipalHandle);
  // If the input sample rate changed mid-stream, the existing resampler state
  // is stale; drop it so it is recreated for the new rate.
  const TrackRate newInputRate = static_cast<TrackRate>(aAudio->mRate);
  if (newInputRate != mInputSampleRate) {
    LOG("Input sample rate changed %u -> %u", mInputSampleRate, newInputRate);
    mInputSampleRate = newInputRate;
    mResampler.own(nullptr);
    mResamplerChannelCount = 0;
  }
  if (mInputSampleRate != Graph()->GraphRate()) {
    aSegment.ResampleChunks(mResampler, &mResamplerChannelCount,
                            mInputSampleRate, Graph()->GraphRate());
  }
  // Resampling a very short input may leave zero frames.
  return aSegment.GetDuration() > 0;
}
|
|
|
|
// Single-sample convenience overload: wraps the data in an array and defers
// to the batch overload.
void AudioDecoderInputTrack::AppendData(
    AudioData* aAudio, const PrincipalHandle& aPrincipalHandle) {
  AssertOnDecoderThread();
  MOZ_ASSERT(aAudio);
  AutoTArray<RefPtr<AudioData>, 1> singleAudio;
  singleAudio.AppendElement(aAudio);
  AppendData(singleAudio, aPrincipalHandle);
}
|
|
|
|
// Batches all incoming decoded audio, then pushes it to the SPSC queue in one
// shot when the queue has enough room.
void AudioDecoderInputTrack::AppendData(
    nsTArray<RefPtr<AudioData>>& aAudioArray,
    const PrincipalHandle& aPrincipalHandle) {
  AssertOnDecoderThread();
  MOZ_ASSERT(!mShutdownSPSCQueue);

  // Batching all new data together in order to push them as a single unit
  // that gives the SPSC queue more spaces.
  for (const auto& audio : aAudioArray) {
    BatchData(audio, aPrincipalHandle);
  }

  // Only push now when the SPSC queue has enough available capacity;
  // otherwise the batched data will be pushed later by a scheduled task.
  if (!ShouldBatchData()) {
    PushBatchedDataIfNeeded();
  }
}
|
|
|
|
bool AudioDecoderInputTrack::ShouldBatchData() const {
|
|
AssertOnDecoderThread();
|
|
// If the SPSC queue has less available capacity than the threshold, then all
|
|
// input audio data should be batched together, in order not to increase the
|
|
// pressure of SPSC queue.
|
|
static const int kThresholdNumerator = 3;
|
|
static const int kThresholdDenominator = 10;
|
|
return mSPSCQueue.AvailableWrite() <
|
|
mSPSCQueue.Capacity() * kThresholdNumerator / kThresholdDenominator;
|
|
}
|
|
|
|
bool AudioDecoderInputTrack::HasBatchedData() const {
|
|
AssertOnDecoderThread();
|
|
return !mBatchedData.mSegment.IsEmpty();
|
|
}
|
|
|
|
// Converts one decoded sample to a segment and appends it to the pending
// batch, tracking the time range the batch covers.
void AudioDecoderInputTrack::BatchData(
    AudioData* aAudio, const PrincipalHandle& aPrincipalHandle) {
  AssertOnDecoderThread();
  AudioSegment converted;
  if (!ConvertAudioDataToSegment(aAudio, converted, aPrincipalHandle)) {
    return;
  }
  mBatchedData.mSegment.AppendFrom(&converted);
  // The start time is only latched once, by the first batched sample.
  if (!mBatchedData.mStartTime.IsValid()) {
    mBatchedData.mStartTime = aAudio->mTime;
  }
  mBatchedData.mEndTime = aAudio->GetEndTime();
  LOG("batched data [%" PRId64 ":%" PRId64 "] sz=%" PRId64,
      aAudio->mTime.ToMicroseconds(), aAudio->GetEndTime().ToMicroseconds(),
      mBatchedData.mSegment.GetDuration());
  DispatchPushBatchedDataIfNeeded();
}
|
|
|
|
// Schedules a delayed task (on the decoder thread) that retries pushing the
// batched data into the SPSC queue. The task re-arms itself while the queue
// stays too full.
void AudioDecoderInputTrack::DispatchPushBatchedDataIfNeeded() {
  AssertOnDecoderThread();
  MOZ_ASSERT(!mShutdownSPSCQueue);
  // The graph thread runs iteration around per 2~10ms. Doing this to ensure
  // that we can keep consuming data. If the producer stops pushing new data
  // due to MDSM stops decoding, which is because MDSM thinks the data stored
  // in the audio queue are enough. The way to remove those data from the
  // audio queue is driven by us, so we have to keep consuming data.
  // Otherwise, we would get stuck because those batched data would never be
  // consumed.
  static const uint8_t kTimeoutMS = 10;
  TimeStamp target =
      TimeStamp::Now() + TimeDuration::FromMilliseconds(kTimeoutMS);
  mDelayedScheduler.Ensure(
      target,
      [self = RefPtr<AudioDecoderInputTrack>(this), this]() {
        LOG("In the task of DispatchPushBatchedDataIfNeeded");
        // Mark the scheduled request as done before possibly re-arming below.
        mDelayedScheduler.CompleteRequest();
        MOZ_ASSERT(!mShutdownSPSCQueue);
        MOZ_ASSERT(HasBatchedData());
        // The capacity in SPSC is still not enough, so we can't push data now.
        // Retrigger another task to push batched data.
        if (ShouldBatchData()) {
          DispatchPushBatchedDataIfNeeded();
          return;
        }
        PushBatchedDataIfNeeded();
      },
      // The rejection callback should never run; the scheduler is reset
      // explicitly before shutdown (see Close/ClearFutureData).
      []() { MOZ_DIAGNOSTIC_ASSERT(false); });
}
|
|
|
|
void AudioDecoderInputTrack::PushBatchedDataIfNeeded() {
|
|
AssertOnDecoderThread();
|
|
if (!HasBatchedData()) {
|
|
return;
|
|
}
|
|
LOG("Append batched data [%" PRId64 ":%" PRId64 "], available SPSC sz=%u",
|
|
mBatchedData.mStartTime.ToMicroseconds(),
|
|
mBatchedData.mEndTime.ToMicroseconds(), mSPSCQueue.AvailableWrite());
|
|
SPSCData data({SPSCData::DecodedData(std::move(mBatchedData))});
|
|
PushDataToSPSCQueue(data);
|
|
MOZ_ASSERT(mBatchedData.mSegment.IsEmpty());
|
|
// No batched data remains, we can cancel the pending tasks.
|
|
mDelayedScheduler.Reset();
|
|
}
|
|
|
|
void AudioDecoderInputTrack::NotifyEndOfStream() {
|
|
AssertOnDecoderThread();
|
|
// Force to push all data before EOS. Otherwise, the track would be ended too
|
|
// early without sending all data.
|
|
PushBatchedDataIfNeeded();
|
|
SPSCData data({SPSCData::EOS()});
|
|
LOG("Set EOS, available SPSC sz=%u", mSPSCQueue.AvailableWrite());
|
|
PushDataToSPSCQueue(data);
|
|
}
|
|
|
|
void AudioDecoderInputTrack::ClearFutureData() {
|
|
AssertOnDecoderThread();
|
|
// Clear the data hasn't been pushed to SPSC queue yet.
|
|
mBatchedData.Clear();
|
|
mDelayedScheduler.Reset();
|
|
SPSCData data({SPSCData::ClearFutureData()});
|
|
LOG("Set clear future data, available SPSC sz=%u",
|
|
mSPSCQueue.AvailableWrite());
|
|
PushDataToSPSCQueue(data);
|
|
}
|
|
|
|
// Enqueues one unit of data. Callers are expected to have checked capacity,
// so a failed enqueue is a diagnostic error.
void AudioDecoderInputTrack::PushDataToSPSCQueue(SPSCData& data) {
  AssertOnDecoderThread();
  const bool succeeded = mSPSCQueue.Enqueue(data);
  MOZ_DIAGNOSTIC_ASSERT(succeeded, "Failed to push data, SPSC queue is full!");
  Unused << succeeded;
}
|
|
|
|
// Forwards the volume change to the main thread, where control messages for
// the graph are queued.
void AudioDecoderInputTrack::SetVolume(float aVolume) {
  AssertOnDecoderThread();
  LOG("Set volume=%f", aVolume);
  GetMainThreadSerialEventTarget()->Dispatch(NS_NewRunnableFunction(
      "AudioDecoderInputTrack::SetVolume",
      [self = RefPtr<AudioDecoderInputTrack>(this), aVolume] {
        self->SetVolumeImpl(aVolume);
      }));
}
|
|
|
|
// Runs on the main thread; queues a control message so the volume is applied
// on the graph thread, ordered with other graph updates.
void AudioDecoderInputTrack::SetVolumeImpl(float aVolume) {
  MOZ_ASSERT(NS_IsMainThread());
  QueueControlMessageWithNoShutdown([self = RefPtr{this}, this, aVolume] {
    TRACE_COMMENT("AudioDecoderInputTrack::SetVolume ControlMessage", "%f",
                  aVolume);
    LOG_M("Apply volume=%f", this, aVolume);
    mVolume = aVolume;
  });
}
|
|
|
|
// Forwards the playback-rate change to the main thread, where control
// messages for the graph are queued.
void AudioDecoderInputTrack::SetPlaybackRate(float aPlaybackRate) {
  AssertOnDecoderThread();
  LOG("Set playback rate=%f", aPlaybackRate);
  GetMainThreadSerialEventTarget()->Dispatch(
      NS_NewRunnableFunction("AudioDecoderInputTrack::SetPlaybackRate",
                             [self = RefPtr<AudioDecoderInputTrack>(this),
                              aPlaybackRate] {
                               self->SetPlaybackRateImpl(aPlaybackRate);
                             }));
}
|
|
|
|
// Runs on the main thread; queues a control message that applies the new
// playback rate on the graph thread and reconfigures the time stretcher.
void AudioDecoderInputTrack::SetPlaybackRateImpl(float aPlaybackRate) {
  MOZ_ASSERT(NS_IsMainThread());
  QueueControlMessageWithNoShutdown([self = RefPtr{this}, this, aPlaybackRate] {
    TRACE_COMMENT("AudioDecoderInputTrack::SetPlaybackRate ControlMessage",
                  "%f", aPlaybackRate);
    LOG_M("Apply playback rate=%f", this, aPlaybackRate);
    mPlaybackRate = aPlaybackRate;
    // The stretcher (if any) must pick up the new rate immediately.
    SetTempoAndRateForTimeStretcher();
  });
}
|
|
|
|
// Forwards the preserves-pitch change to the main thread, where control
// messages for the graph are queued.
void AudioDecoderInputTrack::SetPreservesPitch(bool aPreservesPitch) {
  AssertOnDecoderThread();
  LOG("Set preserves pitch=%d", aPreservesPitch);
  GetMainThreadSerialEventTarget()->Dispatch(
      NS_NewRunnableFunction("AudioDecoderInputTrack::SetPreservesPitch",
                             [self = RefPtr<AudioDecoderInputTrack>(this),
                              aPreservesPitch] {
                               self->SetPreservesPitchImpl(aPreservesPitch);
                             }));
}
|
|
|
|
// Runs on the main thread; queues a control message that toggles pitch
// preservation on the graph thread and reconfigures the time stretcher.
void AudioDecoderInputTrack::SetPreservesPitchImpl(bool aPreservesPitch) {
  MOZ_ASSERT(NS_IsMainThread());
  QueueControlMessageWithNoShutdown(
      [self = RefPtr{this}, this, aPreservesPitch] {
        TRACE_COMMENT("AudioDecoderInputTrack::SetPreservesPitch", "%s",
                      aPreservesPitch ? "true" : "false");
        LOG_M("Apply preserves pitch=%d", this, aPreservesPitch);
        mPreservesPitch = aPreservesPitch;
        // Switch the stretcher between tempo-only and full-rate stretching.
        SetTempoAndRateForTimeStretcher();
      });
}
|
|
|
|
// Shuts down the producer side: after this the SPSC queue must not be used,
// and any pending batched data or scheduled push task is discarded.
void AudioDecoderInputTrack::Close() {
  AssertOnDecoderThread();
  LOG("Close");
  mShutdownSPSCQueue = true;
  mDelayedScheduler.Reset();
  mBatchedData.Clear();
}
|
|
|
|
void AudioDecoderInputTrack::DestroyImpl() {
|
|
LOG("DestroyImpl");
|
|
AssertOnGraphThreadOrNotRunning();
|
|
mBufferedData.Clear();
|
|
if (mTimeStretcher) {
|
|
delete mTimeStretcher;
|
|
mTimeStretcher = nullptr;
|
|
}
|
|
ProcessedMediaTrack::DestroyImpl();
|
|
}
|
|
|
|
AudioDecoderInputTrack::~AudioDecoderInputTrack() {
  // Close() must have run before destruction: it sets mShutdownSPSCQueue and
  // clears the batched data, which is exactly what these asserts check.
  MOZ_ASSERT(mBatchedData.mSegment.IsEmpty());
  MOZ_ASSERT(mShutdownSPSCQueue);
  // Release the resampler state.
  mResampler.own(nullptr);
}
|
|
|
|
// Graph-thread entry point: produces [aFrom, aTo) worth of audio into the
// track's segment on each graph iteration.
void AudioDecoderInputTrack::ProcessInput(GraphTime aFrom, GraphTime aTo,
                                          uint32_t aFlags) {
  AssertOnGraphThread();
  if (Ended()) {
    return;
  }

  TrackTime consumedDuration = 0;
  // Guarantee the end-of-iteration notification (frames written, possible
  // track end) fires on every return path below.
  auto notify = MakeScopeExit([this, &consumedDuration] {
    NotifyInTheEndOfProcessInput(consumedDuration);
  });

  // mSentAllData was latched at the end of a previous iteration, so all audio
  // has already been output; end the track once the graph allows it.
  if (mSentAllData && (aFlags & ALLOW_END)) {
    LOG("End track");
    mEnded = true;
    return;
  }

  const TrackTime expectedDuration = aTo - aFrom;
  LOG("ProcessInput [%" PRId64 " to %" PRId64 "], duration=%" PRId64, aFrom,
      aTo, expectedDuration);

  // Drain all data from SPSC queue first, because we want that the SPSC queue
  // always has capacity of accepting data from the producer. In addition, we
  // also need to check if there is any control related data that should be
  // applied to output segment, eg. `ClearFutureData`.
  SPSCData data;
  while (mSPSCQueue.Dequeue(&data, 1) > 0) {
    HandleSPSCData(data);
  }

  consumedDuration += AppendBufferedDataToOutput(expectedDuration);
  if (HasSentAllData()) {
    LOG("Sent all data, should end track in next iteration");
    mSentAllData = true;
  }
}
|
|
|
|
// Applies one unit dequeued from the SPSC queue: decoded audio is appended to
// the graph-side buffer, EOS is latched, and ClearFutureData drops buffered
// audio (and un-latches EOS if the track hasn't ended).
void AudioDecoderInputTrack::HandleSPSCData(SPSCData& aData) {
  AssertOnGraphThread();
  if (aData.IsDecodedData()) {
    // The producer flushes everything before sending EOS, so no decoded data
    // should arrive after it.
    MOZ_ASSERT(!mReceivedEOS);
    AudioSegment& segment = aData.AsDecodedData()->mSegment;
    LOG("popped out data [%" PRId64 ":%" PRId64 "] sz=%" PRId64,
        aData.AsDecodedData()->mStartTime.ToMicroseconds(),
        aData.AsDecodedData()->mEndTime.ToMicroseconds(),
        segment.GetDuration());
    mBufferedData.AppendFrom(&segment);
    return;
  }
  if (aData.IsEOS()) {
    MOZ_ASSERT(!Ended());
    LOG("Received EOS");
    mReceivedEOS = true;
    return;
  }
  if (aData.IsClearFutureData()) {
    LOG("Clear future data");
    mBufferedData.Clear();
    // A previously received EOS no longer applies once future data is
    // cleared, unless the track has already ended.
    if (!Ended()) {
      LOG("Clear EOS");
      mReceivedEOS = false;
    }
    return;
  }
  MOZ_ASSERT_UNREACHABLE("unsupported SPSC data");
}
|
|
|
|
// Moves up to `aExpectedDuration` of buffered audio into the track's output
// segment (time-stretched when the playback rate isn't 1.0), applies volume
// and track-disabling, and pads with silence on underrun. Returns how much
// raw buffered data was consumed.
TrackTime AudioDecoderInputTrack::AppendBufferedDataToOutput(
    TrackTime aExpectedDuration) {
  AssertOnGraphThread();

  // Remove the necessary part from `mBufferedData` to create a new
  // segment in order to apply some operation without affecting all data.
  AudioSegment outputSegment;
  TrackTime consumedDuration = 0;
  if (mPlaybackRate != 1.0) {
    consumedDuration =
        AppendTimeStretchedDataToSegment(aExpectedDuration, outputSegment);
  } else {
    consumedDuration =
        AppendUnstretchedDataToSegment(aExpectedDuration, outputSegment);
  }

  // Apply any necessary change on the segement which would be outputed to the
  // graph.
  const TrackTime appendedDuration = outputSegment.GetDuration();
  outputSegment.ApplyVolume(mVolume);
  ApplyTrackDisabling(&outputSegment);
  mSegment->AppendFrom(&outputSegment);

  // numSamples is attacker-influenced (RLBox sandbox) and only used in the
  // log line below.
  unsigned int numSamples = 0;
  if (mTimeStretcher) {
    numSamples = mTimeStretcher->numSamples().unverified_safe_because(
        "Only used for logging.");
  }

  LOG("Appended %" PRId64 ", consumed %" PRId64
      ", remaining raw buffered %" PRId64 ", remaining time-stretched %u",
      appendedDuration, consumedDuration, mBufferedData.GetDuration(),
      numSamples);
  // Underrun: the graph expects exactly aExpectedDuration frames, so fill the
  // shortfall with silence.
  if (auto gap = aExpectedDuration - appendedDuration; gap > 0) {
    LOG("Audio underrun, fill silence %" PRId64, gap);
    MOZ_ASSERT(mBufferedData.IsEmpty());
    mSegment->AppendNullData(gap);
  }
  return consumedDuration;
}
|
|
|
|
// Produces up to `aExpectedDuration` of time-stretched audio into `aOutput`,
// feeding raw buffered data into the stretcher first when it doesn't hold
// enough processed samples. Returns the amount of raw data consumed.
TrackTime AudioDecoderInputTrack::AppendTimeStretchedDataToSegment(
    TrackTime aExpectedDuration, AudioSegment& aOutput) {
  AssertOnGraphThread();
  EnsureTimeStretcher();

  MOZ_ASSERT(mPlaybackRate != 1.0f);
  MOZ_ASSERT(aExpectedDuration >= 0);
  MOZ_ASSERT(mTimeStretcher);
  MOZ_ASSERT(aOutput.IsEmpty());

  // If we don't have enough data that have been time-stretched, fill raw data
  // into the time stretcher until the amount of samples that time stretcher
  // finishes processed reaches or exceeds the expected duration.
  TrackTime consumedDuration = 0;
  mTimeStretcher->numSamples().copy_and_verify([&](auto numSamples) {
    // Attacker controlled soundtouch can return a bogus numSamples, which
    // can result in filling data into the time stretcher (or not). This is
    // safe as long as filling (and getting) data is checked.
    if (numSamples < aExpectedDuration) {
      consumedDuration = FillDataToTimeStretcher(aExpectedDuration);
    }
  });
  MOZ_ASSERT(consumedDuration >= 0);
  // Retrieve whatever stretched output is available; a shortfall is handled
  // by the caller (silence fill).
  Unused << GetDataFromTimeStretcher(aExpectedDuration, aOutput);
  return consumedDuration;
}
|
|
|
|
// Feeds raw buffered chunks into the time stretcher (interleaving them, and
// up/down-mixing to the stretcher's fixed channel count) until the stretcher
// reports at least `aExpectedDuration` processed samples or the buffer runs
// out. Returns the duration of raw data consumed; the consumed part is
// removed from mBufferedData.
TrackTime AudioDecoderInputTrack::FillDataToTimeStretcher(
    TrackTime aExpectedDuration) {
  AssertOnGraphThread();
  MOZ_ASSERT(mPlaybackRate != 1.0f);
  MOZ_ASSERT(aExpectedDuration >= 0);
  MOZ_ASSERT(mTimeStretcher);

  TrackTime consumedDuration = 0;
  const uint32_t channels = GetChannelCountForTimeStretcher();
  mBufferedData.IterateOnChunks([&](AudioChunk* aChunk) {
    MOZ_ASSERT(aChunk);
    if (aChunk->IsNull() && aChunk->GetDuration() == 0) {
      // Skip this chunk and wait for next one.
      return false;
    }
    // Reuse the interleaving scratch buffer, growing it only when needed.
    const uint32_t bufferLength = channels * aChunk->GetDuration();
    if (bufferLength > mInterleavedBuffer.Capacity()) {
      mInterleavedBuffer.SetCapacity(bufferLength);
    }
    mInterleavedBuffer.SetLengthAndRetainStorage(bufferLength);
    if (aChunk->IsNull()) {
      MOZ_ASSERT(aChunk->GetDuration(), "chunk with only silence");
      // BUG FIX: memset takes a byte count, but `Length()` is an element
      // count. When AudioDataValue is wider than one byte, the old code only
      // zeroed the leading fraction of the buffer and left stale samples in
      // the rest of the "silent" chunk.
      memset(mInterleavedBuffer.Elements(), 0,
             mInterleavedBuffer.Length() * sizeof(AudioDataValue));
    } else {
      // Do the up-mix/down-mix first if necessary that forces to change the
      // data's channel count to the time stretcher's channel count. Then
      // perform a transformation from planar to interleaved.
      switch (aChunk->mBufferFormat) {
        case AUDIO_FORMAT_S16:
          WriteChunk<int16_t>(*aChunk, channels, 1.0f,
                              mInterleavedBuffer.Elements());
          break;
        case AUDIO_FORMAT_FLOAT32:
          WriteChunk<float>(*aChunk, channels, 1.0f,
                            mInterleavedBuffer.Elements());
          break;
        default:
          MOZ_ASSERT_UNREACHABLE("Not expected format");
      }
    }
    mTimeStretcher->putSamples(mInterleavedBuffer.Elements(),
                               aChunk->GetDuration());
    consumedDuration += aChunk->GetDuration();
    return mTimeStretcher->numSamples().copy_and_verify(
        [&aExpectedDuration](auto numSamples) {
          // Attacker controlled soundtouch can return a bogus numSamples to
          // return early or force additional iterations. This is safe
          // as long as all the writes in the lambda are checked.
          return numSamples >= aExpectedDuration;
        });
  });
  mBufferedData.RemoveLeading(consumedDuration);
  return consumedDuration;
}
|
|
|
|
// Appends up to `aExpectedDuration` of audio at the normal rate: leftover
// time-stretched data (from an earlier rate change) is drained first, then
// raw buffered data fills the remainder.
TrackTime AudioDecoderInputTrack::AppendUnstretchedDataToSegment(
    TrackTime aExpectedDuration, AudioSegment& aOutput) {
  AssertOnGraphThread();
  MOZ_ASSERT(mPlaybackRate == 1.0f);
  MOZ_ASSERT(aExpectedDuration >= 0);
  MOZ_ASSERT(aOutput.IsEmpty());

  const TrackTime drainedDuration =
      DrainStretchedDataIfNeeded(aExpectedDuration, aOutput);
  const TrackTime sliceDuration = std::min(
      aExpectedDuration - drainedDuration, mBufferedData.GetDuration());
  aOutput.AppendSlice(mBufferedData, 0, sliceDuration);
  MOZ_ASSERT(aOutput.GetDuration() <= aExpectedDuration);
  mBufferedData.RemoveLeading(sliceDuration);
  return sliceDuration;
}
|
|
|
|
// Drains processed samples left in the time stretcher, if one exists and has
// any output pending. Returns the drained duration (0 when nothing to drain).
TrackTime AudioDecoderInputTrack::DrainStretchedDataIfNeeded(
    TrackTime aExpectedDuration, AudioSegment& aOutput) {
  AssertOnGraphThread();
  MOZ_ASSERT(mPlaybackRate == 1.0f);
  MOZ_ASSERT(aExpectedDuration >= 0);

  if (!mTimeStretcher) {
    return 0;
  }
  const auto stretchedSamples =
      mTimeStretcher->numSamples().unverified_safe_because(
          "Bogus numSamples can result in draining the stretched data (or "
          "not).");
  if (stretchedSamples == 0) {
    return 0;
  }
  return GetDataFromTimeStretcher(aExpectedDuration, aOutput);
}
|
|
|
|
// Pulls up to `aExpectedDuration` processed samples out of the time
// stretcher, converting them from interleaved back to planar, and appends
// them to `aOutput`. Flushes the stretcher first when all input has been
// sent. Returns the appended duration.
TrackTime AudioDecoderInputTrack::GetDataFromTimeStretcher(
    TrackTime aExpectedDuration, AudioSegment& aOutput) {
  AssertOnGraphThread();
  MOZ_ASSERT(mTimeStretcher);
  MOZ_ASSERT(aExpectedDuration >= 0);

  auto numSamples =
      mTimeStretcher->numSamples().unverified_safe_because("Used for logging");

  mTimeStretcher->numUnprocessedSamples().copy_and_verify([&](auto samples) {
    // At EOS with unprocessed input remaining, force the stretcher to emit
    // whatever it can.
    if (HasSentAllData() && samples) {
      mTimeStretcher->flush();
      // NOTE(review): this logs the pre-flush processed sample count, not the
      // number of unprocessed frames being flushed — confirm intent.
      LOG("Flush %u frames from the time stretcher", numSamples);
    }
  });

  // Flushing may have change the number of samples
  numSamples = mTimeStretcher->numSamples().unverified_safe_because(
      "Used to decide to flush (or not), which is checked.");

  const TrackTime available =
      std::min((TrackTime)numSamples, aExpectedDuration);
  if (available == 0) {
    // Either running out of stretched data, or the raw data we filled into
    // the time stretcher were not enough for producing stretched data.
    return 0;
  }

  // Retrieve interleaved data from the time stretcher.
  const uint32_t channelCount = GetChannelCountForTimeStretcher();
  const uint32_t bufferLength = channelCount * available;
  if (bufferLength > mInterleavedBuffer.Capacity()) {
    mInterleavedBuffer.SetCapacity(bufferLength);
  }
  mInterleavedBuffer.SetLengthAndRetainStorage(bufferLength);
  mTimeStretcher->receiveSamples(mInterleavedBuffer.Elements(), available);

  // Perform a transformation from interleaved to planar.
  CheckedInt<size_t> bufferSize(sizeof(AudioDataValue));
  bufferSize *= bufferLength;
  RefPtr<SharedBuffer> buffer = SharedBuffer::Create(bufferSize);
  AudioDataValue* bufferData = static_cast<AudioDataValue*>(buffer->Data());
  AutoTArray<AudioDataValue*, 2> planarBuffer;
  planarBuffer.SetLength(channelCount);
  // Channel idx occupies [idx * available, (idx + 1) * available) in the
  // planar buffer.
  for (size_t idx = 0; idx < channelCount; idx++) {
    planarBuffer[idx] = bufferData + idx * available;
  }
  DeinterleaveAndConvertBuffer(mInterleavedBuffer.Elements(), available,
                               channelCount, planarBuffer.Elements());
  AutoTArray<const AudioDataValue*, 2> outputChannels;
  outputChannels.AppendElements(planarBuffer);
  aOutput.AppendFrames(buffer.forget(), outputChannels,
                       static_cast<int32_t>(available),
                       mBufferedData.GetOldestPrinciple());
  return available;
}
|
|
|
|
// End-of-iteration bookkeeping: accumulates the written frame count and fires
// the output/end listeners.
void AudioDecoderInputTrack::NotifyInTheEndOfProcessInput(
    TrackTime aFillDuration) {
  AssertOnGraphThread();
  mWrittenFrames += aFillDuration;
  LOG("Notify, fill=%" PRId64 ", total written=%" PRId64 ", ended=%d",
      aFillDuration, mWrittenFrames, Ended());
  // Only notify listeners when new frames were actually produced.
  const bool producedFrames = aFillDuration > 0;
  if (producedFrames) {
    mOnOutput.Notify(mWrittenFrames);
  }
  if (Ended()) {
    mOnEnd.Notify();
  }
}
|
|
|
|
bool AudioDecoderInputTrack::HasSentAllData() const {
|
|
AssertOnGraphThread();
|
|
return mReceivedEOS && mSPSCQueue.AvailableRead() == 0 &&
|
|
mBufferedData.IsEmpty();
|
|
}
|
|
|
|
// Reports the segment's maximum channel count, falling back to the initial
// input channel count before any chunk has been appended.
uint32_t AudioDecoderInputTrack::NumberOfChannels() const {
  AssertOnGraphThread();
  if (const uint32_t maxChannels =
          GetData<AudioSegment>()->MaxChannelCount()) {
    return maxChannels;
  }
  return mInitialInputChannels;
}
|
|
|
|
// Lazily creates and configures the RLBox-sandboxed SoundTouch time
// stretcher. The configuration (rate, channels, pref-driven tuning, tempo)
// must happen in this order before the stretcher is first fed.
void AudioDecoderInputTrack::EnsureTimeStretcher() {
  AssertOnGraphThread();
  if (!mTimeStretcher) {
    mTimeStretcher = new RLBoxSoundTouch();
    MOZ_RELEASE_ASSERT(mTimeStretcher);
    MOZ_RELEASE_ASSERT(mTimeStretcher->Init());

    mTimeStretcher->setSampleRate(Graph()->GraphRate());
    mTimeStretcher->setChannels(GetChannelCountForTimeStretcher());
    mTimeStretcher->setPitch(1.0);

    // SoundTouch v2.1.2 uses automatic time-stretch settings with the following
    // values:
    // Tempo 0.5: 90ms sequence, 20ms seekwindow, 8ms overlap
    // Tempo 2.0: 40ms sequence, 15ms seekwindow, 8ms overlap
    // We are going to use a smaller 10ms sequence size to improve speech
    // clarity, giving more resolution at high tempo and less reverb at low
    // tempo. Maintain 15ms seekwindow and 8ms overlap for smoothness.
    mTimeStretcher->setSetting(
        SETTING_SEQUENCE_MS,
        StaticPrefs::media_audio_playbackrate_soundtouch_sequence_ms());
    mTimeStretcher->setSetting(
        SETTING_SEEKWINDOW_MS,
        StaticPrefs::media_audio_playbackrate_soundtouch_seekwindow_ms());
    mTimeStretcher->setSetting(
        SETTING_OVERLAP_MS,
        StaticPrefs::media_audio_playbackrate_soundtouch_overlap_ms());
    // Apply the current playback rate / pitch-preservation mode.
    SetTempoAndRateForTimeStretcher();
    LOG("Create TimeStretcher (channel=%d, playbackRate=%f, preservePitch=%d)",
        GetChannelCountForTimeStretcher(), mPlaybackRate, mPreservesPitch);
  }
}
|
|
|
|
void AudioDecoderInputTrack::SetTempoAndRateForTimeStretcher() {
|
|
AssertOnGraphThread();
|
|
if (!mTimeStretcher) {
|
|
return;
|
|
}
|
|
if (mPreservesPitch) {
|
|
mTimeStretcher->setTempo(mPlaybackRate);
|
|
mTimeStretcher->setRate(1.0f);
|
|
} else {
|
|
mTimeStretcher->setTempo(1.0f);
|
|
mTimeStretcher->setRate(mPlaybackRate);
|
|
}
|
|
}
|
|
|
|
// Returns the fixed channel count used to configure the time stretcher.
uint32_t AudioDecoderInputTrack::GetChannelCountForTimeStretcher() const {
  // The time stretcher MUST be initialized with a fixed channel count, but the
  // channel count in audio chunks might vary. Therefore, we always use the
  // initial input channel count to initialize the time stretcher and perform a
  // real-time down-mix/up-mix for audio chunks which have different channel
  // count than the initial input channel count.
  return mInitialInputChannels;
}
|
|
|
|
#undef LOG
|
|
} // namespace mozilla
|