fune/dom/media/webcodecs/AudioDecoder.cpp

472 lines
15 KiB
C++

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "mozilla/dom/AudioDecoder.h"
#include "mozilla/dom/AudioDecoderBinding.h"
#include "DecoderTraits.h"
#include "MediaContainerType.h"
#include "MediaData.h"
#include "VideoUtils.h"
#include "mozilla/Assertions.h"
#include "mozilla/Logging.h"
#include "mozilla/Maybe.h"
#include "mozilla/Try.h"
#include "mozilla/Unused.h"
#include "mozilla/dom/AudioDataBinding.h"
#include "mozilla/dom/EncodedAudioChunk.h"
#include "mozilla/dom/EncodedAudioChunkBinding.h"
#include "mozilla/dom/ImageUtils.h"
#include "mozilla/dom/Promise.h"
#include "mozilla/dom/WebCodecsUtils.h"
#include "nsPrintfCString.h"
#include "nsReadableUtils.h"
extern mozilla::LazyLogModule gWebCodecsLog;
namespace mozilla::dom {
// File-local logging helpers. LOG_INTERNAL forwards a printf-style message to
// MOZ_LOG on gWebCodecsLog at the given LogLevel; LOG, LOGW, LOGE and LOGV are
// shorthands for the Debug, Warning, Error and Verbose levels respectively.
// Any definition of the same macro name that is already visible is removed
// first so the definitions below take effect.
#ifdef LOG_INTERNAL
# undef LOG_INTERNAL
#endif // LOG_INTERNAL
#define LOG_INTERNAL(level, msg, ...) \
MOZ_LOG(gWebCodecsLog, LogLevel::level, (msg, ##__VA_ARGS__))
#ifdef LOG
# undef LOG
#endif // LOG
// Debug-level logging.
#define LOG(msg, ...) LOG_INTERNAL(Debug, msg, ##__VA_ARGS__)
#ifdef LOGW
# undef LOGW
#endif // LOGW
// Warning-level logging.
#define LOGW(msg, ...) LOG_INTERNAL(Warning, msg, ##__VA_ARGS__)
#ifdef LOGE
# undef LOGE
#endif // LOGE
// Error-level logging.
#define LOGE(msg, ...) LOG_INTERNAL(Error, msg, ##__VA_ARGS__)
#ifdef LOGV
# undef LOGV
#endif // LOGV
// Verbose-level logging.
#define LOGV(msg, ...) LOG_INTERNAL(Verbose, msg, ##__VA_ARGS__)
// XPCOM boilerplate for AudioDecoder. Cycle collection is inherited from
// DOMEventTargetHelper and additionally covers mErrorCallback and
// mOutputCallback; AddRef/Release and the interface map are likewise
// inherited from DOMEventTargetHelper.
NS_IMPL_CYCLE_COLLECTION_INHERITED(AudioDecoder, DOMEventTargetHelper,
mErrorCallback, mOutputCallback)
NS_IMPL_ADDREF_INHERITED(AudioDecoder, DOMEventTargetHelper)
NS_IMPL_RELEASE_INHERITED(AudioDecoder, DOMEventTargetHelper)
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(AudioDecoder)
NS_INTERFACE_MAP_END_INHERITING(DOMEventTargetHelper)
/*
* Below are helper classes
*/
// Stores the codec string, sample rate and channel count supplied by the
// caller, and moves the optional description buffer into mDescription.
// No validation happens here; Create() validates before constructing.
AudioDecoderConfigInternal::AudioDecoderConfigInternal(
const nsAString& aCodec, uint32_t aSampleRate, uint32_t aNumberOfChannels,
Maybe<RefPtr<MediaByteBuffer>>&& aDescription)
: mCodec(aCodec),
mSampleRate(aSampleRate),
mNumberOfChannels(aNumberOfChannels),
mDescription(std::move(aDescription)) {}
/*static*/
// Builds the internal representation of a WebIDL AudioDecoderConfig.
//
// Returns nullptr, after logging the reason, when aConfig is rejected by
// AudioDecoderTraits::Validate or when its description buffer cannot be
// turned into extra data. Otherwise returns a newly allocated config that
// carries the codec string, sample rate, channel count and (if one was
// passed) the extracted description bytes.
UniquePtr<AudioDecoderConfigInternal> AudioDecoderConfigInternal::Create(
    const AudioDecoderConfig& aConfig) {
  nsCString validationError;
  const bool isValid = AudioDecoderTraits::Validate(aConfig, validationError);
  if (!isValid) {
    LOGE("Failed to create AudioDecoderConfigInternal: %s",
         validationError.get());
    return nullptr;
  }

  // Stays Nothing() when the caller did not pass a description.
  Maybe<RefPtr<MediaByteBuffer>> extraData;
  if (aConfig.mDescription.WasPassed()) {
    auto extracted = GetExtraDataFromArrayBuffer(aConfig.mDescription.Value());
    if (extracted.isErr()) {
      // The description data could not be read.
      nsCString errorName;
      GetErrorName(extracted.unwrapErr(), errorName);
      LOGE(
          "Failed to create AudioDecoderConfigInternal due to invalid "
          "description data. Error: %s",
          errorName.get());
      return nullptr;
    }
    extraData.emplace(extracted.unwrap());
  }

  auto* internalConfig = new AudioDecoderConfigInternal(
      aConfig.mCodec, aConfig.mSampleRate, aConfig.mNumberOfChannels,
      std::move(extraData));
  return UniquePtr<AudioDecoderConfigInternal>(internalConfig);
}
/*
* The following are helpers for AudioDecoder methods
*/
struct AudioMIMECreateParam {
explicit AudioMIMECreateParam(const AudioDecoderConfigInternal& aConfig)
: mParsedCodec(ParseCodecString(aConfig.mCodec).valueOr(EmptyString())) {}
explicit AudioMIMECreateParam(const AudioDecoderConfig& aConfig)
: mParsedCodec(ParseCodecString(aConfig.mCodec).valueOr(EmptyString())) {}
const nsString mParsedCodec;
};
// Maps a WebCodecs codec name to the string placed in the `codecs` parameter
// of a guessed MIME type.
//
// For the "x-wav" container the PCM flavours are replaced by numeric codec
// strings: "7" for ulaw, "6" for alaw, "3" for 32-bit float PCM and "1" for
// every other (linear) PCM type.
// NOTE(review): these look like WAVE format tags (RFC 2361) - confirm.
// For all other containers the codec name is returned unchanged.
nsCString ConvertCodecName(const nsCString& aContainer,
                           const nsCString& aCodec) {
  if (!aContainer.EqualsLiteral("x-wav")) {
    return aCodec;
  }
  if (aCodec.EqualsLiteral("ulaw")) {
    return nsCString("7");
  }
  if (aCodec.EqualsLiteral("alaw")) {
    return nsCString("6");
  }
  // Find() reports a position and uses a negative value (kNotFound) for
  // "absent", so it must be compared rather than used as a boolean: a miss
  // would otherwise be truthy and every linear PCM type would be reported as
  // float.
  if (aCodec.Find("f32") >= 0) {
    return nsCString("3");
  }
  // Linear PCM
  return nsCString("1");
}
// Produces one candidate MIME type string of the form
// "audio/<container>; codecs=<codec>" for every container that
// GuessContainers() proposes for the parsed codec.
//
// The codec name is converted per container through ConvertCodecName(). The
// conversion always starts from the original codec string, so a conversion
// done for one container never leaks into the MIME type built for the next.
static nsTArray<nsCString> GuessMIMETypes(const AudioMIMECreateParam& aParam) {
  const nsCString codec = NS_ConvertUTF16toUTF8(aParam.mParsedCodec);
  nsTArray<nsCString> types;
  for (const nsCString& container : GuessContainers(aParam.mParsedCodec)) {
    const nsCString containerCodec = ConvertCodecName(container, codec);
    nsPrintfCString mime("audio/%s; codecs=%s", container.get(),
                         containerCodec.get());
    types.AppendElement(mime);
  }
  return types;
}
// Returns true when aCodec is one of the codec names this decoder accepts:
// flac, mp3, any AAC codec string, opus, ulaw, alaw, or one of the linear PCM
// types (pcm-u8, pcm-s16, pcm-s24, pcm-s32, pcm-f32). The queried name is
// logged at debug level.
static bool IsSupportedAudioCodec(const nsAString& aCodec) {
  LOG("IsSupportedAudioCodec: %s", NS_ConvertUTF16toUTF8(aCodec).get());

  // Compressed formats.
  if (aCodec.EqualsLiteral("flac") || aCodec.EqualsLiteral("mp3")) {
    return true;
  }
  if (IsAACCodecString(aCodec)) {
    return true;
  }
  if (aCodec.EqualsLiteral("opus")) {
    return true;
  }

  // Companded PCM.
  if (aCodec.EqualsLiteral("ulaw") || aCodec.EqualsLiteral("alaw")) {
    return true;
  }

  // Linear PCM.
  if (aCodec.EqualsLiteral("pcm-u8") || aCodec.EqualsLiteral("pcm-s16")) {
    return true;
  }
  if (aCodec.EqualsLiteral("pcm-s24") || aCodec.EqualsLiteral("pcm-s32")) {
    return true;
  }
  return aCodec.EqualsLiteral("pcm-f32");
}
// https://w3c.github.io/webcodecs/#check-configuration-support
template <typename Config>
static bool CanDecodeAudio(const Config& aConfig) {
auto param = AudioMIMECreateParam(aConfig);
if (!IsSupportedAudioCodec(param.mParsedCodec)) {
return false;
}
if (IsOnAndroid() && IsAACCodecString(param.mParsedCodec)) {
return false;
}
// TODO: Instead of calling CanHandleContainerType with the guessed the
// containers, DecoderTraits should provide an API to tell if a codec is
// decodable or not.
for (const nsCString& mime : GuessMIMETypes(param)) {
if (Maybe<MediaContainerType> containerType =
MakeMediaExtendedMIMEType(mime)) {
if (DecoderTraits::CanHandleContainerType(
*containerType, nullptr /* DecoderDoctorDiagnostics */) !=
CANPLAY_NO) {
return true;
}
}
}
return false;
}
// Returns the first non-empty TrackInfo list that DecoderTraits produces for
// one of the MIME types guessed from aConfig, or an empty array when no
// guessed MIME type yields any track.
static nsTArray<UniquePtr<TrackInfo>> GetTracksInfo(
    const AudioDecoderConfigInternal& aConfig) {
  // TODO: Instead of calling GetTracksInfo with the guessed containers,
  // DecoderTraits should provide an API to create the TrackInfo directly.
  const AudioMIMECreateParam param(aConfig);
  for (const nsCString& mimeType : GuessMIMETypes(param)) {
    Maybe<MediaContainerType> container = MakeMediaExtendedMIMEType(mimeType);
    if (container.isNothing()) {
      continue;
    }
    nsTArray<UniquePtr<TrackInfo>> found =
        DecoderTraits::GetTracksInfo(*container);
    if (!found.IsEmpty()) {
      return found;
    }
  }
  return {};
}
// Copies aConfig into aDest. The codec string, channel count and sample rate
// are assigned directly; the description, when one was passed, is duplicated
// through CloneBuffer. A CloneBuffer failure is propagated to the caller, in
// which case the channel count and sample rate of aDest are left unassigned.
static Result<Ok, nsresult> CloneConfiguration(
    RootedDictionary<AudioDecoderConfig>& aDest, JSContext* aCx,
    const AudioDecoderConfig& aConfig) {
  aDest.mCodec = aConfig.mCodec;

  const auto& sourceDescription = aConfig.mDescription;
  if (sourceDescription.WasPassed()) {
    auto& clonedDescription = aDest.mDescription;
    clonedDescription.Construct();
    MOZ_TRY(CloneBuffer(aCx, clonedDescription.Value(),
                        sourceDescription.Value()));
  }

  aDest.mNumberOfChannels = aConfig.mNumberOfChannels;
  aDest.mSampleRate = aConfig.mSampleRate;
  return Ok();
}
// https://w3c.github.io/webcodecs/#create-a-audiodata
//
// Wraps a decoded mozilla::AudioData in a DOM AudioData object. The output is
// always described as F32; channel count, sample rate and timestamp (in
// microseconds) are taken from aData, and the frame count is the sample count
// divided by the channel count. The sample buffer obtained from
// MoveableData() is handed to AudioDataResource::Create as raw bytes.
static RefPtr<AudioData> CreateAudioData(nsIGlobalObject* aGlobalObject,
                                         mozilla::AudioData* aData) {
  MOZ_ASSERT(aGlobalObject);
  MOZ_ASSERT(aData);

  mozilla::dom::AudioDataInit audioInit;
  audioInit.mFormat = mozilla::dom::AudioSampleFormat::F32;
  audioInit.mNumberOfChannels = aData->mChannels;
  audioInit.mSampleRate = AssertedCast<float>(aData->mRate);
  audioInit.mTimestamp = aData->mTime.ToMicroseconds();

  // The metadata above is read before the sample buffer is taken.
  auto samples = aData->MoveableData();
  audioInit.mNumberOfFrames = samples.Length() / audioInit.mNumberOfChannels;

  // View the float samples as a byte range for the resource.
  uint8_t* const bytes = reinterpret_cast<uint8_t*>(samples.Data());
  const size_t byteLength = samples.Length() * sizeof(float);
  RefPtr<AudioDataResource> backing =
      AudioDataResource::Create(Span{bytes, byteLength});

  return MakeRefPtr<AudioData>(aGlobalObject, backing.forget(), audioInit);
}
/* static */
// Reports whether the given internal config can be decoded; the decision is
// delegated entirely to CanDecodeAudio().
bool AudioDecoderTraits::IsSupported(
    const AudioDecoderConfigInternal& aConfig) {
  const bool decodable = CanDecodeAudio(aConfig);
  return decodable;
}
/* static */
// Creates the TrackInfo (an AudioInfo) describing aConfig.
//
// Fails with NS_ERROR_INVALID_ARG unless GetTracksInfo() yields exactly one
// track and that track is an audio track exposing an AudioInfo. On success
// the AudioInfo carries the config's channel count and sample rate and, when
// a non-null description buffer is present, that buffer as its
// codec-specific binary blob.
Result<UniquePtr<TrackInfo>, nsresult> AudioDecoderTraits::CreateTrackInfo(
    const AudioDecoderConfigInternal& aConfig) {
  LOG("Create a AudioInfo from %s config",
      NS_ConvertUTF16toUTF8(aConfig.mCodec).get());

  nsTArray<UniquePtr<TrackInfo>> tracks = GetTracksInfo(aConfig);
  if (tracks.Length() != 1 || tracks[0]->GetType() != TrackInfo::kAudioTrack) {
    LOGE("Failed to get TrackInfo");
    return Err(NS_ERROR_INVALID_ARG);
  }

  UniquePtr<TrackInfo> track(std::move(tracks[0]));
  AudioInfo* ai = track->GetAsAudioInfo();
  if (!ai) {
    LOGE("Failed to get AudioInfo");
    return Err(NS_ERROR_INVALID_ARG);
  }

  if (aConfig.mDescription.isSome()) {
    // The Maybe may hold a null buffer; only attach real data.
    RefPtr<MediaByteBuffer> buf = aConfig.mDescription.value();
    if (buf) {
      LOG("The given config has %zu bytes of description data", buf->Length());
      ai->mCodecSpecificConfig =
          AudioCodecSpecificVariant{AudioCodecSpecificBinaryBlob{buf}};
    }
  }

  ai->mChannels = aConfig.mNumberOfChannels;
  ai->mRate = aConfig.mSampleRate;

  // The "Hz" field reports the sample rate, not the channel count.
  LOG("Created AudioInfo %s (%" PRIu32 "ch %" PRIu32
      "Hz - with extra-data: %s)",
      NS_ConvertUTF16toUTF8(aConfig.mCodec).get(), ai->mChannels, ai->mRate,
      aConfig.mDescription.isSome() ? "yes" : "no");
  return track;
}
// https://w3c.github.io/webcodecs/#valid-audiodecoderconfig
/* static */
// Checks that aConfig is well formed: the codec string must parse to a
// non-empty value, and both the channel count and the sample rate must be
// non-zero. On failure a human-readable reason is appended to aErrorMessage
// and false is returned; on success aErrorMessage is left untouched.
bool AudioDecoderTraits::Validate(const AudioDecoderConfig& aConfig,
                                  nsCString& aErrorMessage) {
  Maybe<nsString> codec = ParseCodecString(aConfig.mCodec);
  if (!codec || codec->IsEmpty()) {
    LOGE("Validating AudioDecoderConfig: invalid codec string");
    aErrorMessage.AppendPrintf("Invalid codec string %s",
                               NS_ConvertUTF16toUTF8(aConfig.mCodec).get());
    return false;
  }

  LOG("Validating AudioDecoderConfig: codec: %s %uch %uHz %s extradata",
      NS_ConvertUTF16toUTF8(codec.value()).get(), aConfig.mNumberOfChannels,
      aConfig.mSampleRate, aConfig.mDescription.WasPassed() ? "w/" : "no");

  if (aConfig.mNumberOfChannels == 0) {
    aErrorMessage.AppendPrintf("Invalid number of channels of %u",
                               aConfig.mNumberOfChannels);
    return false;
  }

  if (aConfig.mSampleRate == 0) {
    // Report the offending sample rate, not the channel count.
    aErrorMessage.AppendPrintf("Invalid sample-rate of %u",
                               aConfig.mSampleRate);
    return false;
  }

  return true;
}
/* static */
// Converts a WebIDL AudioDecoderConfig into its internal form by delegating
// to AudioDecoderConfigInternal::Create(); the result is nullptr when that
// call rejects the config.
UniquePtr<AudioDecoderConfigInternal> AudioDecoderTraits::CreateConfigInternal(
    const AudioDecoderConfig& aConfig) {
  UniquePtr<AudioDecoderConfigInternal> internalConfig =
      AudioDecoderConfigInternal::Create(aConfig);
  return internalConfig;
}
/* static */
// True when the chunk's type is EncodedAudioChunkType::Key.
bool AudioDecoderTraits::IsKeyChunk(const EncodedAudioChunk& aInput) {
  const bool isKey = (aInput.Type() == EncodedAudioChunkType::Key);
  return isKey;
}
/* static */
// Produces the internal representation of an input chunk, which is whatever
// EncodedAudioChunk::Clone() returns for it.
UniquePtr<EncodedAudioChunkData> AudioDecoderTraits::CreateInputInternal(
    const EncodedAudioChunk& aInput) {
  UniquePtr<EncodedAudioChunkData> chunkData = aInput.Clone();
  return chunkData;
}
/*
* Below is the AudioDecoder implementation
*/
// Forwards the owning global and both callbacks to DecoderTemplate. The
// assertions check that mErrorCallback and mOutputCallback are non-null once
// the base-class constructor has run; construction is logged at debug level.
AudioDecoder::AudioDecoder(nsIGlobalObject* aParent,
RefPtr<WebCodecsErrorCallback>&& aErrorCallback,
RefPtr<AudioDataOutputCallback>&& aOutputCallback)
: DecoderTemplate(aParent, std::move(aErrorCallback),
std::move(aOutputCallback)) {
MOZ_ASSERT(mErrorCallback);
MOZ_ASSERT(mOutputCallback);
LOG("AudioDecoder %p ctor", this);
}
// Logs destruction and calls ResetInternal with NS_ERROR_DOM_ABORT_ERR. The
// result of that call is deliberately discarded (Unused <<).
AudioDecoder::~AudioDecoder() {
LOG("AudioDecoder %p dtor", this);
Unused << ResetInternal(NS_ERROR_DOM_ABORT_ERR);
}
// Creates the JS reflector for this decoder through the generated
// AudioDecoder binding. Asserts that it runs on the owning thread.
JSObject* AudioDecoder::WrapObject(JSContext* aCx,
                                   JS::Handle<JSObject*> aGivenProto) {
  AssertIsOnOwningThread();

  JSObject* reflector = AudioDecoder_Binding::Wrap(aCx, this, aGivenProto);
  return reflector;
}
// https://w3c.github.io/webcodecs/#dom-audiodecoder-audiodecoder
/* static */
// WebIDL constructor. Resolves the nsIGlobalObject behind aGlobal and creates
// a decoder bound to it with the error and output callbacks from aInit.
// Throws NS_ERROR_FAILURE on aRv and returns nullptr when the global cannot
// be obtained.
already_AddRefed<AudioDecoder> AudioDecoder::Constructor(
    const GlobalObject& aGlobal, const AudioDecoderInit& aInit,
    ErrorResult& aRv) {
  nsCOMPtr<nsIGlobalObject> owner = do_QueryInterface(aGlobal.GetAsSupports());
  if (!owner) {
    aRv.Throw(NS_ERROR_FAILURE);
    return nullptr;
  }

  RefPtr<WebCodecsErrorCallback> onError(aInit.mError);
  RefPtr<AudioDataOutputCallback> onOutput(aInit.mOutput);
  return MakeAndAddRef<AudioDecoder>(owner.get(), std::move(onError),
                                     std::move(onOutput));
}
// https://w3c.github.io/webcodecs/#dom-audiodecoder-isconfigsupported
/* static */
// Returns a promise describing whether aConfig can be decoded.
//
// - No global available: throws NS_ERROR_FAILURE on aRv, returns nullptr.
// - Promise creation fails: aRv carries the failure, the promise pointer is
//   returned as is.
// - aConfig fails validation: the promise is rejected with a TypeError that
//   carries the validation message.
// - aConfig cannot be cloned: the promise is rejected with a TypeError and
//   the clone error is also thrown on aRv.
// - Otherwise: the promise is resolved with an AudioDecoderSupport holding
//   the cloned config and the CanDecodeAudio() verdict.
already_AddRefed<Promise> AudioDecoder::IsConfigSupported(
    const GlobalObject& aGlobal, const AudioDecoderConfig& aConfig,
    ErrorResult& aRv) {
  LOG("AudioDecoder::IsConfigSupported, config: %s",
      NS_ConvertUTF16toUTF8(aConfig.mCodec).get());

  nsCOMPtr<nsIGlobalObject> owner = do_QueryInterface(aGlobal.GetAsSupports());
  if (!owner) {
    aRv.Throw(NS_ERROR_FAILURE);
    return nullptr;
  }

  RefPtr<Promise> promise = Promise::Create(owner.get(), aRv);
  if (NS_WARN_IF(aRv.Failed())) {
    return promise.forget();
  }

  nsCString validationError;
  if (!AudioDecoderTraits::Validate(aConfig, validationError)) {
    promise->MaybeRejectWithTypeError(validationError);
    return promise.forget();
  }

  // TODO: Move the following work to another thread to unblock the current
  // thread, as the spec suggests.
  JSContext* cx = aGlobal.Context();
  RootedDictionary<AudioDecoderConfig> clonedConfig(cx);
  auto cloneResult = CloneConfiguration(clonedConfig, cx, aConfig);
  if (cloneResult.isErr()) {
    nsresult cloneError = cloneResult.unwrapErr();
    nsCString errorName;
    GetErrorName(cloneError, errorName);
    LOGE("Failed to clone AudioDecoderConfig. Error: %s", errorName.get());
    promise->MaybeRejectWithTypeError("Failed to clone AudioDecoderConfig");
    aRv.Throw(cloneError);
    return promise.forget();
  }

  // Evaluate support before the cloned config is moved into the result.
  const bool supported = CanDecodeAudio(clonedConfig);

  RootedDictionary<AudioDecoderSupport> support(cx);
  support.mConfig.Construct(std::move(clonedConfig));
  support.mSupported.Construct(supported);
  promise->MaybeResolve(support);
  return promise.forget();
}
// Converts an internal encoded chunk into the MediaRawData that is fed to the
// decoder. Returns nullptr (after logging an error) when aData is null or
// when it yields no sample from TakeData(). aInfo is only checked, in debug
// builds, to be an audio track info; aConfig is not consulted.
already_AddRefed<MediaRawData> AudioDecoder::InputDataToMediaRawData(
    UniquePtr<EncodedAudioChunkData>&& aData, TrackInfo& aInfo,
    const AudioDecoderConfigInternal& aConfig) {
  AssertIsOnOwningThread();
  MOZ_ASSERT(aInfo.GetAsAudioInfo());

  if (!aData) {
    LOGE("No data for conversion");
    return nullptr;
  }

  RefPtr<MediaRawData> rawData = aData->TakeData();
  if (!rawData) {
    LOGE("Take no data for conversion");
    return nullptr;
  }

  const char* const keyFrameText = rawData->mKeyframe ? "yes" : "no";
  LOGV(
      "EncodedAudioChunkData %p converted to %zu-byte MediaRawData - time: "
      "%" PRIi64 "us, timecode: %" PRIi64 "us, duration: %" PRIi64
      "us, key-frame: %s",
      aData.get(), rawData->Size(), rawData->mTime.ToMicroseconds(),
      rawData->mTimecode.ToMicroseconds(), rawData->mDuration.ToMicroseconds(),
      keyFrameText);

  return rawData.forget();
}
// Turns each decoded MediaData into a DOM AudioData bound to aGlobalObject,
// preserving input order. Every element must be of type AUDIO_DATA; this is
// enforced with a release assertion. aConfig is not consulted.
nsTArray<RefPtr<AudioData>> AudioDecoder::DecodedDataToOutputType(
    nsIGlobalObject* aGlobalObject, const nsTArray<RefPtr<MediaData>>&& aData,
    AudioDecoderConfigInternal& aConfig) {
  AssertIsOnOwningThread();

  nsTArray<RefPtr<AudioData>> outputs;
  for (const auto& decoded : aData) {
    MOZ_RELEASE_ASSERT(decoded->mType == MediaData::Type::AUDIO_DATA);
    RefPtr<mozilla::AudioData> audio(decoded->As<mozilla::AudioData>());
    RefPtr<AudioData> output = CreateAudioData(aGlobalObject, audio.get());
    outputs.AppendElement(output);
  }
  return outputs;
}
// Remove the file-local logging macros defined near the top of this file so
// they are not visible to code that follows.
// NOTE(review): presumably needed because sources may be compiled together
// in one translation unit - confirm against the build configuration.
#undef LOG
#undef LOGW
#undef LOGE
#undef LOGV
#undef LOG_INTERNAL
} // namespace mozilla::dom