fune/dom/media/webspeech/synth/cocoa/OSXSpeechSynthesizerService.mm
Mike Hommey 9331b9fb07 Bug 1851529 - Set clang-format ColumnLimit consistently. r=sylvestre,necko-reviewers,kershaw
For some reason, its value in the Google style we use is 80... except
for Objective-C, where it's 100, which led to things like:
https://hg.mozilla.org/mozilla-central/rev/31bf68247e6e
https://hg.mozilla.org/mozilla-central/rev/64ceb33533a4.

There's probably a discussion to have about whether 80 is the right
limit, but since it's what's used for everything except ObjC, let's roll
with it.

# ignore-this-changeset

Differential Revision: https://phabricator.services.mozilla.com/D187409
2023-09-05 11:23:01 +00:00

461 lines
14 KiB
Text

/* -*- Mode: Objective-C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset:
* 2 -*- */
/* vim: set ts=2 sw=2 et tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsISupports.h"
#include "nsServiceManagerUtils.h"
#include "nsObjCExceptions.h"
#include "nsCocoaUtils.h"
#include "nsIThread.h"
#include "nsThreadUtils.h"
#include "nsXULAppAPI.h"
#include "mozilla/ClearOnShutdown.h"
#include "mozilla/dom/nsSynthVoiceRegistry.h"
#include "mozilla/dom/nsSpeechTask.h"
#include "mozilla/Preferences.h"
#include "mozilla/StaticPrefs_media.h"
#include "mozilla/Assertions.h"
#include "OSXSpeechSynthesizerService.h"
#import <Cocoa/Cocoa.h>
// Forward declaration: SpeechTaskCallback stores a SpeechDelegate pointer
// before the Objective-C interface itself is declared.
@class SpeechDelegate;
// We can escape the default delimiters ("[[" and "]]") by temporarily
// changing the delimiters just before they appear, and changing them back
// just after.
// DLIM_ESCAPE_START is written with the default delimiters and switches them
// to "((" / "))"; DLIM_ESCAPE_END is written with those temporary delimiters
// and restores "[[" / "]]".
#define DLIM_ESCAPE_START "[[dlim (( ))]]"
#define DLIM_ESCAPE_END "((dlim [[ ]]))"
using namespace mozilla;
// Connects one nsISpeechTask to the NSSpeechSynthesizer that speaks it. The
// task drives the synthesizer through the nsISpeechTaskCallback methods, and
// SpeechDelegate reports synthesizer progress back through the On*() methods
// declared below.
class SpeechTaskCallback final : public nsISpeechTaskCallback {
public:
// aSynth is released in the destructor and aOffsets is cloned into mOffsets.
// The constructor also creates the SpeechDelegate and installs it on aSynth.
SpeechTaskCallback(nsISpeechTask* aTask, NSSpeechSynthesizer* aSynth,
const nsTArray<size_t>& aOffsets);
NS_DECL_CYCLE_COLLECTING_ISUPPORTS
NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(SpeechTaskCallback,
nsISpeechTaskCallback)
// Presumably declares OnCancel/OnPause/OnResume/OnVolumeChanged, which are
// the methods this file defines for the interface.
NS_DECL_NSISPEECHTASKCALLBACK
// Called by SpeechDelegate. aIndex and aLength are the location and length
// of the word range reported by the synthesizer.
void OnWillSpeakWord(uint32_t aIndex, uint32_t aLength);
// Called by SpeechDelegate with the character index reported for the error.
void OnError(uint32_t aIndex);
// Called by SpeechDelegate when the synthesizer reports it finished speaking.
void OnDidFinishSpeaking();
private:
virtual ~SpeechTaskCallback();
// Seconds elapsed since mStartingTime.
float GetTimeDurationFromStart();
// Cleared in OnDidFinishSpeaking(), so the other callbacks null-check it.
nsCOMPtr<nsISpeechTask> mTask;
// Released in the destructor.
NSSpeechSynthesizer* mSpeechSynthesizer;
// Allocated in the constructor, released in the destructor.
SpeechDelegate* mDelegate;
// Set to TimeStamp::Now() at construction.
TimeStamp mStartingTime;
// Last offset dispatched to the task; updated in OnWillSpeakWord().
uint32_t mCurrentIndex;
// Index: position in the escaped text handed to the synthesizer.
// Value: position in the user-supplied text.
nsTArray<size_t> mOffsets;
};
// NSSpeechSynthesizerDelegate implementation that forwards synthesizer
// callbacks to a C++ SpeechTaskCallback.
@interface SpeechDelegate : NSObject <NSSpeechSynthesizerDelegate> {
@private
// Raw pointer to the callback receiving the forwarded events.
SpeechTaskCallback* mCallback;
}
// Initializes the delegate so that it forwards events to aCallback.
- (id)initWithCallback:(SpeechTaskCallback*)aCallback;
@end
@implementation SpeechDelegate

// Stores aCallback as a raw pointer; no reference is taken on it here.
- (id)initWithCallback:(SpeechTaskCallback*)aCallback {
  // -init is allowed to return nil or a different object, so its result must
  // be assigned back to self and checked before any ivar is written.
  self = [super init];
  if (self) {
    mCallback = aCallback;
  }
  return self;
}

// Forwards the word range (location and length) about to be spoken.
- (void)speechSynthesizer:(NSSpeechSynthesizer*)aSender
            willSpeakWord:(NSRange)aRange
                 ofString:(NSString*)aString {
  mCallback->OnWillSpeakWord(aRange.location, aRange.length);
}

// Forwards the end of speech. aFinishedSpeaking is not inspected, so the
// callback is notified whatever its value.
- (void)speechSynthesizer:(NSSpeechSynthesizer*)aSender
        didFinishSpeaking:(BOOL)aFinishedSpeaking {
  mCallback->OnDidFinishSpeaking();
}

// Forwards the character index of a synthesis error. The error message is
// not passed on.
- (void)speechSynthesizer:(NSSpeechSynthesizer*)aSender
    didEncounterErrorAtIndex:(NSUInteger)aCharacterIndex
                    ofString:(NSString*)aString
                     message:(NSString*)aMessage {
  mCallback->OnError(aCharacterIndex);
}

@end
// Cycle collection and XPCOM boilerplate for SpeechTaskCallback. mTask is the
// only member listed for the cycle collector.
NS_IMPL_CYCLE_COLLECTION(SpeechTaskCallback, mTask);
// Interface map entries: nsISpeechTaskCallback, plus nsISupports resolved
// through nsISpeechTaskCallback.
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechTaskCallback)
NS_INTERFACE_MAP_ENTRY(nsISpeechTaskCallback)
NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTaskCallback)
NS_INTERFACE_MAP_END
NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechTaskCallback)
NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechTaskCallback)
// Takes over aSynth (released in the destructor), keeps a private copy of
// aOffsets, and installs a freshly created SpeechDelegate on the synthesizer
// so that its events are routed to this object.
SpeechTaskCallback::SpeechTaskCallback(nsISpeechTask* aTask,
                                       NSSpeechSynthesizer* aSynth,
                                       const nsTArray<size_t>& aOffsets)
    : mTask(aTask),
      mSpeechSynthesizer(aSynth),
      mDelegate([[SpeechDelegate alloc] initWithCallback:this]),
      mCurrentIndex(0),
      mOffsets(aOffsets.Clone()) {
  [mSpeechSynthesizer setDelegate:mDelegate];

  // Reference point for the elapsed time reported with every event.
  mStartingTime = TimeStamp::Now();
}
// Tears down the Objective-C side of the callback.
SpeechTaskCallback::~SpeechTaskCallback() {
// Detach the delegate first so the synthesizer no longer points at it once
// it has been released.
[mSpeechSynthesizer setDelegate:nil];
// Balances the alloc performed in the constructor.
[mDelegate release];
// The synthesizer passed to the constructor is released here.
[mSpeechSynthesizer release];
}
// Stops the synthesizer. Returns NS_OK on the normal path; the closing
// try-block macro is given NS_ERROR_FAILURE as its return value.
NS_IMETHODIMP
SpeechTaskCallback::OnCancel() {
NS_OBJC_BEGIN_TRY_BLOCK_RETURN;
[mSpeechSynthesizer stopSpeaking];
return NS_OK;
NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
}
// Pauses the synthesizer immediately and, if the task is still attached,
// dispatches a pause event with the elapsed time and the current offset.
// Returns NS_ERROR_FAILURE when the task has already been dropped.
NS_IMETHODIMP
SpeechTaskCallback::OnPause() {
  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;

  // The synthesizer is paused whether or not the task is still attached.
  [mSpeechSynthesizer pauseSpeakingAtBoundary:NSSpeechImmediateBoundary];

  if (mTask) {
    mTask->DispatchPause(GetTimeDurationFromStart(), mCurrentIndex);
    return NS_OK;
  }

  // When calling pause() on the child process, it may not have received the
  // end event from the chrome process yet.
  return NS_ERROR_FAILURE;

  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
}
// Resumes the synthesizer and, if the task is still attached, dispatches a
// resume event with the elapsed time and the current offset. Returns
// NS_ERROR_FAILURE when the task has already been dropped.
NS_IMETHODIMP
SpeechTaskCallback::OnResume() {
  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;

  // The synthesizer is resumed whether or not the task is still attached.
  [mSpeechSynthesizer continueSpeaking];

  if (mTask) {
    mTask->DispatchResume(GetTimeDurationFromStart(), mCurrentIndex);
    return NS_OK;
  }

  // When calling resume() on the child process, it may not have received the
  // end event from the chrome process yet.
  return NS_ERROR_FAILURE;

  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
}
// Applies aVolume to the synthesizer's volume property. Any error from the
// property setter is discarded (a nil error out-parameter is passed).
NS_IMETHODIMP
SpeechTaskCallback::OnVolumeChanged(float aVolume) {
  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;

  NSNumber* volume = [NSNumber numberWithFloat:aVolume];
  [mSpeechSynthesizer setObject:volume
                    forProperty:NSSpeechVolumeProperty
                          error:nil];
  return NS_OK;

  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
}
// Returns the number of seconds elapsed since this callback was constructed.
float SpeechTaskCallback::GetTimeDurationFromStart() {
  return (TimeStamp::Now() - mStartingTime).ToSeconds();
}
// Handles a word boundary reported by the synthesizer. aIndex is a position
// in the escaped text; it is mapped through mOffsets to a position in the
// user-supplied text. An out-of-range aIndex leaves mCurrentIndex untouched.
void SpeechTaskCallback::OnWillSpeakWord(uint32_t aIndex, uint32_t aLength) {
  if (aIndex < mOffsets.Length()) {
    mCurrentIndex = mOffsets[aIndex];
  }

  if (mTask) {
    mTask->DispatchBoundary(u"word"_ns, GetTimeDurationFromStart(),
                            mCurrentIndex, aLength, 1);
  }
}
// Reports a synthesis error to the task, if it is still attached. aIndex is
// passed on exactly as received; it is not mapped through mOffsets.
void SpeechTaskCallback::OnError(uint32_t aIndex) {
  if (mTask) {
    mTask->DispatchError(GetTimeDurationFromStart(), aIndex);
  }
}
// Handles the end of speech: dispatches the end event to the task (if it is
// still attached), then detaches from both the synthesizer and the task.
void SpeechTaskCallback::OnDidFinishSpeaking() {
  // mTask is cleared below and is also the member handed to the cycle
  // collector, so guard it the same way the other callbacks do instead of
  // dereferencing it unconditionally.
  if (mTask) {
    mTask->DispatchEnd(GetTimeDurationFromStart(), mCurrentIndex);
  }

  // The delegate is no longer needed once speech has ended.
  [mSpeechSynthesizer setDelegate:nil];
  mTask = nullptr;
}
namespace mozilla {
namespace dom {
// Plain description of one system voice, filled in by EnumVoicesRunnable and
// consumed by RegisterVoicesRunnable.
struct OSXVoice {
  OSXVoice() = default;

  // "urn:moz-tts:osx:" followed by the voice identifier.
  nsString mUri;
  // Voice name as reported by the system.
  nsString mName;
  // Locale identifier with '_' replaced by '-'.
  nsString mLocale;
  // True for the voice the system reports as its default.
  bool mIsDefault = false;
};
// Registers a list of voices with the synth voice registry. The members are
// a raw pointer and a reference into the dispatcher's state, so the
// dispatcher must outlive the runnable.
class RegisterVoicesRunnable final : public Runnable {
 public:
  RegisterVoicesRunnable(OSXSpeechSynthesizerService* aSpeechService,
                         nsTArray<OSXVoice>& aVoices)
      : Runnable("RegisterVoicesRunnable"),
        mSpeechService(aSpeechService),
        mVoices(aVoices) {}

  NS_IMETHOD Run() override;

 private:
  ~RegisterVoicesRunnable() override = default;

  // This runnable is always dispatched synchronously, so it is unnecessary
  // to hold a strong reference to the service or to copy the voice list.
  OSXSpeechSynthesizerService* mSpeechService;
  nsTArray<OSXVoice>& mVoices;
};
// Adds every voice in mVoices to the synth voice registry, marks the default
// one, and notifies the registry once at the end. A voice that fails to
// register is skipped with a warning rather than aborting the whole batch.
// Returns the service lookup error if the registry is unavailable, NS_OK
// otherwise.
NS_IMETHODIMP
RegisterVoicesRunnable::Run() {
  nsresult rv;
  nsCOMPtr<nsISynthVoiceRegistry> registry =
      do_GetService(NS_SYNTHVOICEREGISTRY_CONTRACTID, &rv);
  if (!registry) {
    return rv;
  }

  // Iterate by const reference: OSXVoice holds three strings, and copying
  // each element just to read it is wasted work.
  for (const OSXVoice& voice : mVoices) {
    rv = registry->AddVoice(mSpeechService, voice.mUri, voice.mName,
                            voice.mLocale, true, false);
    if (NS_WARN_IF(NS_FAILED(rv))) {
      continue;
    }

    if (voice.mIsDefault) {
      registry->SetDefaultVoice(voice.mUri, true);
    }
  }

  registry->NotifyVoicesChanged();
  return NS_OK;
}
// Enumerates the system voices and hands them to a RegisterVoicesRunnable.
// Unlike RegisterVoicesRunnable, it holds a strong reference to the service.
class EnumVoicesRunnable final : public Runnable {
 public:
  explicit EnumVoicesRunnable(OSXSpeechSynthesizerService* aSpeechService)
      : Runnable("EnumVoicesRunnable"), mSpeechService(aSpeechService) {}

  NS_IMETHOD Run() override;

 private:
  ~EnumVoicesRunnable() override = default;

  RefPtr<OSXSpeechSynthesizerService> mSpeechService;
};
// Builds an OSXVoice entry for every voice the system reports, then
// dispatches a RegisterVoicesRunnable to the main thread and spins the event
// loop until it has completed. The local voice list therefore outlives the
// runnable that references it.
NS_IMETHODIMP
EnumVoicesRunnable::Run() {
  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;

  AutoTArray<OSXVoice, 64> voiceList;

  NSString* systemDefault = [NSSpeechSynthesizer defaultVoice];
  NSArray* available = [NSSpeechSynthesizer availableVoices];

  for (NSString* voice in available) {
    NSDictionary* attributes = [NSSpeechSynthesizer attributesForVoice:voice];

    OSXVoice entry;

    nsAutoString voiceId;
    nsCocoaUtils::GetStringForNSString(
        [attributes objectForKey:NSVoiceIdentifier], voiceId);
    nsCocoaUtils::GetStringForNSString([attributes objectForKey:NSVoiceName],
                                       entry.mName);
    nsCocoaUtils::GetStringForNSString(
        [attributes objectForKey:NSVoiceLocaleIdentifier], entry.mLocale);

    // Turn "en_US"-style locale identifiers into "en-US".
    entry.mLocale.ReplaceChar('_', '-');

    entry.mUri.AssignLiteral("urn:moz-tts:osx:");
    entry.mUri.Append(voiceId);

    if ([voice isEqualToString:systemDefault]) {
      entry.mIsDefault = true;
    }

    voiceList.AppendElement(entry);
  }

  RefPtr<RegisterVoicesRunnable> registerVoices =
      new RegisterVoicesRunnable(mSpeechService, voiceList);
  NS_DispatchAndSpinEventLoopUntilComplete("EnumVoicesRunnable"_ns,
                                           GetMainThreadSerialEventTarget(),
                                           registerVoices.forget());
  return NS_OK;

  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
}
// Process-wide instance, created lazily by GetInstance().
StaticRefPtr<OSXSpeechSynthesizerService>
OSXSpeechSynthesizerService::sSingleton;
// XPCOM boilerplate: the service exposes nsISpeechService and nsIObserver,
// with nsISupports resolved through nsISpeechService.
NS_INTERFACE_MAP_BEGIN(OSXSpeechSynthesizerService)
NS_INTERFACE_MAP_ENTRY(nsISpeechService)
NS_INTERFACE_MAP_ENTRY(nsIObserver)
NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechService)
NS_INTERFACE_MAP_END
NS_IMPL_ADDREF(OSXSpeechSynthesizerService)
NS_IMPL_RELEASE(OSXSpeechSynthesizerService)
// Constructs the service in the uninitialized state; Init() sets mInitialized
// to true once voice enumeration has been dispatched.
OSXSpeechSynthesizerService::OSXSpeechSynthesizerService()
: mInitialized(false) {}
// Starts asynchronous voice enumeration on a dedicated "SpeechWorker" thread.
// Returns false, leaving the service uninitialized, when the OS backend is
// disabled by prefs, when the thread cannot be created, or when the
// enumeration runnable cannot be queued.
bool OSXSpeechSynthesizerService::Init() {
  if (Preferences::GetBool("media.webspeech.synth.test") ||
      !StaticPrefs::media_webspeech_synth_enabled()) {
    // When test is enabled, we shouldn't add OS backend (Bug 1160844)
    return false;
  }

  nsCOMPtr<nsIThread> thread;
  if (NS_FAILED(NS_NewNamedThread("SpeechWorker", getter_AddRefs(thread)))) {
    return false;
  }

  // Get all the voices and register in the SynthVoiceRegistry
  nsCOMPtr<nsIRunnable> runnable = new EnumVoicesRunnable(this);
  nsresult rv = thread->Dispatch(runnable, NS_DISPATCH_NORMAL);
  if (NS_WARN_IF(NS_FAILED(rv))) {
    // Without the enumeration no voice would ever be registered, so do not
    // report the service as initialized.
    return false;
  }
  // NOTE(review): nothing visible here shuts the worker thread down after
  // enumeration completes; confirm whether that is intentional.

  mInitialized = true;
  return true;
}
// Speaks aText with the voice identified by aUri.
//
// aUri must start with "urn:moz-tts:osx:"; the remainder is used as the voice
// identifier. aRate is scaled by 200 and aVolume is applied as-is. aPitch
// scales the synthesizer's default base pitch by (aPitch / 2 + 0.5).
//
// Occurrences of "[[" and "]]" in aText are escaped before the text reaches
// the synthesizer, and an offset table mapping escaped-text positions back to
// aText positions is handed to the SpeechTaskCallback.
//
// Returns the failure code of aTask->Setup() if it fails, NS_ERROR_FAILURE if
// the synthesizer refuses to start speaking, and NS_OK otherwise.
NS_IMETHODIMP
OSXSpeechSynthesizerService::Speak(const nsAString& aText,
                                   const nsAString& aUri, float aVolume,
                                   float aRate, float aPitch,
                                   nsISpeechTask* aTask) {
  NS_OBJC_BEGIN_TRY_BLOCK_RETURN;

  MOZ_ASSERT(StringBeginsWith(aUri, u"urn:moz-tts:osx:"_ns),
             "OSXSpeechSynthesizerService doesn't allow this voice URI");

  // The SpeechTaskCallback created below releases synth in its destructor.
  NSSpeechSynthesizer* synth = [[NSSpeechSynthesizer alloc] init];

  // strlen("urn:moz-tts:osx:") == 16
  NSString* identifier = nsCocoaUtils::ToNSString(Substring(aUri, 16));
  [synth setVoice:identifier];

  // default rate is 180-220
  [synth setObject:[NSNumber numberWithInt:aRate * 200]
       forProperty:NSSpeechRateProperty
             error:nil];

  // volume allows 0.0-1.0
  [synth setObject:[NSNumber numberWithFloat:aVolume]
       forProperty:NSSpeechVolumeProperty
             error:nil];

  // Use default pitch value to calculate this
  NSNumber* defaultPitch = [synth objectForProperty:NSSpeechPitchBaseProperty
                                              error:nil];
  if (defaultPitch) {
    int newPitch = [defaultPitch intValue] * (aPitch / 2 + 0.5);
    [synth setObject:[NSNumber numberWithInt:newPitch]
         forProperty:NSSpeechPitchBaseProperty
               error:nil];
  }

  nsAutoString escapedText;
  // We need to map the offsets from the given text to the escaped text.
  // The index of the offsets array is the position in the escaped text,
  // the element value is the position in the user-supplied text.
  nsTArray<size_t> offsets;
  const size_t textLength = aText.Length();
  offsets.SetCapacity(textLength);

  const size_t escapeStartLength = strlen(DLIM_ESCAPE_START);
  const size_t escapeEndLength = strlen(DLIM_ESCAPE_END);

  // This loop looks for occurrences of "[[" or "]]", escapes them, and
  // populates the offsets array to supply a map to the original offsets.
  for (size_t i = 0; i < textLength; i++) {
    const bool atDelimiter =
        i + 1 < textLength && ((aText[i] == ']' && aText[i + 1] == ']') ||
                               (aText[i] == '[' && aText[i + 1] == '['));
    if (!atDelimiter) {
      escapedText.Append(aText[i]);
      offsets.AppendElement(i);
      continue;
    }

    // Every position of the inserted escape sequences gets a real source
    // offset. Growing the array with AppendElements(count) only
    // default-constructs the new slots, which leaves size_t values
    // unspecified, and OnWillSpeakWord() may read any slot.
    escapedText.AppendLiteral(DLIM_ESCAPE_START);
    for (size_t pad = 0; pad < escapeStartLength; pad++) {
      offsets.AppendElement(i);
    }

    escapedText.Append(aText[i]);
    offsets.AppendElement(i);
    escapedText.Append(aText[++i]);
    offsets.AppendElement(i);

    // i now indexes the second delimiter character; the closing escape
    // sequence maps to it.
    escapedText.AppendLiteral(DLIM_ESCAPE_END);
    for (size_t pad = 0; pad < escapeEndLength; pad++) {
      offsets.AppendElement(i);
    }
  }

  RefPtr<SpeechTaskCallback> callback =
      new SpeechTaskCallback(aTask, synth, offsets);
  nsresult rv = aTask->Setup(callback);
  NS_ENSURE_SUCCESS(rv, rv);

  NSString* text = nsCocoaUtils::ToNSString(escapedText);
  BOOL success = [synth startSpeakingString:text];
  NS_ENSURE_TRUE(success, NS_ERROR_FAILURE);

  aTask->DispatchStart();
  return NS_OK;

  NS_OBJC_END_TRY_BLOCK_RETURN(NS_ERROR_FAILURE);
}
// nsIObserver implementation that ignores every notification: none of the
// arguments are inspected and NS_OK is always returned.
NS_IMETHODIMP
OSXSpeechSynthesizerService::Observe(nsISupports* aSubject, const char* aTopic,
const char16_t* aData) {
return NS_OK;
}
// Returns the singleton, creating and initializing it on first use. Must be
// called on the main thread. Returns nullptr outside the default (parent)
// process, and also while Init() keeps failing; in that case a later call
// tries again.
OSXSpeechSynthesizerService* OSXSpeechSynthesizerService::GetInstance() {
  MOZ_ASSERT(NS_IsMainThread());

  if (XRE_GetProcessType() != GeckoProcessType_Default) {
    return nullptr;
  }

  if (sSingleton) {
    return sSingleton;
  }

  RefPtr<OSXSpeechSynthesizerService> candidate =
      new OSXSpeechSynthesizerService();
  if (candidate->Init()) {
    sSingleton = candidate;
    ClearOnShutdown(&sSingleton);
  }

  return sSingleton;
}
// Same as GetInstance(), but hands the caller its own reference. The result
// is null whenever GetInstance() returns nullptr.
already_AddRefed<OSXSpeechSynthesizerService>
OSXSpeechSynthesizerService::GetInstanceForService() {
  return RefPtr<OSXSpeechSynthesizerService>(GetInstance()).forget();
}
} // namespace dom
} // namespace mozilla