tts_mac.mm 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345
  1. // Copyright (c) 2012 The Chromium Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file.
  4. #include <string>
  5. #include "base/mac/scoped_nsobject.h"
  6. #include "base/memory/singleton.h"
  7. #include "base/strings/sys_string_conversions.h"
  8. #include "base/values.h"
  9. #include "chrome/browser/speech/tts_controller.h"
  10. #include "chrome/browser/speech/tts_platform.h"
  11. #import <Cocoa/Cocoa.h>
  12. class TtsPlatformImplMac;
  13. @interface ChromeTtsDelegate : NSObject <NSSpeechSynthesizerDelegate> {
  14. @private
  15. TtsPlatformImplMac* ttsImplMac_; // weak.
  16. }
  17. - (id)initWithPlatformImplMac:(TtsPlatformImplMac*)ttsImplMac;
  18. @end
  19. // Subclass of NSSpeechSynthesizer that takes an utterance
  20. // string on initialization, retains it and only allows it
  21. // to be spoken once.
  22. //
  23. // We construct a new NSSpeechSynthesizer for each utterance, for
  24. // two reasons:
  25. // 1. To associate delegate callbacks with a particular utterance,
  26. // without assuming anything undocumented about the protocol.
  27. // 2. To work around http://openradar.appspot.com/radar?id=2854403,
  28. // where Nuance voices don't retain the utterance string and
  29. // crash when trying to call willSpeakWord.
  30. @interface SingleUseSpeechSynthesizer : NSSpeechSynthesizer {
  31. @private
  32. base::scoped_nsobject<NSString> utterance_;
  33. bool didSpeak_;
  34. }
  35. - (id)initWithUtterance:(NSString*)utterance;
  36. - (bool)startSpeakingRetainedUtterance;
  37. - (bool)startSpeakingString:(NSString*)utterance;
  38. @end
  39. class TtsPlatformImplMac : public TtsPlatformImpl {
  40. public:
  41. bool PlatformImplAvailable() override { return true; }
  42. bool Speak(int utterance_id,
  43. const std::string& utterance,
  44. const std::string& lang,
  45. const VoiceData& voice,
  46. const UtteranceContinuousParameters& params) override;
  47. bool StopSpeaking() override;
  48. void Pause() override;
  49. void Resume() override;
  50. bool IsSpeaking() override;
  51. void GetVoices(std::vector<VoiceData>* out_voices) override;
  52. // Called by ChromeTtsDelegate when we get a callback from the
  53. // native speech engine.
  54. void OnSpeechEvent(NSSpeechSynthesizer* sender,
  55. TtsEventType event_type,
  56. int char_index,
  57. const std::string& error_message);
  58. // Get the single instance of this class.
  59. static TtsPlatformImplMac* GetInstance();
  60. private:
  61. TtsPlatformImplMac();
  62. ~TtsPlatformImplMac() override;
  63. base::scoped_nsobject<SingleUseSpeechSynthesizer> speech_synthesizer_;
  64. base::scoped_nsobject<ChromeTtsDelegate> delegate_;
  65. int utterance_id_;
  66. std::string utterance_;
  67. int last_char_index_;
  68. bool paused_;
  69. friend struct base::DefaultSingletonTraits<TtsPlatformImplMac>;
  70. DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplMac);
  71. };
  72. // static
  73. TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
  74. return TtsPlatformImplMac::GetInstance();
  75. }
  76. bool TtsPlatformImplMac::Speak(int utterance_id,
  77. const std::string& utterance,
  78. const std::string& lang,
  79. const VoiceData& voice,
  80. const UtteranceContinuousParameters& params) {
  81. // TODO: convert SSML to SAPI xml. http://crbug.com/88072
  82. utterance_ = utterance;
  83. paused_ = false;
  84. NSString* utterance_nsstring =
  85. [NSString stringWithUTF8String:utterance_.c_str()];
  86. // Deliberately construct a new speech synthesizer every time Speak is
  87. // called, otherwise there's no way to know whether calls to the delegate
  88. // apply to the current utterance or a previous utterance. In
  89. // experimentation, the overhead of constructing and destructing a
  90. // NSSpeechSynthesizer is minimal.
  91. speech_synthesizer_.reset([[SingleUseSpeechSynthesizer alloc]
  92. initWithUtterance:utterance_nsstring]);
  93. [speech_synthesizer_ setDelegate:delegate_];
  94. if (!voice.native_voice_identifier.empty()) {
  95. NSString* native_voice_identifier =
  96. [NSString stringWithUTF8String:voice.native_voice_identifier.c_str()];
  97. [speech_synthesizer_ setVoice:native_voice_identifier];
  98. }
  99. utterance_id_ = utterance_id;
  100. // TODO: support languages other than the default: crbug.com/88059
  101. if (params.rate >= 0.0) {
  102. // The TTS api defines rate via words per minute. Let 200 be the default.
  103. [speech_synthesizer_ setObject:[NSNumber numberWithInt:params.rate * 200]
  104. forProperty:NSSpeechRateProperty
  105. error:nil];
  106. }
  107. if (params.pitch >= 0.0) {
  108. // The input is a float from 0.0 to 2.0, with 1.0 being the default.
  109. // Get the default pitch for this voice and modulate it by 50% - 150%.
  110. NSError* errorCode;
  111. NSNumber* defaultPitchObj =
  112. [speech_synthesizer_ objectForProperty:NSSpeechPitchBaseProperty
  113. error:&errorCode];
  114. int defaultPitch = defaultPitchObj ? [defaultPitchObj intValue] : 48;
  115. int newPitch = static_cast<int>(defaultPitch * (0.5 * params.pitch + 0.5));
  116. [speech_synthesizer_ setObject:[NSNumber numberWithInt:newPitch]
  117. forProperty:NSSpeechPitchBaseProperty
  118. error:nil];
  119. }
  120. if (params.volume >= 0.0) {
  121. [speech_synthesizer_ setObject:[NSNumber numberWithFloat:params.volume]
  122. forProperty:NSSpeechVolumeProperty
  123. error:nil];
  124. }
  125. bool success = [speech_synthesizer_ startSpeakingRetainedUtterance];
  126. if (success) {
  127. TtsController* controller = TtsController::GetInstance();
  128. controller->OnTtsEvent(utterance_id_, TTS_EVENT_START, 0, "");
  129. }
  130. return success;
  131. }
  132. bool TtsPlatformImplMac::StopSpeaking() {
  133. if (speech_synthesizer_.get()) {
  134. [speech_synthesizer_ stopSpeaking];
  135. speech_synthesizer_.reset(nil);
  136. }
  137. paused_ = false;
  138. return true;
  139. }
  140. void TtsPlatformImplMac::Pause() {
  141. if (speech_synthesizer_.get() && utterance_id_ && !paused_) {
  142. [speech_synthesizer_ pauseSpeakingAtBoundary:NSSpeechImmediateBoundary];
  143. paused_ = true;
  144. TtsController::GetInstance()->OnTtsEvent(utterance_id_, TTS_EVENT_PAUSE,
  145. last_char_index_, "");
  146. }
  147. }
  148. void TtsPlatformImplMac::Resume() {
  149. if (speech_synthesizer_.get() && utterance_id_ && paused_) {
  150. [speech_synthesizer_ continueSpeaking];
  151. paused_ = false;
  152. TtsController::GetInstance()->OnTtsEvent(utterance_id_, TTS_EVENT_RESUME,
  153. last_char_index_, "");
  154. }
  155. }
  156. bool TtsPlatformImplMac::IsSpeaking() {
  157. if (speech_synthesizer_)
  158. return [speech_synthesizer_ isSpeaking];
  159. return false;
  160. }
  161. void TtsPlatformImplMac::GetVoices(std::vector<VoiceData>* outVoices) {
  162. NSArray* voices = [NSSpeechSynthesizer availableVoices];
  163. // Create a new temporary array of the available voices with
  164. // the default voice first.
  165. NSMutableArray* orderedVoices =
  166. [NSMutableArray arrayWithCapacity:[voices count]];
  167. NSString* defaultVoice = [NSSpeechSynthesizer defaultVoice];
  168. if (defaultVoice) {
  169. [orderedVoices addObject:defaultVoice];
  170. }
  171. for (NSString* voiceIdentifier in voices) {
  172. if (![voiceIdentifier isEqualToString:defaultVoice])
  173. [orderedVoices addObject:voiceIdentifier];
  174. }
  175. for (NSString* voiceIdentifier in orderedVoices) {
  176. outVoices->push_back(VoiceData());
  177. VoiceData& data = outVoices->back();
  178. NSDictionary* attributes =
  179. [NSSpeechSynthesizer attributesForVoice:voiceIdentifier];
  180. NSString* name = [attributes objectForKey:NSVoiceName];
  181. NSString* gender = [attributes objectForKey:NSVoiceGender];
  182. NSString* localeIdentifier =
  183. [attributes objectForKey:NSVoiceLocaleIdentifier];
  184. data.native = true;
  185. data.native_voice_identifier = base::SysNSStringToUTF8(voiceIdentifier);
  186. data.name = base::SysNSStringToUTF8(name);
  187. NSDictionary* localeComponents =
  188. [NSLocale componentsFromLocaleIdentifier:localeIdentifier];
  189. NSString* language = [localeComponents objectForKey:NSLocaleLanguageCode];
  190. NSString* country = [localeComponents objectForKey:NSLocaleCountryCode];
  191. if (language && country) {
  192. data.lang =
  193. [[NSString stringWithFormat:@"%@-%@", language, country] UTF8String];
  194. } else {
  195. data.lang = base::SysNSStringToUTF8(language);
  196. }
  197. if ([gender isEqualToString:NSVoiceGenderMale])
  198. data.gender = TTS_GENDER_MALE;
  199. else if ([gender isEqualToString:NSVoiceGenderFemale])
  200. data.gender = TTS_GENDER_FEMALE;
  201. else
  202. data.gender = TTS_GENDER_NONE;
  203. data.events.insert(TTS_EVENT_START);
  204. data.events.insert(TTS_EVENT_END);
  205. data.events.insert(TTS_EVENT_WORD);
  206. data.events.insert(TTS_EVENT_ERROR);
  207. data.events.insert(TTS_EVENT_CANCELLED);
  208. data.events.insert(TTS_EVENT_INTERRUPTED);
  209. data.events.insert(TTS_EVENT_PAUSE);
  210. data.events.insert(TTS_EVENT_RESUME);
  211. }
  212. }
  213. void TtsPlatformImplMac::OnSpeechEvent(NSSpeechSynthesizer* sender,
  214. TtsEventType event_type,
  215. int char_index,
  216. const std::string& error_message) {
  217. // Don't send events from an utterance that's already completed.
  218. // This depends on the fact that we construct a new NSSpeechSynthesizer
  219. // each time we call Speak.
  220. if (sender != speech_synthesizer_.get())
  221. return;
  222. if (event_type == TTS_EVENT_END)
  223. char_index = utterance_.size();
  224. TtsController* controller = TtsController::GetInstance();
  225. controller->OnTtsEvent(utterance_id_, event_type, char_index, error_message);
  226. last_char_index_ = char_index;
  227. }
  228. TtsPlatformImplMac::TtsPlatformImplMac() {
  229. utterance_id_ = -1;
  230. paused_ = false;
  231. delegate_.reset([[ChromeTtsDelegate alloc] initWithPlatformImplMac:this]);
  232. }
  233. TtsPlatformImplMac::~TtsPlatformImplMac() {}
  234. // static
  235. TtsPlatformImplMac* TtsPlatformImplMac::GetInstance() {
  236. return base::Singleton<TtsPlatformImplMac>::get();
  237. }
  238. @implementation ChromeTtsDelegate
  239. - (id)initWithPlatformImplMac:(TtsPlatformImplMac*)ttsImplMac {
  240. if ((self = [super init])) {
  241. ttsImplMac_ = ttsImplMac;
  242. }
  243. return self;
  244. }
  245. - (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
  246. didFinishSpeaking:(BOOL)finished_speaking {
  247. ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_END, 0, "");
  248. }
  249. - (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
  250. willSpeakWord:(NSRange)character_range
  251. ofString:(NSString*)string {
  252. ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_WORD, character_range.location,
  253. "");
  254. }
  255. - (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
  256. didEncounterErrorAtIndex:(NSUInteger)character_index
  257. ofString:(NSString*)string
  258. message:(NSString*)message {
  259. std::string message_utf8 = base::SysNSStringToUTF8(message);
  260. ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_ERROR, character_index,
  261. message_utf8);
  262. }
  263. @end
  264. @implementation SingleUseSpeechSynthesizer
  265. - (id)initWithUtterance:(NSString*)utterance {
  266. self = [super init];
  267. if (self) {
  268. utterance_.reset([utterance retain]);
  269. didSpeak_ = false;
  270. }
  271. return self;
  272. }
  273. - (bool)startSpeakingRetainedUtterance {
  274. CHECK(!didSpeak_);
  275. CHECK(utterance_);
  276. didSpeak_ = true;
  277. return [super startSpeakingString:utterance_];
  278. }
  279. - (bool)startSpeakingString:(NSString*)utterance {
  280. CHECK(false);
  281. return false;
  282. }
  283. @end