tts_controller_impl.cc 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448
  1. // Copyright 2014 The Chromium Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file.
  4. #include "chrome/browser/speech/tts_controller_impl.h"
  5. #include <string>
  6. #include <vector>
  7. #include "base/values.h"
  8. #include "chrome/browser/browser_process.h"
  9. #include "chrome/browser/speech/tts_platform.h"
  namespace {

  // Sentinel passed as the char index of a TTS event when no character
  // position is available.
  constexpr int kInvalidCharIndex = -1;

  // Reduces a language/region code such as 'fr-FR' to its two-letter language
  // part ('fr'). Codes shorter than five characters, or without a '-' in the
  // third position, are returned unchanged.
  std::string TrimLanguageCode(std::string lang) {
    const bool has_region_suffix = lang.size() >= 5 && lang[2] == '-';
    if (!has_region_suffix)
      return lang;
    return lang.substr(0, 2);
  }

  }  // namespace
  22. bool IsFinalTtsEventType(TtsEventType event_type) {
  23. return (event_type == TTS_EVENT_END || event_type == TTS_EVENT_INTERRUPTED ||
  24. event_type == TTS_EVENT_CANCELLED || event_type == TTS_EVENT_ERROR);
  25. }
  26. //
  27. // UtteranceContinuousParameters
  28. //
  29. UtteranceContinuousParameters::UtteranceContinuousParameters()
  30. : rate(-1), pitch(-1), volume(-1) {}
  31. //
  32. // VoiceData
  33. //
  34. VoiceData::VoiceData()
  35. : gender(TTS_GENDER_NONE), remote(false), native(false) {}
  36. VoiceData::VoiceData(const VoiceData&) = default;
  37. VoiceData::~VoiceData() = default;
  38. //
  39. // Utterance
  40. //
  41. // static
  42. int Utterance::next_utterance_id_ = 0;
  43. Utterance::Utterance(content::BrowserContext* browser_context)
  44. : browser_context_(browser_context),
  45. id_(next_utterance_id_++),
  46. src_id_(-1),
  47. gender_(TTS_GENDER_NONE),
  48. can_enqueue_(false),
  49. char_index_(0),
  50. finished_(false) {
  51. options_.reset(new base::DictionaryValue());
  52. }
  53. Utterance::~Utterance() {
  54. DCHECK(finished_);
  55. }
  56. void Utterance::OnTtsEvent(TtsEventType event_type,
  57. int char_index,
  58. const std::string& error_message) {
  59. if (char_index >= 0)
  60. char_index_ = char_index;
  61. if (IsFinalTtsEventType(event_type))
  62. finished_ = true;
  63. if (event_delegate_)
  64. event_delegate_->OnTtsEvent(this, event_type, char_index, error_message);
  65. if (finished_)
  66. event_delegate_.reset();
  67. }
  68. void Utterance::Finish() {
  69. finished_ = true;
  70. }
  71. void Utterance::set_options(const base::Value* options) {
  72. options_.reset(options->DeepCopy());
  73. }
  74. TtsController* TtsController::GetInstance() {
  75. return TtsControllerImpl::GetInstance();
  76. }
  77. //
  78. // TtsControllerImpl
  79. //
  80. // static
  81. TtsControllerImpl* TtsControllerImpl::GetInstance() {
  82. return base::Singleton<TtsControllerImpl>::get();
  83. }
  84. TtsControllerImpl::TtsControllerImpl()
  85. : current_utterance_(NULL),
  86. paused_(false),
  87. platform_impl_(NULL),
  88. tts_engine_delegate_(NULL) {}
  89. TtsControllerImpl::~TtsControllerImpl() {
  90. if (current_utterance_) {
  91. current_utterance_->Finish();
  92. delete current_utterance_;
  93. }
  94. // Clear any queued utterances too.
  95. ClearUtteranceQueue(false); // Don't sent events.
  96. }
  97. void TtsControllerImpl::SpeakOrEnqueue(Utterance* utterance) {
  98. // If we're paused and we get an utterance that can't be queued,
  99. // flush the queue but stay in the paused state.
  100. if (paused_ && !utterance->can_enqueue()) {
  101. Stop();
  102. paused_ = true;
  103. delete utterance;
  104. return;
  105. }
  106. if (paused_ || (IsSpeaking() && utterance->can_enqueue())) {
  107. utterance_queue_.push(utterance);
  108. } else {
  109. Stop();
  110. SpeakNow(utterance);
  111. }
  112. }
  113. void TtsControllerImpl::SpeakNow(Utterance* utterance) {
  114. // Ensure we have all built-in voices loaded. This is a no-op if already
  115. // loaded.
  116. bool loaded_built_in =
  117. GetPlatformImpl()->LoadBuiltInTtsExtension(utterance->browser_context());
  118. // Get all available voices and try to find a matching voice.
  119. std::vector<VoiceData> voices;
  120. GetVoices(utterance->browser_context(), &voices);
  121. int index = GetMatchingVoice(utterance, voices);
  122. VoiceData voice;
  123. if (index != -1) {
  124. // Select the matching voice.
  125. voice = voices[index];
  126. } else {
  127. // However, if no match was found on a platform without native tts voices,
  128. // attempt to get a voice based only on the current locale without respect
  129. // to any supplied voice names.
  130. std::vector<VoiceData> native_voices;
  131. if (GetPlatformImpl()->PlatformImplAvailable())
  132. GetPlatformImpl()->GetVoices(&native_voices);
  133. if (native_voices.empty() && !voices.empty()) {
  134. // TODO(dtseng): Notify extension caller of an error.
  135. utterance->set_voice_name("");
  136. // TODO(gaochun): Replace the global variable g_browser_process with
  137. // GetContentClient()->browser() to eliminate the dependency of browser
  138. // once TTS implementation was moved to content.
  139. utterance->set_lang(g_browser_process->GetApplicationLocale());
  140. index = GetMatchingVoice(utterance, voices);
  141. // If even that fails, just take the first available voice.
  142. if (index == -1)
  143. index = 0;
  144. voice = voices[index];
  145. } else {
  146. // Otherwise, simply give native voices a chance to handle this utterance.
  147. voice.native = true;
  148. }
  149. }
  150. GetPlatformImpl()->WillSpeakUtteranceWithVoice(utterance, voice);
  151. if (!voice.native) {
  152. #if !defined(OS_ANDROID)
  153. DCHECK(!voice.extension_id.empty());
  154. current_utterance_ = utterance;
  155. utterance->set_extension_id(voice.extension_id);
  156. if (tts_engine_delegate_)
  157. tts_engine_delegate_->Speak(utterance, voice);
  158. bool sends_end_event =
  159. voice.events.find(TTS_EVENT_END) != voice.events.end();
  160. if (!sends_end_event) {
  161. utterance->Finish();
  162. delete utterance;
  163. current_utterance_ = NULL;
  164. SpeakNextUtterance();
  165. }
  166. #endif
  167. } else {
  168. // It's possible for certain platforms to send start events immediately
  169. // during |speak|.
  170. current_utterance_ = utterance;
  171. GetPlatformImpl()->clear_error();
  172. bool success = GetPlatformImpl()->Speak(utterance->id(), utterance->text(),
  173. utterance->lang(), voice,
  174. utterance->continuous_parameters());
  175. if (!success)
  176. current_utterance_ = NULL;
  177. // If the native voice wasn't able to process this speech, see if
  178. // the browser has built-in TTS that isn't loaded yet.
  179. if (!success && loaded_built_in) {
  180. utterance_queue_.push(utterance);
  181. return;
  182. }
  183. if (!success) {
  184. utterance->OnTtsEvent(TTS_EVENT_ERROR, kInvalidCharIndex,
  185. GetPlatformImpl()->error());
  186. delete utterance;
  187. return;
  188. }
  189. }
  190. }
  191. void TtsControllerImpl::Stop() {
  192. paused_ = false;
  193. if (current_utterance_ && !current_utterance_->extension_id().empty()) {
  194. #if !defined(OS_ANDROID)
  195. if (tts_engine_delegate_)
  196. tts_engine_delegate_->Stop(current_utterance_);
  197. #endif
  198. } else {
  199. GetPlatformImpl()->clear_error();
  200. GetPlatformImpl()->StopSpeaking();
  201. }
  202. if (current_utterance_)
  203. current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
  204. std::string());
  205. FinishCurrentUtterance();
  206. ClearUtteranceQueue(true); // Send events.
  207. }
  208. void TtsControllerImpl::Pause() {
  209. paused_ = true;
  210. if (current_utterance_ && !current_utterance_->extension_id().empty()) {
  211. #if !defined(OS_ANDROID)
  212. if (tts_engine_delegate_)
  213. tts_engine_delegate_->Pause(current_utterance_);
  214. #endif
  215. } else if (current_utterance_) {
  216. GetPlatformImpl()->clear_error();
  217. GetPlatformImpl()->Pause();
  218. }
  219. }
  220. void TtsControllerImpl::Resume() {
  221. paused_ = false;
  222. if (current_utterance_ && !current_utterance_->extension_id().empty()) {
  223. #if !defined(OS_ANDROID)
  224. if (tts_engine_delegate_)
  225. tts_engine_delegate_->Resume(current_utterance_);
  226. #endif
  227. } else if (current_utterance_) {
  228. GetPlatformImpl()->clear_error();
  229. GetPlatformImpl()->Resume();
  230. } else {
  231. SpeakNextUtterance();
  232. }
  233. }
  234. void TtsControllerImpl::OnTtsEvent(int utterance_id,
  235. TtsEventType event_type,
  236. int char_index,
  237. const std::string& error_message) {
  238. // We may sometimes receive completion callbacks "late", after we've
  239. // already finished the utterance (for example because another utterance
  240. // interrupted or we got a call to Stop). This is normal and we can
  241. // safely just ignore these events.
  242. if (!current_utterance_ || utterance_id != current_utterance_->id()) {
  243. return;
  244. }
  245. current_utterance_->OnTtsEvent(event_type, char_index, error_message);
  246. if (current_utterance_->finished()) {
  247. FinishCurrentUtterance();
  248. SpeakNextUtterance();
  249. }
  250. }
  251. void TtsControllerImpl::GetVoices(content::BrowserContext* browser_context,
  252. std::vector<VoiceData>* out_voices) {
  253. #if !defined(OS_ANDROID)
  254. if (browser_context && tts_engine_delegate_)
  255. tts_engine_delegate_->GetVoices(browser_context, out_voices);
  256. #endif
  257. TtsPlatformImpl* platform_impl = GetPlatformImpl();
  258. if (platform_impl) {
  259. // Ensure we have all built-in voices loaded. This is a no-op if already
  260. // loaded.
  261. platform_impl->LoadBuiltInTtsExtension(browser_context);
  262. if (platform_impl->PlatformImplAvailable())
  263. platform_impl->GetVoices(out_voices);
  264. }
  265. }
  266. bool TtsControllerImpl::IsSpeaking() {
  267. return current_utterance_ != NULL || GetPlatformImpl()->IsSpeaking();
  268. }
  269. void TtsControllerImpl::FinishCurrentUtterance() {
  270. if (current_utterance_) {
  271. if (!current_utterance_->finished())
  272. current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
  273. std::string());
  274. delete current_utterance_;
  275. current_utterance_ = NULL;
  276. }
  277. }
  278. void TtsControllerImpl::SpeakNextUtterance() {
  279. if (paused_)
  280. return;
  281. // Start speaking the next utterance in the queue. Keep trying in case
  282. // one fails but there are still more in the queue to try.
  283. while (!utterance_queue_.empty() && !current_utterance_) {
  284. Utterance* utterance = utterance_queue_.front();
  285. utterance_queue_.pop();
  286. SpeakNow(utterance);
  287. }
  288. }
  289. void TtsControllerImpl::ClearUtteranceQueue(bool send_events) {
  290. while (!utterance_queue_.empty()) {
  291. Utterance* utterance = utterance_queue_.front();
  292. utterance_queue_.pop();
  293. if (send_events)
  294. utterance->OnTtsEvent(TTS_EVENT_CANCELLED, kInvalidCharIndex,
  295. std::string());
  296. else
  297. utterance->Finish();
  298. delete utterance;
  299. }
  300. }
  301. void TtsControllerImpl::SetPlatformImpl(TtsPlatformImpl* platform_impl) {
  302. platform_impl_ = platform_impl;
  303. }
  304. int TtsControllerImpl::QueueSize() {
  305. return static_cast<int>(utterance_queue_.size());
  306. }
  307. TtsPlatformImpl* TtsControllerImpl::GetPlatformImpl() {
  308. if (!platform_impl_)
  309. platform_impl_ = TtsPlatformImpl::GetInstance();
  310. return platform_impl_;
  311. }
  312. int TtsControllerImpl::GetMatchingVoice(const Utterance* utterance,
  313. std::vector<VoiceData>& voices) {
  314. // Make two passes: the first time, do strict language matching
  315. // ('fr-FR' does not match 'fr-CA'). The second time, do prefix
  316. // language matching ('fr-FR' matches 'fr' and 'fr-CA')
  317. for (int pass = 0; pass < 2; ++pass) {
  318. for (size_t i = 0; i < voices.size(); ++i) {
  319. const VoiceData& voice = voices[i];
  320. if (!utterance->extension_id().empty() &&
  321. utterance->extension_id() != voice.extension_id) {
  322. continue;
  323. }
  324. if (!voice.name.empty() && !utterance->voice_name().empty() &&
  325. voice.name != utterance->voice_name()) {
  326. continue;
  327. }
  328. if (!voice.lang.empty() && !utterance->lang().empty()) {
  329. std::string voice_lang = voice.lang;
  330. std::string utterance_lang = utterance->lang();
  331. if (pass == 1) {
  332. voice_lang = TrimLanguageCode(voice_lang);
  333. utterance_lang = TrimLanguageCode(utterance_lang);
  334. }
  335. if (voice_lang != utterance_lang) {
  336. continue;
  337. }
  338. }
  339. if (voice.gender != TTS_GENDER_NONE &&
  340. utterance->gender() != TTS_GENDER_NONE &&
  341. voice.gender != utterance->gender()) {
  342. continue;
  343. }
  344. if (utterance->required_event_types().size() > 0) {
  345. bool has_all_required_event_types = true;
  346. for (std::set<TtsEventType>::const_iterator iter =
  347. utterance->required_event_types().begin();
  348. iter != utterance->required_event_types().end(); ++iter) {
  349. if (voice.events.find(*iter) == voice.events.end()) {
  350. has_all_required_event_types = false;
  351. break;
  352. }
  353. }
  354. if (!has_all_required_event_types)
  355. continue;
  356. }
  357. return static_cast<int>(i);
  358. }
  359. }
  360. return -1;
  361. }
  362. void TtsControllerImpl::VoicesChanged() {
  363. for (std::set<VoicesChangedDelegate*>::iterator iter =
  364. voices_changed_delegates_.begin();
  365. iter != voices_changed_delegates_.end(); ++iter) {
  366. (*iter)->OnVoicesChanged();
  367. }
  368. }
  369. void TtsControllerImpl::AddVoicesChangedDelegate(
  370. VoicesChangedDelegate* delegate) {
  371. voices_changed_delegates_.insert(delegate);
  372. }
  373. void TtsControllerImpl::RemoveVoicesChangedDelegate(
  374. VoicesChangedDelegate* delegate) {
  375. voices_changed_delegates_.erase(delegate);
  376. }
  377. void TtsControllerImpl::SetTtsEngineDelegate(TtsEngineDelegate* delegate) {
  378. tts_engine_delegate_ = delegate;
  379. }
  380. TtsEngineDelegate* TtsControllerImpl::GetTtsEngineDelegate() {
  381. return tts_engine_delegate_;
  382. }