tts_controller.h 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337
  1. // Copyright (c) 2012 The Chromium Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style license that can be
  3. // found in the LICENSE file.
  4. #ifndef CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
  5. #define CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
  6. #include <memory>
  7. #include <queue>
  8. #include <set>
  9. #include <string>
  10. #include <vector>
  11. #include "base/memory/singleton.h"
  12. #include "base/memory/weak_ptr.h"
  13. #include "url/gurl.h"
  14. class Utterance;
  15. class TtsPlatformImpl;
  16. namespace base {
  17. class Value;
  18. }
  19. namespace content {
  20. class BrowserContext;
  21. }
  22. // Events sent back from the TTS engine indicating the progress.
  23. enum TtsEventType {
  24. TTS_EVENT_START,
  25. TTS_EVENT_END,
  26. TTS_EVENT_WORD,
  27. TTS_EVENT_SENTENCE,
  28. TTS_EVENT_MARKER,
  29. TTS_EVENT_INTERRUPTED,
  30. TTS_EVENT_CANCELLED,
  31. TTS_EVENT_ERROR,
  32. TTS_EVENT_PAUSE,
  33. TTS_EVENT_RESUME
  34. };
  35. enum TtsGenderType { TTS_GENDER_NONE, TTS_GENDER_MALE, TTS_GENDER_FEMALE };
// Returns true if |event_type| is one of the event types that indicates an
// utterance is finished and can be destroyed; returns false otherwise.
// Only the declaration lives in this header, so which event types count as
// final is decided by the out-of-line definition.
bool IsFinalTtsEventType(TtsEventType event_type);
// The continuous parameters that apply to a given utterance.
// The constructor is defined out of line, so the initial values of the
// fields are not visible in this header.
struct UtteranceContinuousParameters {
  UtteranceContinuousParameters();

  // Rate, pitch and volume of the speech.
  // NOTE(review): the scale and valid range of each value are not documented
  // here -- confirm against the constructor and the callers.
  double rate;
  double pitch;
  double volume;
};
// Information about one voice.
// The default constructor, copy constructor and destructor are declared here
// and defined out of line.
struct VoiceData {
  VoiceData();
  VoiceData(const VoiceData&);
  ~VoiceData();

  // Name and language of the voice.
  // NOTE(review): the expected format of |lang| is not stated in this
  // header -- confirm.
  std::string name;
  std::string lang;

  TtsGenderType gender;

  // NOTE(review): presumably the ID of the extension that implements this
  // voice (relevant when |native| is false) -- confirm.
  std::string extension_id;

  // NOTE(review): presumably the event types this voice is able to send --
  // confirm against the code that fills it in.
  std::set<TtsEventType> events;

  // If true, the synthesis engine is a remote network resource.
  // It may be higher latency and may incur bandwidth costs.
  bool remote;

  // If true, this is implemented by this platform's subclass of
  // TtsPlatformImpl. If false, this is implemented by an extension.
  bool native;

  // NOTE(review): presumably identifies the voice to the platform
  // implementation when |native| is true -- confirm.
  std::string native_voice_identifier;
};
  64. // Interface that delegates TTS requests to user-installed extensions.
  65. class TtsEngineDelegate {
  66. public:
  67. virtual ~TtsEngineDelegate() {}
  68. // Return a list of all available voices registered.
  69. virtual void GetVoices(content::BrowserContext* browser_context,
  70. std::vector<VoiceData>* out_voices) = 0;
  71. // Speak the given utterance by sending an event to the given TTS engine.
  72. virtual void Speak(Utterance* utterance, const VoiceData& voice) = 0;
  73. // Stop speaking the given utterance by sending an event to the target
  74. // associated with this utterance.
  75. virtual void Stop(Utterance* utterance) = 0;
  76. // Pause in the middle of speaking this utterance.
  77. virtual void Pause(Utterance* utterance) = 0;
  78. // Resume speaking this utterance.
  79. virtual void Resume(Utterance* utterance) = 0;
  80. // Load the built-in component extension for ChromeOS.
  81. virtual bool LoadBuiltInTtsExtension(
  82. content::BrowserContext* browser_context) = 0;
  83. };
  84. // Class that wants to receive events on utterances.
  85. class UtteranceEventDelegate {
  86. public:
  87. virtual ~UtteranceEventDelegate() {}
  88. virtual void OnTtsEvent(Utterance* utterance,
  89. TtsEventType event_type,
  90. int char_index,
  91. const std::string& error_message) = 0;
  92. };
  93. // Class that wants to be notified when the set of
  94. // voices has changed.
  95. class VoicesChangedDelegate {
  96. public:
  97. virtual ~VoicesChangedDelegate() {}
  98. virtual void OnVoicesChanged() = 0;
  99. };
  100. // One speech utterance.
  101. class Utterance {
  102. public:
  103. // Construct an utterance given a profile and a completion task to call
  104. // when the utterance is done speaking. Before speaking this utterance,
  105. // its other parameters like text, rate, pitch, etc. should all be set.
  106. explicit Utterance(content::BrowserContext* browser_context);
  107. ~Utterance();
  108. // Sends an event to the delegate. If the event type is TTS_EVENT_END
  109. // or TTS_EVENT_ERROR, deletes the utterance. If |char_index| is -1,
  110. // uses the last good value.
  111. void OnTtsEvent(TtsEventType event_type,
  112. int char_index,
  113. const std::string& error_message);
  114. // Finish an utterance without sending an event to the delegate.
  115. void Finish();
  116. // Getters and setters for the text to speak and other speech options.
  117. void set_text(const std::string& text) { text_ = text; }
  118. const std::string& text() const { return text_; }
  119. void set_options(const base::Value* options);
  120. const base::Value* options() const { return options_.get(); }
  121. void set_src_extension_id(const std::string& src_extension_id) {
  122. src_extension_id_ = src_extension_id;
  123. }
  124. const std::string& src_extension_id() { return src_extension_id_; }
  125. void set_src_id(int src_id) { src_id_ = src_id; }
  126. int src_id() { return src_id_; }
  127. void set_src_url(const GURL& src_url) { src_url_ = src_url; }
  128. const GURL& src_url() { return src_url_; }
  129. void set_voice_name(const std::string& voice_name) {
  130. voice_name_ = voice_name;
  131. }
  132. const std::string& voice_name() const { return voice_name_; }
  133. void set_lang(const std::string& lang) { lang_ = lang; }
  134. const std::string& lang() const { return lang_; }
  135. void set_gender(TtsGenderType gender) { gender_ = gender; }
  136. TtsGenderType gender() const { return gender_; }
  137. void set_continuous_parameters(const UtteranceContinuousParameters& params) {
  138. continuous_parameters_ = params;
  139. }
  140. const UtteranceContinuousParameters& continuous_parameters() {
  141. return continuous_parameters_;
  142. }
  143. void set_can_enqueue(bool can_enqueue) { can_enqueue_ = can_enqueue; }
  144. bool can_enqueue() const { return can_enqueue_; }
  145. void set_required_event_types(const std::set<TtsEventType>& types) {
  146. required_event_types_ = types;
  147. }
  148. const std::set<TtsEventType>& required_event_types() const {
  149. return required_event_types_;
  150. }
  151. void set_desired_event_types(const std::set<TtsEventType>& types) {
  152. desired_event_types_ = types;
  153. }
  154. const std::set<TtsEventType>& desired_event_types() const {
  155. return desired_event_types_;
  156. }
  157. const std::string& extension_id() const { return extension_id_; }
  158. void set_extension_id(const std::string& extension_id) {
  159. extension_id_ = extension_id;
  160. }
  161. UtteranceEventDelegate* event_delegate() const {
  162. return event_delegate_.get();
  163. }
  164. void set_event_delegate(
  165. base::WeakPtr<UtteranceEventDelegate> event_delegate) {
  166. event_delegate_ = event_delegate;
  167. }
  168. // Getters and setters for internal state.
  169. content::BrowserContext* browser_context() const { return browser_context_; }
  170. int id() const { return id_; }
  171. bool finished() const { return finished_; }
  172. private:
  173. // The BrowserContext that initiated this utterance.
  174. content::BrowserContext* browser_context_;
  175. // The extension ID of the extension providing TTS for this utterance, or
  176. // empty if native TTS is being used.
  177. std::string extension_id_;
  178. // The unique ID of this utterance, used to associate callback functions
  179. // with utterances.
  180. int id_;
  181. // The id of the next utterance, so we can associate requests with
  182. // responses.
  183. static int next_utterance_id_;
  184. // The text to speak.
  185. std::string text_;
  186. // The full options arg passed to tts.speak, which may include fields
  187. // other than the ones we explicitly parse, below.
  188. std::unique_ptr<base::Value> options_;
  189. // The extension ID of the extension that called speak() and should
  190. // receive events.
  191. std::string src_extension_id_;
  192. // The source extension's ID of this utterance, so that it can associate
  193. // events with the appropriate callback.
  194. int src_id_;
  195. // The URL of the page where the source extension called speak.
  196. GURL src_url_;
  197. // The delegate to be called when an utterance event is fired.
  198. base::WeakPtr<UtteranceEventDelegate> event_delegate_;
  199. // The parsed options.
  200. std::string voice_name_;
  201. std::string lang_;
  202. TtsGenderType gender_;
  203. UtteranceContinuousParameters continuous_parameters_;
  204. bool can_enqueue_;
  205. std::set<TtsEventType> required_event_types_;
  206. std::set<TtsEventType> desired_event_types_;
  207. // The index of the current char being spoken.
  208. int char_index_;
  209. // True if this utterance received an event indicating it's done.
  210. bool finished_;
  211. };
  212. // Singleton class that manages text-to-speech for the TTS and TTS engine
  213. // extension APIs, maintaining a queue of pending utterances and keeping
  214. // track of all state.
  215. class TtsController {
  216. public:
  217. // Get the single instance of this class.
  218. static TtsController* GetInstance();
  219. // Returns true if we're currently speaking an utterance.
  220. virtual bool IsSpeaking() = 0;
  221. // Speak the given utterance. If the utterance's can_enqueue flag is true
  222. // and another utterance is in progress, adds it to the end of the queue.
  223. // Otherwise, interrupts any current utterance and speaks this one
  224. // immediately.
  225. virtual void SpeakOrEnqueue(Utterance* utterance) = 0;
  226. // Stop all utterances and flush the queue. Implies leaving pause mode
  227. // as well.
  228. virtual void Stop() = 0;
  229. // Pause the speech queue. Some engines may support pausing in the middle
  230. // of an utterance.
  231. virtual void Pause() = 0;
  232. // Resume speaking.
  233. virtual void Resume() = 0;
  234. // Handle events received from the speech engine. Events are forwarded to
  235. // the callback function, and in addition, completion and error events
  236. // trigger finishing the current utterance and starting the next one, if
  237. // any.
  238. virtual void OnTtsEvent(int utterance_id,
  239. TtsEventType event_type,
  240. int char_index,
  241. const std::string& error_message) = 0;
  242. // Return a list of all available voices, including the native voice,
  243. // if supported, and all voices registered by extensions.
  244. virtual void GetVoices(content::BrowserContext* browser_context,
  245. std::vector<VoiceData>* out_voices) = 0;
  246. // Called by the extension system or platform implementation when the
  247. // list of voices may have changed and should be re-queried.
  248. virtual void VoicesChanged() = 0;
  249. // Add a delegate that wants to be notified when the set of voices changes.
  250. virtual void AddVoicesChangedDelegate(VoicesChangedDelegate* delegate) = 0;
  251. // Remove delegate that wants to be notified when the set of voices changes.
  252. virtual void RemoveVoicesChangedDelegate(VoicesChangedDelegate* delegate) = 0;
  253. // Set the delegate that processes TTS requests with user-installed
  254. // extensions.
  255. virtual void SetTtsEngineDelegate(TtsEngineDelegate* delegate) = 0;
  256. // Get the delegate that processes TTS requests with user-installed
  257. // extensions.
  258. virtual TtsEngineDelegate* GetTtsEngineDelegate() = 0;
  259. // For unit testing.
  260. virtual void SetPlatformImpl(TtsPlatformImpl* platform_impl) = 0;
  261. virtual int QueueSize() = 0;
  262. protected:
  263. virtual ~TtsController() {}
  264. };
  265. #endif // CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_