res_speech.c 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351
/*
 * Asterisk -- An open source telephony toolkit.
 *
 * Copyright (C) 2006, Digium, Inc.
 *
 * Joshua Colp <jcolp@digium.com>
 *
 * See http://www.asterisk.org for more information about
 * the Asterisk project. Please do not directly contact
 * any of the maintainers of this project for assistance;
 * the project provides a web site, mailing lists and IRC
 * channels for your use.
 *
 * This program is free software, distributed under the terms of
 * the GNU General Public License Version 2. See the LICENSE file
 * at the top of the source tree.
 */

/*! \file
 *
 * \brief Generic Speech Recognition API
 *
 * \author Joshua Colp <jcolp@digium.com>
 */

/*** MODULEINFO
	<support_level>core</support_level>
 ***/
  27. #include "asterisk.h"
  28. ASTERISK_FILE_VERSION(__FILE__, "$Revision$");
  29. #include "asterisk/channel.h"
  30. #include "asterisk/module.h"
  31. #include "asterisk/lock.h"
  32. #include "asterisk/linkedlists.h"
  33. #include "asterisk/cli.h"
  34. #include "asterisk/term.h"
  35. #include "asterisk/speech.h"
  36. static AST_RWLIST_HEAD_STATIC(engines, ast_speech_engine);
  37. static struct ast_speech_engine *default_engine = NULL;
  38. /*! \brief Find a speech recognition engine of specified name, if NULL then use the default one */
  39. static struct ast_speech_engine *find_engine(const char *engine_name)
  40. {
  41. struct ast_speech_engine *engine = NULL;
  42. /* If no name is specified -- use the default engine */
  43. if (ast_strlen_zero(engine_name))
  44. return default_engine;
  45. AST_RWLIST_RDLOCK(&engines);
  46. AST_RWLIST_TRAVERSE(&engines, engine, list) {
  47. if (!strcasecmp(engine->name, engine_name)) {
  48. break;
  49. }
  50. }
  51. AST_RWLIST_UNLOCK(&engines);
  52. return engine;
  53. }
  54. /*! \brief Activate a loaded (either local or global) grammar */
  55. int ast_speech_grammar_activate(struct ast_speech *speech, const char *grammar_name)
  56. {
  57. return (speech->engine->activate ? speech->engine->activate(speech, grammar_name) : -1);
  58. }
  59. /*! \brief Deactivate a loaded grammar on a speech structure */
  60. int ast_speech_grammar_deactivate(struct ast_speech *speech, const char *grammar_name)
  61. {
  62. return (speech->engine->deactivate ? speech->engine->deactivate(speech, grammar_name) : -1);
  63. }
  64. /*! \brief Load a local grammar on a speech structure */
  65. int ast_speech_grammar_load(struct ast_speech *speech, const char *grammar_name, const char *grammar)
  66. {
  67. return (speech->engine->load ? speech->engine->load(speech, grammar_name, grammar) : -1);
  68. }
  69. /*! \brief Unload a local grammar from a speech structure */
  70. int ast_speech_grammar_unload(struct ast_speech *speech, const char *grammar_name)
  71. {
  72. return (speech->engine->unload ? speech->engine->unload(speech, grammar_name) : -1);
  73. }
  74. /*! \brief Return the results of a recognition from the speech structure */
  75. struct ast_speech_result *ast_speech_results_get(struct ast_speech *speech)
  76. {
  77. return (speech->engine->get ? speech->engine->get(speech) : NULL);
  78. }
  79. /*! \brief Free a list of results */
  80. int ast_speech_results_free(struct ast_speech_result *result)
  81. {
  82. struct ast_speech_result *current_result = result, *prev_result = NULL;
  83. int res = 0;
  84. while (current_result != NULL) {
  85. prev_result = current_result;
  86. /* Deallocate what we can */
  87. if (current_result->text != NULL) {
  88. ast_free(current_result->text);
  89. current_result->text = NULL;
  90. }
  91. if (current_result->grammar != NULL) {
  92. ast_free(current_result->grammar);
  93. current_result->grammar = NULL;
  94. }
  95. /* Move on and then free ourselves */
  96. current_result = AST_LIST_NEXT(current_result, list);
  97. ast_free(prev_result);
  98. prev_result = NULL;
  99. }
  100. return res;
  101. }
  102. /*! \brief Start speech recognition on a speech structure */
  103. void ast_speech_start(struct ast_speech *speech)
  104. {
  105. /* Clear any flags that may affect things */
  106. ast_clear_flag(speech, AST_SPEECH_SPOKE);
  107. ast_clear_flag(speech, AST_SPEECH_QUIET);
  108. ast_clear_flag(speech, AST_SPEECH_HAVE_RESULTS);
  109. /* If results are on the structure, free them since we are starting again */
  110. if (speech->results) {
  111. ast_speech_results_free(speech->results);
  112. speech->results = NULL;
  113. }
  114. /* If the engine needs to start stuff up, do it */
  115. if (speech->engine->start)
  116. speech->engine->start(speech);
  117. return;
  118. }
  119. /*! \brief Write in signed linear audio to be recognized */
  120. int ast_speech_write(struct ast_speech *speech, void *data, int len)
  121. {
  122. /* Make sure the speech engine is ready to accept audio */
  123. if (speech->state != AST_SPEECH_STATE_READY)
  124. return -1;
  125. return speech->engine->write(speech, data, len);
  126. }
  127. /*! \brief Signal to the engine that DTMF was received */
  128. int ast_speech_dtmf(struct ast_speech *speech, const char *dtmf)
  129. {
  130. int res = 0;
  131. if (speech->state != AST_SPEECH_STATE_READY)
  132. return -1;
  133. if (speech->engine->dtmf != NULL) {
  134. res = speech->engine->dtmf(speech, dtmf);
  135. }
  136. return res;
  137. }
  138. /*! \brief Change an engine specific attribute */
  139. int ast_speech_change(struct ast_speech *speech, const char *name, const char *value)
  140. {
  141. return (speech->engine->change ? speech->engine->change(speech, name, value) : -1);
  142. }
  143. /*! \brief Create a new speech structure using the engine specified */
  144. struct ast_speech *ast_speech_new(const char *engine_name, int formats)
  145. {
  146. struct ast_speech_engine *engine = NULL;
  147. struct ast_speech *new_speech = NULL;
  148. int format = AST_FORMAT_SLINEAR;
  149. /* Try to find the speech recognition engine that was requested */
  150. if (!(engine = find_engine(engine_name)))
  151. return NULL;
  152. /* Before even allocating the memory below do some codec negotiation, we choose the best codec possible and fall back to signed linear if possible */
  153. if ((format = (engine->formats & formats)))
  154. format = ast_best_codec(format);
  155. else if ((engine->formats & AST_FORMAT_SLINEAR))
  156. format = AST_FORMAT_SLINEAR;
  157. else
  158. return NULL;
  159. /* Allocate our own speech structure, and try to allocate a structure from the engine too */
  160. if (!(new_speech = ast_calloc(1, sizeof(*new_speech))))
  161. return NULL;
  162. /* Initialize the lock */
  163. ast_mutex_init(&new_speech->lock);
  164. /* Make sure no results are present */
  165. new_speech->results = NULL;
  166. /* Copy over our engine pointer */
  167. new_speech->engine = engine;
  168. /* Can't forget the format audio is going to be in */
  169. new_speech->format = format;
  170. /* We are not ready to accept audio yet */
  171. ast_speech_change_state(new_speech, AST_SPEECH_STATE_NOT_READY);
  172. /* Pass ourselves to the engine so they can set us up some more and if they error out then do not create a structure */
  173. if (engine->create(new_speech, format)) {
  174. ast_mutex_destroy(&new_speech->lock);
  175. ast_free(new_speech);
  176. new_speech = NULL;
  177. }
  178. return new_speech;
  179. }
  180. /*! \brief Destroy a speech structure */
  181. int ast_speech_destroy(struct ast_speech *speech)
  182. {
  183. int res = 0;
  184. /* Call our engine so we are destroyed properly */
  185. speech->engine->destroy(speech);
  186. /* Deinitialize the lock */
  187. ast_mutex_destroy(&speech->lock);
  188. /* If results exist on the speech structure, destroy them */
  189. if (speech->results)
  190. ast_speech_results_free(speech->results);
  191. /* If a processing sound is set - free the memory used by it */
  192. if (speech->processing_sound)
  193. ast_free(speech->processing_sound);
  194. /* Aloha we are done */
  195. ast_free(speech);
  196. return res;
  197. }
  198. /*! \brief Change state of a speech structure */
  199. int ast_speech_change_state(struct ast_speech *speech, int state)
  200. {
  201. int res = 0;
  202. switch (state) {
  203. case AST_SPEECH_STATE_WAIT:
  204. /* The engine heard audio, so they spoke */
  205. ast_set_flag(speech, AST_SPEECH_SPOKE);
  206. default:
  207. speech->state = state;
  208. break;
  209. }
  210. return res;
  211. }
  212. /*! \brief Change the type of results we want */
  213. int ast_speech_change_results_type(struct ast_speech *speech, enum ast_speech_results_type results_type)
  214. {
  215. speech->results_type = results_type;
  216. return (speech->engine->change_results_type ? speech->engine->change_results_type(speech, results_type) : 0);
  217. }
  218. /*! \brief Register a speech recognition engine */
  219. int ast_speech_register(struct ast_speech_engine *engine)
  220. {
  221. struct ast_speech_engine *existing_engine = NULL;
  222. int res = 0;
  223. /* Confirm the engine meets the minimum API requirements */
  224. if (!engine->create || !engine->write || !engine->destroy) {
  225. ast_log(LOG_WARNING, "Speech recognition engine '%s' did not meet minimum API requirements.\n", engine->name);
  226. return -1;
  227. }
  228. /* If an engine is already loaded with this name, error out */
  229. if ((existing_engine = find_engine(engine->name))) {
  230. ast_log(LOG_WARNING, "Speech recognition engine '%s' already exists.\n", engine->name);
  231. return -1;
  232. }
  233. ast_verb(2, "Registered speech recognition engine '%s'\n", engine->name);
  234. /* Add to the engine linked list and make default if needed */
  235. AST_RWLIST_WRLOCK(&engines);
  236. AST_RWLIST_INSERT_HEAD(&engines, engine, list);
  237. if (!default_engine) {
  238. default_engine = engine;
  239. ast_verb(2, "Made '%s' the default speech recognition engine\n", engine->name);
  240. }
  241. AST_RWLIST_UNLOCK(&engines);
  242. return res;
  243. }
  244. /*! \brief Unregister a speech recognition engine */
  245. int ast_speech_unregister(const char *engine_name)
  246. {
  247. struct ast_speech_engine *engine = NULL;
  248. int res = -1;
  249. if (ast_strlen_zero(engine_name))
  250. return -1;
  251. AST_RWLIST_WRLOCK(&engines);
  252. AST_RWLIST_TRAVERSE_SAFE_BEGIN(&engines, engine, list) {
  253. if (!strcasecmp(engine->name, engine_name)) {
  254. /* We have our engine... removed it */
  255. AST_RWLIST_REMOVE_CURRENT(list);
  256. /* If this was the default engine, we need to pick a new one */
  257. if (engine == default_engine) {
  258. default_engine = AST_RWLIST_FIRST(&engines);
  259. }
  260. ast_verb(2, "Unregistered speech recognition engine '%s'\n", engine_name);
  261. /* All went well */
  262. res = 0;
  263. break;
  264. }
  265. }
  266. AST_RWLIST_TRAVERSE_SAFE_END;
  267. AST_RWLIST_UNLOCK(&engines);
  268. return res;
  269. }
  270. static int unload_module(void)
  271. {
  272. /* We can not be unloaded */
  273. return -1;
  274. }
  275. static int load_module(void)
  276. {
  277. return AST_MODULE_LOAD_SUCCESS;
  278. }
  279. AST_MODULE_INFO(ASTERISK_GPL_KEY, AST_MODFLAG_GLOBAL_SYMBOLS | AST_MODFLAG_LOAD_ORDER, "Generic Speech Recognition API",
  280. .load = load_module,
  281. .unload = unload_module,
  282. .load_pri = AST_MODPRI_APP_DEPEND,
  283. );