asterisk/res/res_speech.c
Kevin Harwell 272bac70dd res_aeap & res_speech_aeap: Add Asterisk External Application Protocol
Add framework to connect to, and read and write protocol based
messages from and to an external application using an Asterisk
External Application Protocol (AEAP). This has been divided into
several abstractions:

 1. transport - base communication layer (currently websocket only)
 2. message - AEAP description and data (currently JSON only)
 3. transaction - links/binds requests and responses
 4. aeap - transport, message, and transaction handler/manager

This patch also adds an AEAP implementation for speech to text.
Existing speech API callbacks for speech to text have been completed
making it possible for Asterisk to connect to a configured external
translator service and provide audio for STT. Results can also be
received from the external translator, and made available as speech
results in Asterisk.

Unit tests have also been created that test the AEAP framework, and
also the speech to text implementation.

ASTERISK-29726 #close

Change-Id: Iaa4b259f84aa63501e5fd2a6fb107f900b4d4ed2
2022-04-26 14:26:48 -05:00

416 lines
11 KiB
C

/*
* Asterisk -- An open source telephony toolkit.
*
* Copyright (C) 2006, Digium, Inc.
*
* Joshua Colp <jcolp@digium.com>
*
* See http://www.asterisk.org for more information about
* the Asterisk project. Please do not directly contact
* any of the maintainers of this project for assistance;
* the project provides a web site, mailing lists and IRC
* channels for your use.
*
* This program is free software, distributed under the terms of
* the GNU General Public License Version 2. See the LICENSE file
* at the top of the source tree.
*/
/*! \file
*
* \brief Generic Speech Recognition API
*
* \author Joshua Colp <jcolp@digium.com>
*/
/*** MODULEINFO
<support_level>core</support_level>
***/
#include "asterisk.h"
#include "asterisk/channel.h"
#include "asterisk/module.h"
#include "asterisk/lock.h"
#include "asterisk/linkedlists.h"
#include "asterisk/cli.h"
#include "asterisk/term.h"
#include "asterisk/speech.h"
#include "asterisk/format_cache.h"
static AST_RWLIST_HEAD_STATIC(engines, ast_speech_engine);
static struct ast_speech_engine *default_engine = NULL;
/*! \brief Find a speech recognition engine of specified name, if NULL then use the default one */
struct ast_speech_engine *ast_speech_find_engine(const char *engine_name)
{
struct ast_speech_engine *engine = NULL;
/* If no name is specified -- use the default engine */
if (ast_strlen_zero(engine_name))
return default_engine;
AST_RWLIST_RDLOCK(&engines);
AST_RWLIST_TRAVERSE(&engines, engine, list) {
if (!strcasecmp(engine->name, engine_name)) {
break;
}
}
AST_RWLIST_UNLOCK(&engines);
return engine;
}
/*! \brief Activate a loaded (either local or global) grammar */
int ast_speech_grammar_activate(struct ast_speech *speech, const char *grammar_name)
{
return (speech->engine->activate ? speech->engine->activate(speech, grammar_name) : -1);
}
/*! \brief Deactivate a loaded grammar on a speech structure */
int ast_speech_grammar_deactivate(struct ast_speech *speech, const char *grammar_name)
{
return (speech->engine->deactivate ? speech->engine->deactivate(speech, grammar_name) : -1);
}
/*! \brief Load a local grammar on a speech structure */
int ast_speech_grammar_load(struct ast_speech *speech, const char *grammar_name, const char *grammar)
{
return (speech->engine->load ? speech->engine->load(speech, grammar_name, grammar) : -1);
}
/*! \brief Unload a local grammar from a speech structure */
int ast_speech_grammar_unload(struct ast_speech *speech, const char *grammar_name)
{
return (speech->engine->unload ? speech->engine->unload(speech, grammar_name) : -1);
}
/*! \brief Return the results of a recognition from the speech structure */
struct ast_speech_result *ast_speech_results_get(struct ast_speech *speech)
{
return (speech->engine->get ? speech->engine->get(speech) : NULL);
}
/*! \brief Free a list of results */
int ast_speech_results_free(struct ast_speech_result *result)
{
struct ast_speech_result *current_result = result, *prev_result = NULL;
int res = 0;
while (current_result != NULL) {
prev_result = current_result;
/* Deallocate what we can */
if (current_result->text != NULL) {
ast_free(current_result->text);
current_result->text = NULL;
}
if (current_result->grammar != NULL) {
ast_free(current_result->grammar);
current_result->grammar = NULL;
}
/* Move on and then free ourselves */
current_result = AST_LIST_NEXT(current_result, list);
ast_free(prev_result);
prev_result = NULL;
}
return res;
}
/*! \brief Start speech recognition on a speech structure */
void ast_speech_start(struct ast_speech *speech)
{
/* Clear any flags that may affect things */
ast_clear_flag(speech, AST_SPEECH_SPOKE);
ast_clear_flag(speech, AST_SPEECH_QUIET);
ast_clear_flag(speech, AST_SPEECH_HAVE_RESULTS);
/* If results are on the structure, free them since we are starting again */
if (speech->results) {
ast_speech_results_free(speech->results);
speech->results = NULL;
}
/* If the engine needs to start stuff up, do it */
if (speech->engine->start)
speech->engine->start(speech);
return;
}
/*! \brief Write in signed linear audio to be recognized */
int ast_speech_write(struct ast_speech *speech, void *data, int len)
{
/* Make sure the speech engine is ready to accept audio */
if (speech->state != AST_SPEECH_STATE_READY)
return -1;
return speech->engine->write(speech, data, len);
}
/*! \brief Signal to the engine that DTMF was received */
int ast_speech_dtmf(struct ast_speech *speech, const char *dtmf)
{
int res = 0;
if (speech->state != AST_SPEECH_STATE_READY)
return -1;
if (speech->engine->dtmf != NULL) {
res = speech->engine->dtmf(speech, dtmf);
}
return res;
}
/*! \brief Change an engine specific attribute */
int ast_speech_change(struct ast_speech *speech, const char *name, const char *value)
{
return (speech->engine->change ? speech->engine->change(speech, name, value) : -1);
}
/*! \brief Get an engine specific attribute */
int ast_speech_get_setting(struct ast_speech *speech, const char *name, char *buf, size_t len)
{
return (speech->engine->get_setting ? speech->engine->get_setting(speech, name, buf, len) : -1);
}
/*! \brief Create a new speech structure using the engine specified */
struct ast_speech *ast_speech_new(const char *engine_name, const struct ast_format_cap *cap)
{
struct ast_speech_engine *engine = NULL;
struct ast_speech *new_speech = NULL;
struct ast_format_cap *joint;
RAII_VAR(struct ast_format *, best, NULL, ao2_cleanup);
/* Try to find the speech recognition engine that was requested */
if (!(engine = ast_speech_find_engine(engine_name)))
return NULL;
joint = ast_format_cap_alloc(AST_FORMAT_CAP_FLAG_DEFAULT);
if (!joint) {
return NULL;
}
ast_format_cap_get_compatible(engine->formats, cap, joint);
best = ast_format_cap_get_format(joint, 0);
ao2_ref(joint, -1);
if (!best) {
if (ast_format_cap_iscompatible_format(engine->formats, ast_format_slin) != AST_FORMAT_CMP_NOT_EQUAL) {
best = ao2_bump(ast_format_slin);
} else {
return NULL;
}
}
/* Allocate our own speech structure, and try to allocate a structure from the engine too */
if (!(new_speech = ast_calloc(1, sizeof(*new_speech)))) {
return NULL;
}
/* Initialize the lock */
ast_mutex_init(&new_speech->lock);
/* Make sure no results are present */
new_speech->results = NULL;
/* Copy over our engine pointer */
new_speech->engine = engine;
/* Can't forget the format audio is going to be in */
new_speech->format = ao2_bump(best);
/* We are not ready to accept audio yet */
ast_speech_change_state(new_speech, AST_SPEECH_STATE_NOT_READY);
/* Pass ourselves to the engine so they can set us up some more and if they error out then do not create a structure */
if (engine->create(new_speech, new_speech->format)) {
ast_mutex_destroy(&new_speech->lock);
ao2_ref(new_speech->format, -1);
ast_free(new_speech);
return NULL;
}
return new_speech;
}
/*! \brief Destroy a speech structure */
int ast_speech_destroy(struct ast_speech *speech)
{
int res = 0;
/* Call our engine so we are destroyed properly */
speech->engine->destroy(speech);
/* Deinitialize the lock */
ast_mutex_destroy(&speech->lock);
/* If results exist on the speech structure, destroy them */
if (speech->results)
ast_speech_results_free(speech->results);
/* If a processing sound is set - free the memory used by it */
if (speech->processing_sound)
ast_free(speech->processing_sound);
ao2_ref(speech->format, -1);
/* Aloha we are done */
ast_free(speech);
return res;
}
/*! \brief Change state of a speech structure */
int ast_speech_change_state(struct ast_speech *speech, int state)
{
int res = 0;
switch (state) {
case AST_SPEECH_STATE_WAIT:
/* The engine heard audio, so they spoke */
ast_set_flag(speech, AST_SPEECH_SPOKE);
default:
speech->state = state;
break;
}
return res;
}
const char *ast_speech_results_type_to_string(enum ast_speech_results_type type)
{
switch (type) {
case AST_SPEECH_RESULTS_TYPE_NORMAL:
return "normal";
case AST_SPEECH_RESULTS_TYPE_NBEST:
return "nbest";
default:
ast_assert(0);
return "unknown";
}
}
/*! \brief Change the type of results we want */
int ast_speech_change_results_type(struct ast_speech *speech, enum ast_speech_results_type results_type)
{
speech->results_type = results_type;
return (speech->engine->change_results_type ? speech->engine->change_results_type(speech, results_type) : 0);
}
/*! \brief Register a speech recognition engine */
int ast_speech_register(struct ast_speech_engine *engine)
{
int res = 0;
/* Confirm the engine meets the minimum API requirements */
if (!engine->create || !engine->write || !engine->destroy) {
ast_log(LOG_WARNING, "Speech recognition engine '%s' did not meet minimum API requirements.\n", engine->name);
return -1;
}
/* If an engine is already loaded with this name, error out */
if (ast_speech_find_engine(engine->name)) {
ast_log(LOG_WARNING, "Speech recognition engine '%s' already exists.\n", engine->name);
return -1;
}
ast_verb(2, "Registered speech recognition engine '%s'\n", engine->name);
/* Add to the engine linked list and make default if needed */
AST_RWLIST_WRLOCK(&engines);
AST_RWLIST_INSERT_HEAD(&engines, engine, list);
if (!default_engine) {
default_engine = engine;
ast_verb(2, "Made '%s' the default speech recognition engine\n", engine->name);
}
AST_RWLIST_UNLOCK(&engines);
return res;
}
/*! \brief Unregister a speech recognition engine */
int ast_speech_unregister(const char *engine_name)
{
return ast_speech_unregister2(engine_name) == NULL ? -1 : 0;
}
struct ast_speech_engine *ast_speech_unregister2(const char *engine_name)
{
struct ast_speech_engine *engine = NULL;
if (ast_strlen_zero(engine_name)) {
return NULL;
}
AST_RWLIST_WRLOCK(&engines);
AST_RWLIST_TRAVERSE_SAFE_BEGIN(&engines, engine, list) {
if (!strcasecmp(engine->name, engine_name)) {
/* We have our engine... removed it */
AST_RWLIST_REMOVE_CURRENT(list);
/* If this was the default engine, we need to pick a new one */
if (engine == default_engine) {
default_engine = AST_RWLIST_FIRST(&engines);
}
ast_verb(2, "Unregistered speech recognition engine '%s'\n", engine_name);
/* All went well */
break;
}
}
AST_RWLIST_TRAVERSE_SAFE_END;
AST_RWLIST_UNLOCK(&engines);
return engine;
}
void ast_speech_unregister_engines(
int (*should_unregister)(const struct ast_speech_engine *engine, void *data), void *data,
void (*on_unregistered)(void *obj))
{
struct ast_speech_engine *engine = NULL;
if (!should_unregister) {
return;
}
AST_RWLIST_WRLOCK(&engines);
AST_RWLIST_TRAVERSE_SAFE_BEGIN(&engines, engine, list) {
if (should_unregister(engine, data)) {
/* We have our engine... removed it */
AST_RWLIST_REMOVE_CURRENT(list);
/* If this was the default engine, we need to pick a new one */
if (engine == default_engine) {
default_engine = AST_RWLIST_FIRST(&engines);
}
ast_verb(2, "Unregistered speech recognition engine '%s'\n", engine->name);
/* All went well */
if (on_unregistered) {
on_unregistered(engine);
}
}
}
AST_RWLIST_TRAVERSE_SAFE_END;
AST_RWLIST_UNLOCK(&engines);
}
static int unload_module(void)
{
/* We can not be unloaded */
return -1;
}
static int load_module(void)
{
return AST_MODULE_LOAD_SUCCESS;
}
AST_MODULE_INFO(ASTERISK_GPL_KEY, AST_MODFLAG_GLOBAL_SYMBOLS | AST_MODFLAG_LOAD_ORDER, "Generic Speech Recognition API",
.support_level = AST_MODULE_SUPPORT_CORE,
.load = load_module,
.unload = unload_module,
.load_pri = AST_MODPRI_APP_DEPEND - 1,
);