SkylarkOS 语音唤醒使用指南
优质
小牛编辑
132浏览
2023-12-01
SkylarkOS 获取音频数据不走 Linux 的 PortAudio 接口,而是由我们自己定义的设备驱动 /dev/gxvsp 提供:应用通过读取该设备来获取音频数据。
该驱动由我们编译生成为 vsp.ko,放在文件系统的 /lib/modules/4.2.25 目录下。
输出音频只支持:16K,16bit,单声道,PCM音频数据
操作接口参考:vsp_ioctl.h
/* Voice Signal Preprocess
* Copyright (C) 2001-2019 NationalChip Co., Ltd
* ALL RIGHTS RESERVED!
*
* vsp_ioctl.h: VSP I/O Control command between user space application and device driver
*
*/
#ifndef __VSP_IOCTL_H__
#define __VSP_IOCTL_H__
#define VSP_DEVICE_NAME "vsp"
#define VSP_IRQ_NAME "vsp"
#define VSP_CLASS_NAME "gxvsp"
#define VSP_IOC_VERSION 0x20190819
//=================================================================================================
// For VSP_IOC_SWITCH_MODE
// Operating modes that can be requested from the driver. The enumerators carry
// implicit values 0..9, so their order is part of the user/kernel ABI and must
// not be changed.
// NOTE(review): the meaning of each mode is defined by the firmware/driver and
// is not visible here -- consult the driver documentation.
typedef enum {
VSP_IOC_MODE_IDLE,
VSP_IOC_MODE_BOOT,
VSP_IOC_MODE_STANDBY,
VSP_IOC_MODE_ACTIVE,
VSP_IOC_MODE_MODEM,
VSP_IOC_MODE_BYPASS,
VSP_IOC_MODE_TEST,
VSP_IOC_MODE_UAC, // For Test
VSP_IOC_MODE_FACTORY,
VSP_IOC_MODE_PLC,
} VSP_IOC_MODE_TYPE;
//-------------------------------------------------------------------------------------------------
// For VSP_IOC_LOAD_DSP and VSP_IOC_LOAD_MCU
// Describes a firmware image held in a user-space buffer.
typedef struct {
void *buffer; /* start of the firmware image */
unsigned int size; /* length of the image; presumably in bytes -- TODO confirm */
} VSP_IOC_FIRMWARE;
//-------------------------------------------------------------------------------------------------
// For VSP_IOC_GET_INFO
// Static configuration reported by the driver. Applications use it to size the
// audio buffers they attach to a VSP_IOC_CONTEXT.
typedef struct {
/* Version related */
unsigned int msg_version;
unsigned int ioc_version; /* kernel-side counterpart of VSP_IOC_VERSION */
/* Audio related */
unsigned int sample_rate; /* in Hz */
unsigned int mic_num; /* number of microphone channels */
unsigned int ref_num; /* number of reference channels */
unsigned int out_num; /* number of output channels */
unsigned int out_interlaced;
unsigned int frame_length; /* in ms */
unsigned int frame_num; /* per context */
/* Result related */
unsigned int features_dim; /* per frame */
unsigned int ext_buffer_size;
/* Led related */
unsigned int led_num;
/* Command related */
unsigned int cmd_num;
unsigned int cmd_size;
unsigned int cmd_data_size;
} VSP_IOC_INFO;
//-------------------------------------------------------------------------------------------------
// For VSP_IOC_GET_CONTEXT and VSP_IOC_PUT_CONTEXT
// Generic (address, size) descriptor for a user-space buffer. It is also reused
// by the LED frame and MCU command structures.
typedef struct {
void *addr; /* start of the buffer */
unsigned int size; /* buffer length; the audio example fills this in bytes */
} VSP_IOC_BUFFER;
/*
 * One processing context exchanged through VSP_IOC_GET_CONTEXT /
 * VSP_IOC_PUT_CONTEXT. The caller supplies the buffer descriptors; fields
 * marked "output" are filled in by the driver, fields marked "input" are
 * provided by the caller.
 */
typedef struct {
unsigned mic_mask:16; /* output */
unsigned ref_mask:16; /* output */
unsigned int frame_index; /* output */
unsigned int ctx_index; /* output */
unsigned vad; /* output */
unsigned int kws; /* output */
unsigned int mic_gain; /* input */
unsigned int ref_gain; /* input */
unsigned int direction; /* 0 - 360 degree */
VSP_IOC_BUFFER features;
VSP_IOC_BUFFER out_buffer; /* only 1 channel */
VSP_IOC_BUFFER mic_buffer[8]; /* max 8 channel */
VSP_IOC_BUFFER ref_buffer[2]; /* max 2 channel */
VSP_IOC_BUFFER ext_buffer;
} VSP_IOC_CONTEXT;
//-------------------------------------------------------------------------------------------------
// For VSP_IOC_PUT_LED_FRAME
// One LED pixel, accessible either as a packed 32-bit value or as four 8-bit
// components.
// NOTE(review): which byte of `value` each component lands in depends on the
// compiler's bit-field layout -- confirm against the target toolchain.
typedef union {
unsigned int value;
struct {
unsigned r:8;
unsigned g:8;
unsigned b:8;
unsigned a:8;
}bits;
} VSP_IOC_PIXEL;
/* One LED animation frame submitted with VSP_IOC_PUT_LED_FRAME. */
typedef struct {
unsigned short transition; /* transition time, in ms, should <= duration */
unsigned short duration; /* duration time, in ms */
VSP_IOC_BUFFER pixels; /* list of pixels; presumably an array of VSP_IOC_PIXEL -- TODO confirm */
} VSP_IOC_LED_FRAME;
//-------------------------------------------------------------------------------------------------
// For VSP_IOC_GET_MCU_COMMAND
// A command record retrieved from the MCU.
// NOTE(review): the meaning of cmd_id / cmd_index is defined by the MCU
// firmware and is not visible here.
typedef struct {
unsigned int cmd_id;
unsigned int cmd_index;
VSP_IOC_BUFFER cmd_data; /* buffer carrying the command payload */
} VSP_IOC_COMMAND;
//-------------------------------------------------------------------------------------------------
// For VSP_IOC_GET_WAKEUP_REASON
// The Enum is clone of VSP_CPU_WAKE_UP_REASON
// Values start at 0 and increase by one; they must stay in step with the enum
// they are cloned from.
typedef enum {
VSP_IOC_WAKEUP_REASON_COLD = 0,
VSP_IOC_WAKEUP_REASON_BUTTON, /* User press button */
VSP_IOC_WAKEUP_REASON_KEYWORD, /* User Speak a keyword */
VSP_IOC_WAKEUP_REASON_WIFI, /* WiFi Module */
VSP_IOC_WAKEUP_REASON_RTC, /* RTC */
VSP_IOC_WAKEUP_REASON_CHARGER, /* Wall plug */
} VSP_IOC_WAKE_UP_REASON;
//-------------------------------------------------------------------------------------------------
// For VSP_IOC_SET_PARAM
// Opaque parameter blob handed to the driver.
// NOTE(review): the layout of the blob is not defined in this header.
typedef struct {
void *addr; /* start of the parameter data */
unsigned int size; /* length of the parameter data; presumably in bytes -- TODO confirm */
} VSP_IOC_PARAM;
//=================================================================================================
// ioctl request codes. The trailing comment on each line names the type of the
// ioctl argument; codes are grouped by high byte (0x01 VSP, 0x02 LED,
// 0x03 standby, 0x04 PMU).
// For VSP
#define VSP_IOC_SWITCH_MODE (0x0101) /* VSP_IOC_MODE_TYPE */
#define VSP_IOC_LOAD_DSP (0x0102) /* VSP_IOC_FIRMWARE */
#define VSP_IOC_LOAD_MCU (0x0103) /* VSP_IOC_FIRMWARE */
#define VSP_IOC_START_STREAM (0x0104)
#define VSP_IOC_STOP_STREAM (0x0105)
#define VSP_IOC_GET_INFO (0x0106) /* VSP_IOC_INFO */
#define VSP_IOC_SET_TIMEOUT (0x0107) /* integer, millisecond */
#define VSP_IOC_GET_CONTEXT (0x0108) /* VSP_IOC_CONTEXT */
#define VSP_IOC_PUT_CONTEXT (0x0109) /* VSP_IOC_CONTEXT */
#define VSP_IOC_SET_PARAM (0x010A) /* VSP_IOC_PARAM */
#define VSP_IOC_EXIT_VSP (0x010B) /* NOTE(review): argument type not documented in this header */
#define VSP_IOC_ENTER_UPGRADE (0x010C) /* NOTE(review): argument type not documented in this header */
#define VSP_IOC_GET_MCU_COMMAND (0x010D) /* VSP_IOC_COMMAND */
// For LED
#define VSP_IOC_PUT_LED_FRAME (0x0201) /* VSP_IOC_LED_FRAME */
#define VSP_IOC_FLUSH_LED_QUEUE (0x0202)
// For Standby
#define VSP_IOC_GET_WAKEUP_REASON (0x0301) /* VSP_IOC_WAKE_UP_REASON */
#define VSP_IOC_SET_WAKEUP_MASK (0x0302) /* unsigned int */
// For PMU
// NOTE(review): this group starts at 0x0403 while the others start at xx01 -- confirm this is intentional.
#define VSP_IOC_GET_BATTERY_VOLTAGE (0x0403) /* unsigned, 0 - 1024 */
#endif /* __VSP_IOCTL_H__ */
SkylarkApp 中使用的例子
见:/skylark/senseflow/compoents/vsp/sf_vsp_input.cc
senseflow是我们的一个专门做信号处理的系统,包括音频的获取,离线识别的处理等。在另外的章节专门讲解。
单独使用的参考例子
下面给出一个用 C 语言实现的获取算法处理后的音频的例子(注意:示例代码包含了 <vector> 并调用了 UdpClient::SendInfo,实际需要使用 C++ 编译器编译)
vsp_read.h
#ifndef __VSP_READ_H__
#define __VSP_READ_H__

/* `bool` is built in for C++; C needs <stdbool.h> for the vsp_read() prototype. */
#ifndef __cplusplus
#include <stdbool.h>
#endif

/*
 * Open the VSP device, query its configuration, allocate the audio buffers
 * and switch the device to ACTIVE mode.
 * Returns 0 on success, -1 on failure.
 */
int vsp_open(void);

/* Close the VSP device and release the buffers allocated by vsp_open(). */
void vsp_close(void);

/*
 * Fetch one context from the VSP device.
 *   fd     - descriptor the processed output audio is written to
 *   vad    - receives the VAD flag of the fetched context (set to 0 otherwise)
 *   readOn - when true, the output audio of the context is written to fd
 * Returns -1 if the device is not open, otherwise the keyword-spotting result
 * of the context (0 when nothing was detected, the context could not be
 * fetched, or input is muted).
 */
int vsp_read(int fd, int *vad, bool readOn);
#endif
vsp_read.c
#include <string>
#include <vector>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <pthread.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include "vsp_ioctl.h"
#include "udp/UdpClient.h"
#include "udp/UdpServer.h"
/*
 * Path of the VSP device node ("/dev/vsp").
 * NOTE(review): the surrounding guide refers to /dev/gxvsp -- confirm which
 * node the driver actually creates.
 */
#define VSP_DEV_PATH "/dev/" VSP_DEVICE_NAME

/*
 * Descriptor of the opened VSP device. It starts at -1 ("not open") so that
 * vsp_close() / vsp_read() called before vsp_open() never act on descriptor 0.
 */
static int _vspFd = -1;
/* Device configuration obtained with VSP_IOC_GET_INFO in vsp_open(). */
static VSP_IOC_INFO _info;
/* Context pre-filled with the buffer descriptors; copied for every VSP_IOC_GET_CONTEXT. */
static VSP_IOC_CONTEXT _contextTemplate;
/* Single allocation backing all mic / ref / out channel buffers. */
static short * context_buffer = NULL;
/* Release whatever vsp_open() has acquired so far and report failure. */
static int vsp_open_fail(void)
{
    if (_vspFd >= 0) {
        close(_vspFd);
        _vspFd = -1; /* keeps a later vsp_close() / vsp_read() from touching the closed descriptor */
    }
    free(context_buffer);
    context_buffer = NULL;
    return -1;
}

/*
 * Open the VSP device, read its configuration, allocate one buffer that backs
 * every mic / ref / out channel, fill the context template with the matching
 * buffer descriptors and switch the device to ACTIVE mode.
 *
 * Returns 0 on success. On any failure the descriptor and the buffer are
 * released again and -1 is returned.
 */
int vsp_open(void)
{
    // Open Device
    _vspFd = open(VSP_DEV_PATH, O_RDWR);
    if (_vspFd < 0) {
        printf("Failed to open device\n");
        return -1;
    }

    // Get Context size
    int result = ioctl(_vspFd, VSP_IOC_GET_INFO, &_info);
    if (result) {
        printf("Failed to get system info!\n");
        return vsp_open_fail();
    }

    printf("==========================================================\n");
    printf(" Message Version: %x\n", _info.msg_version);
    printf(" Kernel IOC Version: %x\n", _info.ioc_version);
    printf(" SenseFlow IOC Version: %x\n", VSP_IOC_VERSION);
    printf(" Sample Rate: %u (Hz)\n", _info.sample_rate);
    printf(" MIC Channel Number: %u (channels)\n", _info.mic_num);
    printf(" REF Channel Number: %u (channels)\n", _info.ref_num);
    printf(" Output Channel Number: %u (channels)\n", _info.out_num);
    printf(" Audio Frame Length: %u (ms)\n", _info.frame_length);
    printf(" Audio Frame Num in Context: %u (frames)\n", _info.frame_num);
    printf(" Features Dimension: %u (dims)\n", _info.features_dim);
    printf("==========================================================\n");

    // The context holds a fixed number of buffer descriptors; refuse
    // configurations that would write past those arrays.
    const unsigned int max_mic = sizeof(_contextTemplate.mic_buffer) / sizeof(_contextTemplate.mic_buffer[0]);
    const unsigned int max_ref = sizeof(_contextTemplate.ref_buffer) / sizeof(_contextTemplate.ref_buffer[0]);
    if (_info.mic_num > max_mic || _info.ref_num > max_ref) {
        printf("Unsupported channel layout: %u MIC / %u REF channels!\n", _info.mic_num, _info.ref_num);
        return vsp_open_fail();
    }

    // Reserve input/output buffers (samples per channel per context)
    unsigned int channel_size = _info.frame_length * _info.frame_num * _info.sample_rate / 1000;
    unsigned int channel_num = _info.mic_num + _info.ref_num + _info.out_num;
    unsigned int buffer_size = channel_size * channel_num * sizeof(short);

    // Drop a buffer left over from an earlier vsp_open() so it is not leaked
    free(context_buffer);
    context_buffer = (short *)malloc(buffer_size);
    if (!context_buffer) {
        printf("Failed to allocate %u bytes for context buffer!\n", buffer_size);
        return vsp_open_fail();
    }

    // Fill Context Template: channels are laid out back to back as
    // [mic 0..mic_num) [ref 0..ref_num) [out]
    memset(&_contextTemplate, 0, sizeof(_contextTemplate));
    for (unsigned int i = 0; i < _info.mic_num; i++) {
        _contextTemplate.mic_buffer[i].addr = context_buffer + i * channel_size;
        _contextTemplate.mic_buffer[i].size = channel_size * sizeof(short);
    }
    for (unsigned int i = 0; i < _info.ref_num; i++) {
        _contextTemplate.ref_buffer[i].addr = context_buffer + (_info.mic_num + i) * channel_size;
        _contextTemplate.ref_buffer[i].size = channel_size * sizeof(short);
    }
    _contextTemplate.out_buffer.addr = context_buffer + (_info.mic_num + _info.ref_num) * channel_size;
    _contextTemplate.out_buffer.size = _info.out_num * channel_size * sizeof(short);

    result = ioctl(_vspFd, VSP_IOC_SWITCH_MODE, VSP_IOC_MODE_ACTIVE);
    if (result) {
        printf("Failed to switch to ACTIVE mode!\n");
        return vsp_open_fail();
    }
    return 0;
}
/*
 * Close the VSP device and free the channel buffer allocated by vsp_open().
 * Safe to call more than once: the descriptor is marked as closed afterwards,
 * so a repeated call does not close an unrelated descriptor and a later
 * vsp_read() bails out instead of handing freed buffers to the driver.
 */
void vsp_close(void)
{
    if (_vspFd >= 0) {
        close(_vspFd);
        _vspFd = -1;
    }
    free(context_buffer); /* free(NULL) is a no-op */
    context_buffer = NULL;
}
/*
 * Fetch one context from the VSP device and forward its results.
 *
 *   fd     - descriptor the processed output audio is written to
 *   vad    - receives the VAD flag of the fetched context; 0 on every other path
 *   readOn - when true, one channel of output audio is written to fd
 *
 * When the context reports a keyword, "kws::<direction>" is sent through
 * UdpClient::SendInfo.
 *
 * Returns -1 if the device is not open; otherwise the keyword-spotting value
 * of the context, or 0 when muted or when the context could not be fetched.
 */
int vsp_read(int fd, int *vad, bool readOn)
{
    *vad = 0;
    if (_vspFd < 0)
        return -1;

    VSP_IOC_CONTEXT context = _contextTemplate;
    int result = ioctl(_vspFd, VSP_IOC_GET_CONTEXT, &context);

    // The context is fetched before the mute check, so the driver is still
    // being read from while muted; the fetched data is simply discarded.
    if (get_azero_mute_status())
        return 0;

    if (result) {
        printf("Failed to get context!\n");
        return 0;
    }

    // copy out to output
    if (readOn && context.out_buffer.size == _contextTemplate.out_buffer.size) {
        unsigned int channel_size = _info.frame_length * _info.frame_num * _info.sample_rate / 1000;
        size_t channel_bytes = channel_size * sizeof(short);

        // With two or more output channels the second one is forwarded,
        // otherwise the first. Byte offsets need a char pointer: arithmetic
        // on void * is not valid C/C++.
        const char *out = (const char *)context.out_buffer.addr;
        if (context.out_buffer.size >= channel_bytes * 2)
            out += channel_bytes;

        // write() may accept fewer bytes than requested; keep going until the
        // whole channel is delivered or the descriptor reports an error.
        size_t done = 0;
        while (done < channel_bytes) {
            ssize_t n = write(fd, out + done, channel_bytes - done);
            if (n <= 0) {
                printf("Failed to write output audio!\n");
                break;
            }
            done += (size_t)n;
        }
    }

    if (context.kws) {
        std::string sendInfo = "kws::" + std::to_string(context.direction);
        UdpClient::SendInfo(sendInfo.c_str(), sendInfo.length());
    }

    *vad = context.vad;
    return context.kws;
}