(此处为http上传音频识别模式)
一,找到百度AI平台,管理台点击SDK下载 选择c#sdk
下载完以后会有对应不同.NET版本的dll包,找到与自己Unity所用.NET版本对应的文件夹,将该文件夹拖入Unity工程。
然后找到你的应用管理,点击“管理”。这里要用到一个API Key和一个Secret Key,将这两个值复制下来,识别时会用到;至于AppID,暂时不需要使用。
二,实现录音-上传识别
1,录音,可以百度到很多教程,基于Microphone类实现。不作阐释。该类可以设置一直录制和指定时间录制,这里采取第二种,指定录制时间,结束后停止,上传识别,如果需要一直识别,可以在拿到结果后或者上传后再次开始录制,根据需求这里是拿到结果后才开始新的录制。另外一种原理类似,只是节点不同。
2,拿到音频数据后,即录音完成后,要使用UnityWebRequest来进行上传。在此之前,要先访问百度平台获取一次token,用于识别认证(官方有说明)。重点:HTTP方式的token获取地址要在官网教程的REST API文档中查找,一定要使用该文档中给出的token地址,否则很容易出现认证失败。
在该文档中找到上传地址,在token地址下方,翻找,如下:
注意:一定要确保你的sdk,apikey,secretkey是同一应用账号下的,就是说你要使用自己的,相对应的,否则是无法上传识别成功的。
三,上代码:
这里是我实现了一个识别类,进行了简单封装,相关点都有注释,可做参考:
using Baidu.Aip.Speech;
using LitJson;
using Newtonsoft.Json.Linq;
using System;
using System.Collections;
using System.Collections.Generic;
using System.Net.Security;
using System.Security.Cryptography.X509Certificates;
using UnityEngine;
using UnityEngine.Networking;
namespace BaiDuVoice
{
// Target type for the token-endpoint reply: OnGetToken deserializes the response text
// into this class with JsonMapper.ToObject<AccessToken>.
// NOTE(review): member names presumably have to match the JSON keys exactly - confirm before renaming.
public class AccessToken
{
// Token string; the only member read elsewhere in this file (copied into BaiDuSpeech.token).
public string access_token;
// NOTE(review): the remaining members are never read in this file; presumably they mirror
// other keys of the token reply - confirm against the service documentation.
public int expires_in;
public string session_key;
public string scope;
public string refresh_token;
public string session_secret;
}
// Recognition result, used for JSON (de)serialization - do not modify.
// The recognizer fills this from the server reply via JsonMapper.ToObject<Recognizeresult>
// and hands it to OnRecongnition subscribers; on local failures only err_msg is set.
public class Recognizeresult
{
public string corpus_no;
// Status text; the sample subscriber in this file treats a value containing "success" as a success.
public string err_msg;
public int err_no;
/// <summary>
/// Results recognized from the speech.
/// </summary>
public List<string> result;
public string sn;
}
// Recognition language/model codes.
// NOTE(review): the values look like the dev_pid codes sent with the upload request
// (the request in this file hard-codes "1536") - confirm; the enum itself is not referenced in this file.
public enum Language
{
// NOTE(review): meaning of 1535 is not documented in this file - confirm.
None=1535,
Chinese=1536,// no punctuation; can recognize simple English
StandardChinese =1537,// Chinese only, with punctuation
English=1737,// English only, no punctuation
}
/// <summary>
/// Account credentials and related settings passed to BaiDuSpeech.Init.
/// </summary>
public class SpeechSetting
{
// Not read by the recognizer code in this file.
public string AppID;
// Sent as client_id when requesting the access token.
public string ApiKey;
// Sent as client_secret when requesting the access token.
public string SecretKey;
/// <summary>
/// Time interval; Init copies it into the recording length (seconds) of each clip.
/// </summary>
public int SpaceTime;
}
// Callback signature for recognition results; msg carries either the parsed server reply or an error message.
public delegate void OnRecongnition( Recognizeresult msg);
/// <summary>
/// Records fixed-length clips from the first microphone, uploads them to the speech
/// recognition HTTP endpoint and reports every outcome (success or failure) through
/// <see cref="OnRecongnition"/>.
/// </summary>
/// <remarks>
/// Call <see cref="Init"/> once (after subscribing to <see cref="OnRecongnition"/>), then
/// <see cref="StartRecongnition"/> for each clip. Every callback receives a fresh
/// <see cref="Recognizeresult"/> instance, so subscribers may keep a reference to it.
/// </remarks>
public class BaiDuSpeech : MonoBehaviour
{
    /// <summary>True when at least one microphone was found by the last <see cref="Init"/> call.</summary>
    public static bool Available;

    // dev_pid value sent with every upload (same value the request previously hard-coded).
    private const int DevPid = (int)Language.Chinese;

    private SpeechSetting setting;                                               // account configuration
    private string GetTokenUrl = "https://openapi.baidu.com/oauth/2.0/token";    // token endpoint
    private string token = "";                                                   // access token once obtained
    private string CongnitionAddress = "http://vop.baidu.com/server_api";        // recognition endpoint

    // Recording
    private int rate = 16000;        // sample rate requested from the microphone and declared to the server
    private string deviceName;       // microphone device name
    private AudioClip clip;          // clip currently being recorded
    private int recordTime = 3;      // clip length in seconds

    /// <summary>
    /// Invoked with the outcome of every recognition attempt and of a failed token request.
    /// </summary>
    public OnRecongnition OnRecongnition;

    private bool isRecongnting = false;        // true while a record/upload cycle is running
    private Coroutine cor;                     // the running record/upload coroutine, if any
    private WaitForSeconds waittime;           // cached wait matching recordTime
    private UnityWebRequest activeRequest;     // upload in flight, kept so StopRecongnition can abort it

    /// <summary>
    /// Stores the configuration, selects the first microphone and requests an access token.
    /// </summary>
    /// <param name="setting">Account keys and clip length; must not be null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="setting"/> is null.</exception>
    public void Init(SpeechSetting setting)
    {
        if (setting == null)
        {
            throw new ArgumentNullException("setting");
        }

        this.setting = setting;

        // A non-positive length cannot be recorded; keep the built-in default in that case.
        if (setting.SpaceTime > 0)
        {
            recordTime = setting.SpaceTime;
        }
        waittime = new WaitForSeconds(recordTime);

        if (Microphone.devices.Length > 0)
        {
            deviceName = Microphone.devices[0];
            Available = true;
            Debug.Log("获取token");
            GetToken();
        }
        else
        {
            Available = false;
            Debug.LogError("未发现可用mic设备");
        }
    }

    /// <summary>Builds the credential form and starts the token request coroutine.</summary>
    private void GetToken()
    {
        WWWForm form = new WWWForm();
        form.AddField("grant_type", "client_credentials");
        form.AddField("client_id", setting.ApiKey);
        form.AddField("client_secret", setting.SecretKey);
        StartCoroutine(HttpPostRequest(GetTokenUrl, form));
    }

    /// <summary>
    /// Posts the credentials to <paramref name="urls"/> and hands a 200 reply to
    /// <see cref="OnGetToken"/>; any failure is reported through <see cref="OnRecongnition"/>.
    /// </summary>
    /// <param name="urls">Token endpoint address.</param>
    /// <param name="form">Form carrying the same credentials as the query string.</param>
    private IEnumerator HttpPostRequest(string urls, WWWForm form)
    {
        string url = string.Format("{0}?grant_type={1}&client_id={2}&client_secret={3}", urls, "client_credentials", setting.ApiKey, setting.SecretKey);

        // Log the endpoint only: the full URL contains the API key and secret.
        Debug.Log(urls);

        string failure = null;
        string body = null;
        using (UnityWebRequest request = UnityWebRequest.Post(url, form))
        {
            yield return request.SendWebRequest();

            if (request.isNetworkError)
            {
                failure = "授权失败" + request.error;
                Debug.Log(failure);
            }
            else if (request.responseCode == 200)
            {
                body = request.downloadHandler.text;
            }
            else
            {
                Debug.Log("状态码错误:" + request.responseCode + "\n 授权数据:" + request.downloadHandler.text);
                failure = "状态码错误:" + request.responseCode;
            }
        }

        if (failure != null)
        {
            Report(failure);
            yield break;
        }

        Debug.Log("成功获取数据:" + body);
        OnGetToken(body);
    }

    /// <summary>
    /// Parses the token reply and stores the access token; an unparsable or token-less
    /// reply is reported as an authorization failure instead of throwing.
    /// </summary>
    /// <param name="res">Raw JSON text returned by the token endpoint.</param>
    private void OnGetToken(string res)
    {
        AccessToken accessToken = null;
        try
        {
            accessToken = JsonMapper.ToObject<AccessToken>(res);
        }
        catch (LitJson.JsonException e)
        {
            Debug.Log("授权失败" + e.Message);
        }

        if (accessToken == null || string.IsNullOrEmpty(accessToken.access_token))
        {
            Report("授权失败" + res);
            return;
        }

        token = accessToken.access_token;
        Debug.Log("token :" + token);
    }

    /// <summary>
    /// Starts one record-and-recognize cycle. Does nothing while a cycle is already
    /// running; reports an error through <see cref="OnRecongnition"/> when no microphone
    /// is available.
    /// </summary>
    public void StartRecongnition()
    {
        if (isRecongnting)
        {
            return;
        }

        if (!Available)
        {
            // The busy flag is deliberately not set here, so a later call can succeed
            // once a microphone has been found by Init.
            Report("mic不可用,请检查mic");
            return;
        }

        isRecongnting = true;
        if (cor != null)
        {
            StopCoroutine(cor);
        }
        cor = StartCoroutine(Recongnition());
    }

    /// <summary>
    /// Cancels the running cycle, if any: stops the coroutine, the microphone and any
    /// upload still in flight. No callback is raised for the cancelled cycle.
    /// </summary>
    public void StopRecongnition()
    {
        isRecongnting = false;

        if (cor != null)
        {
            StopCoroutine(cor);
            cor = null;
        }

        // A stopped coroutine never reaches its own cleanup, so release its resources here.
        if (activeRequest != null)
        {
            activeRequest.Abort();
            activeRequest.Dispose();
            activeRequest = null;
        }

        if (Microphone.IsRecording(deviceName))
        {
            Microphone.End(deviceName);
        }

        clip = null;
    }

    /// <summary>Records one clip, uploads it as raw 16-bit PCM and reports the outcome.</summary>
    private IEnumerator Recongnition()
    {
        Debug.Log("开始识别");

        clip = Microphone.Start(deviceName, false, recordTime, rate);
        if (clip == null)
        {
            Finish(Failure("mic不可用,请检查mic"));
            yield break;
        }

        yield return waittime;
        Microphone.End(deviceName);

        byte[] data = ToPcm16(clip);
        string url = string.Format("{0}?cuid={1}&token={2}&dev_pid={3}", CongnitionAddress, SystemInfo.deviceUniqueIdentifier, token, DevPid);

        Recognizeresult outcome = null;
        using (UnityWebRequest request = new UnityWebRequest(url, UnityWebRequest.kHttpVerbPOST))
        {
            // The body must be the bare PCM bytes to match the audio/pcm Content-Type;
            // a multipart form would wrap the samples in boundary and header text.
            request.uploadHandler = new UploadHandlerRaw(data);
            request.downloadHandler = new DownloadHandlerBuffer();
            request.SetRequestHeader("Content-Type", "audio/pcm;rate=" + rate);

            activeRequest = request;
            yield return request.SendWebRequest();
            activeRequest = null;

            outcome = Interpret(request);
        }

        Finish(outcome);
    }

    /// <summary>Converts the clip's float samples to 16-bit signed PCM bytes.</summary>
    private static byte[] ToPcm16(AudioClip recorded)
    {
        float[] samples = new float[recorded.samples * recorded.channels];
        recorded.GetData(samples, 0);

        short[] pcm = new short[samples.Length];
        for (int i = 0; i < samples.Length; i++)
        {
            // Clamp so an out-of-range sample cannot wrap around when narrowed to short.
            pcm[i] = (short)(Mathf.Clamp(samples[i], -1f, 1f) * short.MaxValue);
        }

        byte[] bytes = new byte[pcm.Length * sizeof(short)];
        Buffer.BlockCopy(pcm, 0, bytes, 0, bytes.Length);
        return bytes;
    }

    /// <summary>
    /// Turns a completed upload into a result object. Failure is decided from the transport
    /// state, the status code and the parsed err_no, never from searching the reply text,
    /// so recognized speech containing the word "error" is not mistaken for a failure.
    /// </summary>
    private static Recognizeresult Interpret(UnityWebRequest request)
    {
        if (request.isNetworkError)
        {
            Debug.Log("Net error:" + request.error);
            return Failure("Net error:" + request.error);
        }

        if (request.responseCode != 200)
        {
            Debug.Log("状态码错误:" + request.responseCode);
            return Failure("状态码错误:" + request.responseCode);
        }

        string body = request.downloadHandler.text;
        Recognizeresult parsed = null;
        try
        {
            parsed = JsonMapper.ToObject<Recognizeresult>(body);
        }
        catch (LitJson.JsonException e)
        {
            Debug.Log("识别失败:" + e.Message);
        }

        bool succeeded = parsed != null && parsed.err_no == 0 && parsed.result != null && parsed.result.Count > 0;
        if (!succeeded)
        {
            Debug.Log("识别失败:" + body);
            Recognizeresult failed = Failure("识别失败: " + body);
            if (parsed != null)
            {
                failed.err_no = parsed.err_no;
                failed.sn = parsed.sn;
            }
            return failed;
        }

        Debug.Log("成功获取数据:" + body);
        if (parsed.err_msg == null)
        {
            parsed.err_msg = "";
        }
        return parsed;
    }

    /// <summary>Ends the current cycle and delivers its outcome.</summary>
    private void Finish(Recognizeresult outcome)
    {
        // Clear the busy state before notifying so a subscriber can call
        // StartRecongnition (or StopRecongnition) from inside the callback.
        isRecongnting = false;
        cor = null;
        Dispatch(outcome);
    }

    /// <summary>Delivers an error-only result to subscribers.</summary>
    private void Report(string errorMessage)
    {
        Dispatch(Failure(errorMessage));
    }

    /// <summary>Invokes the callback, if any, with a non-null result.</summary>
    private void Dispatch(Recognizeresult outcome)
    {
        OnRecongnition handler = OnRecongnition;
        if (handler != null && outcome != null)
        {
            handler(outcome);
        }
    }

    /// <summary>Creates a result that carries only an error message.</summary>
    private static Recognizeresult Failure(string errorMessage)
    {
        return new Recognizeresult { err_msg = errorMessage };
    }
}
}
调用测试:
// Speech recognizer component, created and configured in Start.
private BaiDuSpeech baidu;

/// <summary>
/// Adds the recognizer to this GameObject, subscribes to its result callback and
/// initializes it with the account settings.
/// </summary>
private void Start()
{
    // Replace the placeholders with the values of your own application.
    SpeechSetting config = new SpeechSetting();
    config.AppID = "**********";
    config.ApiKey = "*******";
    config.SecretKey = "******";
    config.SpaceTime = 3;

    baidu = gameObject.AddComponent<BaiDuSpeech>();

    // The handler has to be registered before Init is called.
    baidu.OnRecongnition += OnGetResult;
    baidu.Init(config);
}
/// <summary>
/// Logs the outcome of a recognition attempt. A result counts as successful only when
/// its status text contains "success" and at least one candidate text is present.
/// </summary>
/// <param name="msg">Result delivered by the recognizer; may carry only an error message.</param>
private void OnGetResult(Recognizeresult msg)
{
    if (msg == null)
    {
        Debug.Log("识别失败");
        return;
    }

    Debug.Log("msg.corpus_no " + msg.corpus_no + "\nmsg.err_msg" + msg.err_msg + " \nmsg.sn " + msg.sn);

    // err_msg and result can both be null on failure paths, so check before dereferencing.
    bool succeeded = msg.err_msg != null && msg.err_msg.Contains("success");
    bool hasText = msg.result != null && msg.result.Count > 0;

    if (succeeded && hasText)
    {
        // result holds the candidate transcriptions; the first one is normally used.
        Debug.Log("识别成功" + msg.result[0]);
    }
    else
    {
        Debug.Log("识别失败");
    }
}
亲测可用,项目使用中,如有疑问欢迎提出!