QT5.14.1简单实现百度语音识别
注:本教程通过bilibili网站习得,并对ui的布局简单进行了调整,在学习过程中还原了语音识别流程和全部源码(源码获取方法和使用见本文底部),根据学习流程,编写下面的教程,大家可以下载学习。
1.点击运行后,按住“按住说话”按钮不放即开始录音,松开鼠标后程序完成语音识别并将结果转换为文字。先来看看运行效果图和UI所使用的组件
2.语音识别编程流程:(1)通过HTTP(S)请求(基于TCP协议)向一个服务器发送id和secret,该服务器会返回一个access_token(其有效期为1个月);(2)再向另一个服务器发送音频文件和access_token,该服务器返回语音识别结果
3.头文件及cpp中代码
3.1 audio.h
#ifndef AUDIO_H
#define AUDIO_H
#include <QObject>
#include <QAudioFormat>
#include <QAudioDeviceInfo>
#include <QMessageBox>
#include <QAudioInput>
#include <QFile>
/**
 * @brief Records audio from the default input device into a file.
 *
 * Call startAudio() to begin capturing into the given file and stopAudio()
 * to end the capture and close the file.
 */
class Audio : public QObject
{
    Q_OBJECT
public:
    explicit Audio(QObject *parent = nullptr);

    /// Begins recording into @p fileName.
    void startAudio(QString fileName);

    /// Stops the recording and closes the output file.
    void stopAudio();

signals:

private:
    // Both pointers start out null so that "no recording in progress" is a
    // well-defined state instead of an indeterminate pointer value.
    QAudioInput *m_audio = nullptr; ///< Capture object for the current recording.
    QFile *m_file = nullptr;        ///< Output file for the current recording.
};
#endif // AUDIO_H
3.2 http.h
#ifndef HTTP_H
#define HTTP_H
#include <QObject>
#include <QMap>
#include <QNetworkAccessManager>
#include <QNetworkRequest>
#include <QMapIterator>
#include <QNetworkReply>
#include <QEventLoop>
#include <QDebug>
/**
 * @brief Small helper that performs a blocking HTTP POST.
 */
class Http : public QObject
{
    Q_OBJECT

public:
    explicit Http(QObject *parent = nullptr);

    /**
     * @brief Sends a POST request and waits for the reply.
     * @param Url          Target URL.
     * @param header       Raw header name/value pairs added to the request.
     * @param requestData  Request body.
     * @param replyData    Receives the reply body when the request succeeds.
     * @return true when the reply finished without a network error.
     */
    bool post_sync(QString Url,QMap<QString,QString> header,QByteArray requestData,QByteArray &replyData);

signals:
};
#endif // HTTP_H
3.3 speech.h
#ifndef SPEECH_H
#define SPEECH_H
#include <QObject>
#include <http.h>
#include <QJsonDocument>
#include <QJsonParseError>
#include <QJsonObject>
#include <QJsonArray>
#include <QHostInfo>
#include <QFile>
#include <QMessageBox>
// Access-token retrieval.
// %1 and %2 are replaced with client_id and client_secret by Speech::speechIdentify().
const QString baiduTokenUrl = "https://aip.baidubce.com/oauth/2.0/token?grant_type=client_credentials&client_id=%1&client_secret=%2&";
// Placeholder credentials: replace both values with the ones of your own application.
const QString client_id = "xxxxxxxxxxxx";
const QString client_secret = "xxxxxxxxxxxxxxx";
// Speech recognition.
// %1 (cuid) and %2 (token) are replaced with the local host name and the access token
// by Speech::speechIdentify().
// NOTE(review): this endpoint uses plain http, so the token and the audio are sent
// unencrypted; confirm against the service documentation whether https can be used.
const QString baiduSpeechUrl = "http://vop.baidu.com/server_api?dev_pid=1537&cuid=%1&token=%2";
/**
 * @brief Client for the speech-recognition web service.
 */
class Speech : public QObject
{
    Q_OBJECT

public:
    explicit Speech(QObject *parent = nullptr);

    /**
     * @brief Uploads the audio stored in @p fileName and returns the recognized text.
     * @return The recognized text, or an empty string when nothing was recognized.
     */
    QString speechIdentify(QString fileName);

    /**
     * @brief Extracts the value stored under @p key from the JSON object in @p ba.
     * @return The string value, the first element of an array value, or an
     *         empty string in every other case.
     */
    QString getJsonValue(QByteArray ba,QString key);

signals:
};
#endif // SPEECH_H
3.4 widget.h
#ifndef WIDGET_H
#define WIDGET_H
#include <QWidget>
#include <audio.h>
#include <speech.h>
QT_BEGIN_NAMESPACE
namespace Ui { class Widget; }
QT_END_NAMESPACE
/**
 * @brief Main window: a push-to-talk button, a text area for the recognized
 *        text and a clear button.
 */
class Widget : public QWidget
{
    Q_OBJECT

public:
    Widget(QWidget *parent = nullptr);
    ~Widget();

private slots:
    /// Starts recording when the talk button is pressed.
    void on_pushButton_pressed();
    /// Stops recording and runs the recognition when the talk button is released.
    void on_pushButton_released();
    /// Clears the text area.
    void on_clearButton_clicked();

private:
    Ui::Widget *ui;
    // Null until the talk button is pressed for the first time; initialized
    // here so the pointer never holds an indeterminate value.
    Audio *audio = nullptr;
};
#endif // WIDGET_H
3.5 audio.cpp
#include "audio.h"
/// Constructs the recorder; the parent is simply forwarded to QObject.
Audio::Audio(QObject *parent)
    : QObject(parent)
{
}
/**
 * @brief Starts recording from the default input device into @p fileName.
 *
 * Requests 16 kHz, mono, 16-bit PCM. A warning box is shown and nothing is
 * recorded when there is no input device or the file cannot be opened.
 *
 * @param fileName Path of the file that receives the raw audio data.
 */
void Audio::startAudio(QString fileName)
{
    // Nothing is being recorded until the setup below has fully succeeded.
    m_audio = nullptr;
    m_file = nullptr;

    const QAudioDeviceInfo device = QAudioDeviceInfo::defaultInputDevice();
    if (device.isNull()) // no recording device available
    {
        QMessageBox::warning(nullptr, "QAudioDeviceInfo", "录音设备不存在");
        return;
    }

    // Requested audio encoding.
    QAudioFormat format;
    format.setSampleRate(16000); // 16 kHz sample rate
    format.setChannelCount(1);   // mono
    format.setSampleSize(16);    // 16 bits per sample
    format.setCodec("audio/pcm"); // was misspelled "aduio/pcm", which is not a valid codec name
    // A default-constructed QAudioFormat has sample type Unknown, so state
    // the sample representation explicitly to make the request complete.
    format.setSampleType(QAudioFormat::SignedInt);
    format.setByteOrder(QAudioFormat::LittleEndian);

    // Fall back to the closest format the device offers. Note that the
    // fallback may differ from the requested 16 kHz / mono / 16-bit layout.
    if (!device.isFormatSupported(format))
    {
        format = device.nearestFormat(format);
    }

    // Open the output file; it is parented to this object so it cannot leak.
    QFile *file = new QFile(fileName, this);
    if (!file->open(QIODevice::WriteOnly))
    {
        delete file;
        QMessageBox::warning(nullptr, "QFile", "无法打开录音文件");
        return;
    }
    m_file = file;

    // Create the capture object on the device that was checked above and
    // let it stream into the file.
    m_audio = new QAudioInput(device, format, this);
    m_audio->start(m_file);
}
/**
 * @brief Stops the current recording and closes the output file.
 *
 * Does nothing for a member that is null, i.e. when no recording is active.
 */
void Audio::stopAudio()
{
    // Stop capturing and release the capture object.
    if (m_audio != nullptr)
    {
        m_audio->stop();
        // Deferred deletion, so the object is not destroyed while Qt may
        // still be delivering events to it.
        m_audio->deleteLater();
        m_audio = nullptr;
    }

    // Close and release the output file.
    if (m_file != nullptr)
    {
        m_file->close();
        delete m_file;
        m_file = nullptr;
    }
}
3.6 http.cpp
#include "http.h"
/// Constructs the helper; the parent is simply forwarded to QObject.
Http::Http(QObject *parent)
    : QObject(parent)
{
}
/**
 * @brief Sends a POST request and blocks in a local event loop until the
 *        reply has finished.
 * @param Url          Target URL.
 * @param header       Raw header name/value pairs added to the request.
 * @param requestData  Request body.
 * @param replyData    Receives the reply body on success; untouched on failure.
 * @return true when the reply finished without a network error.
 */
bool Http::post_sync(QString Url, QMap<QString, QString> header, QByteArray requestData, QByteArray &replyData)
{
    QNetworkAccessManager manager;

    // Build the request: URL first, then every header entry.
    QNetworkRequest request;
    request.setUrl(Url);
    for (auto entry = header.constBegin(); entry != header.constEnd(); ++entry)
    {
        request.setRawHeader(entry.key().toLatin1(), entry.value().toLatin1());
    }

    QNetworkReply *reply = manager.post(request, requestData);

    // Spin a local event loop that quits as soon as the reply signals finished().
    QEventLoop waitLoop;
    connect(reply, &QNetworkReply::finished, &waitLoop, &QEventLoop::quit);
    waitLoop.exec();

    const bool failed = (reply == nullptr) || (reply->error() != QNetworkReply::NoError);
    if (failed)
    {
        qDebug() << "请求失败";
        return false;
    }

    // Hand the reply body back to the caller.
    replyData = reply->readAll();
    return true;
}
3.7 main.cpp
#include "widget.h"
#include <QApplication>
int main(int argc, char *argv[])
{
QApplication a(argc, argv);
Widget w;
w.show();
return a.exec();
}
3.8 speech.cpp
#include "speech.h"
/// Constructs the client; the parent is simply forwarded to QObject.
Speech::Speech(QObject *parent)
    : QObject(parent)
{
}
/**
 * @brief Recognizes the speech stored in @p fileName.
 *
 * First requests an access token, then uploads the audio file together with
 * that token and extracts the "result" entry from the reply.
 *
 * @param fileName Path of the recorded audio file.
 * @return The recognized text, or an empty string when the token request,
 *         the file access or the recognition request fails, or when the
 *         reply carries no result.
 */
QString Speech::speechIdentify(QString fileName)
{
    // Shared failure path: tell the user and hand back an empty result.
    const auto fail = []() -> QString {
        QMessageBox::warning(nullptr, "识别提示", "识别失败");
        return "";
    };

    QMap<QString,QString> header;
    header.insert(QString("Content-Type"), QString("audio/pcm;rate=16000"));

    QByteArray requestData; // body that is sent
    QByteArray replyData;   // body returned by the server
    Http m_http;

    // Step 1: obtain the access token. The multi-argument arg() substitutes
    // both markers in one pass, so a "%n" sequence inside one value can never
    // be mistaken for a placeholder.
    const QString tokenUrl = QString(baiduTokenUrl).arg(client_id, client_secret);
    QString accessToken;
    if (m_http.post_sync(tokenUrl, header, requestData, replyData))
    {
        accessToken = getJsonValue(replyData, QString("access_token"));
    }
    if (accessToken.isEmpty())
    {
        // Without a token the recognition request cannot succeed, so stop
        // here instead of uploading the audio for nothing.
        return fail();
    }

    // Step 2: build the recognition URL from the host name and the token.
    const QString speechUrl = QString(baiduSpeechUrl).arg(QHostInfo::localHostName(), accessToken);

    // Load the recorded audio into the request body.
    QFile file(fileName);
    if (!file.open(QIODevice::ReadOnly))
    {
        return fail();
    }
    requestData = file.readAll();
    file.close();
    replyData.clear();

    // Step 3: send the audio and pick the recognized text out of the reply.
    if (!m_http.post_sync(speechUrl, header, requestData, replyData))
    {
        return fail();
    }
    return getJsonValue(replyData, QString("result"));
}
/**
 * @brief Looks up @p key in the JSON object contained in @p ba.
 * @param ba  Raw JSON text.
 * @param key Name of the member to read.
 * @return The member's string value; for an array member, the string value
 *         of its first element; an empty string when parsing fails, the
 *         document is not an object, the key is missing or the value has
 *         another type.
 */
QString Speech::getJsonValue(QByteArray ba, QString key)
{
    QJsonParseError status;
    const QJsonDocument doc = QJsonDocument::fromJson(ba, &status);

    // Only a successfully parsed top-level object can be searched.
    if (status.error != QJsonParseError::NoError || !doc.isObject())
    {
        return "";
    }

    const QJsonObject root = doc.object();
    if (!root.contains(key))
    {
        return "";
    }

    const QJsonValue value = root.value(key);
    if (value.isString()) // plain string member
    {
        return value.toString();
    }
    if (value.isArray()) // array member: take its first element
    {
        const QJsonArray items = value.toArray();
        return items.at(0).toString();
    }
    return "";
}
3.9 widget.cpp
#include "widget.h"
#include "ui_widget.h"
/// Builds the UI from the generated form and labels the talk button.
Widget::Widget(QWidget *parent)
    : QWidget(parent),
      ui(new Ui::Widget)
{
    ui->setupUi(this);

    // Initial caption of the push-to-talk button.
    ui->pushButton->setText("按住说话");
}
/// Releases the generated UI object created in the constructor.
Widget::~Widget()
{
    delete ui;
}
void Widget::on_pushButton_pressed()
{
ui->pushButton->setText("松开识别");
//开始录音
audio = new Audio;
audio->startAudio("E:\\QT document\\baiduSpeech\\audio.pcm");
}
/// Talk button released: stop recording, run the recognition and show the text.
void Widget::on_pushButton_released()
{
    // Stop recording and dispose of the recorder that was created for this
    // press, so repeated presses do not pile up recorder objects.
    if (audio != nullptr)
    {
        audio->stopAudio();
        audio->deleteLater();
        audio = nullptr;
    }

    // Update the button caption.
    ui->pushButton->setText("开始识别");

    // Run the recognition on the recorded file and display the result.
    Speech m_speech;
    QString text = m_speech.speechIdentify("E:\\QT document\\baiduSpeech\\audio.pcm");
    ui->textEdit->setText(text);

    ui->pushButton->setText("按住说话");
}
/// Clear button clicked: empty the text area.
void Widget::on_clearButton_clicked()
{
    ui->textEdit->clear();
}
4.相关URL地址
(1)百度智能云获取地址
(2)baiduTokenUrl获取地址
(3)baiduSpeechUrl获取地址
(4)打包好的程序exe百度云地址提取码:txpf,若是不会打包可以看我的另一篇博文
(5)源码地址(包含已经创建好的Id,Secret,源程序和打包好的发行版本exe)
5.源码使用方法:
注:1.程序需要访问百度服务器并使用百度语音识别的API,在程序运行之前需要修改client_id,client_secret,所以在此之前需要在百度智能云中创建应用
2.程序运行会生成一个audio.pcm,需要更改pcm音频的存放位置
百度智能云创建应用及Id,Secret获取
1.打开网址 用百度云账号登录,并创建应用
2.创建完成后在应用列表可以看见相关信息,需要将这里的两个值替换到相应位置(speech.h头文件中)
pcm存放位置更改
1.在widget.cpp中,将路径更改为你所创建的文件目录,注意此处需要用到转义“\”,见下图