程序说明
该程序是在我上一次调用科大讯飞离线语音合成SDK、用Python编写的文本转语音程序的基础上所做的进一步开发,这次使用PyQt5模块及其配套的Qt Designer制作了一个GUI界面。
程序展示
该程序分为三大板块进行编写:第一板块是对科大讯飞离线语音合成SDK的调用,第二板块是使用PyQt5编写GUI界面,第三板块是主函数的编写。
第一板块SDK的调用
import ctypes
import wave
import time
from ctypes import cdll, c_int, byref, string_at
# Module-level constants shared by the SDK wrapper functions in this section.
frame_rate = 8000 # Sampling frequency; the main program passes it to wave.setframerate()
MSP_SUCCESS = 0 # Status code meaning "success"
MSP_TTS_FLAG_STILL_HAVE_DATA = 1 # NOTE(review): presumably "more audio pending" (name-based) - confirm against SDK docs
MSP_TTS_FLAG_DATA_END = 2 # Synthesis status meaning "all audio has been returned"
MSP_TTS_FLAG_CMD_CANCELED = 4 # NOTE(review): presumably "synthesis cancelled" (name-based) - confirm against SDK docs
# The bare string below is an inline glossary of the native entry points used
# in this section (it is an expression statement, not a docstring):
#   LoadLibrary      - load the given module into the calling process (C++)
#   MSPLogin         - initialise msc / user login
#   QTTSSessionBegin - begin one synthesis session, allocate synthesis resources
#   QTTSTextPut      - write the text to be synthesised
#   QTTSAudioGet     - fetch the synthesised audio
#   QTTSSessionEnd   - end the current synthesis session
#   MSPLogout        - log out
"""
LoadLibrary: 将指定的模块加载到调用进程的地址空间中(C++)
MSPLogin: 初始化msc,用户登录
QTTSSessionBegin: 开始一次语音合成,分配语音合成资源
QTTSTextPut: 写入要合成的文本
QTTSAudioGet: 获取合成音频
QTTSSessionEnd: 结束本次语音合成
MSPLogout: 退出登录
"""
# Log in to the iFlytek offline speech-synthesis SDK
def login(login_dll, login_params):
    """Log in to the SDK through ``MSPLogin`` and report the outcome.

    Args:
        login_dll: Loaded SDK library object exposing ``MSPLogin``.
        login_params: Login parameter string handed to ``MSPLogin`` as its
            third argument (the caller in this file passes ``bytes``).

    Returns:
        The status code returned by ``MSPLogin``; it equals ``MSP_SUCCESS``
        when the login succeeded. Returning it lets callers react to a
        failed login instead of relying on the printed message only.
    """
    # User name and password are passed as NULL; only the parameter string is used.
    ret = login_dll.MSPLogin(None, None, login_params)
    if ret != MSP_SUCCESS:
        print("登入失败")
        print(ret)
    else:
        print("登入成功")
    return ret
# Begin one speech-synthesis session and allocate synthesis resources
def QTTS_Session_Begin(login_dll, session_begin_params):
    """Start a synthesis session via ``QTTSSessionBegin``.

    Args:
        login_dll: Loaded SDK library object exposing ``QTTSSessionBegin``.
        session_begin_params: Session parameter string. A ``str`` is encoded
            as UTF-8; ``bytes``-like input is passed through unchanged, which
            keeps this function consistent with ``login`` (it takes bytes).

    Returns:
        The session ID returned by the SDK (``bytes`` because ``restype`` is
        ``c_char_p``; ``None`` if the SDK returned a NULL pointer).
    """
    if isinstance(session_begin_params, str):
        params_bytes = session_begin_params.encode('UTF-8')
    else:
        # bytes(x, 'UTF-8') would raise TypeError for bytes input.
        params_bytes = bytes(session_begin_params)

    # Output parameter: the SDK writes its error code into this C int.
    error_code = c_int()
    # Declare the return type as char* so the session ID comes back as bytes.
    login_dll.QTTSSessionBegin.restype = ctypes.c_char_p
    session_id = login_dll.QTTSSessionBegin(params_bytes, byref(error_code))
    if error_code.value != 0:
        print(f'调用失败,错误码 {error_code.value}')
    else:
        print("调用成功")
    return session_id
# Write the text that is to be synthesised
def QTTS_Text_PUT(login_dll, session_ID, text):
    """Send ``text`` to the SDK via ``QTTSTextPut``.

    Args:
        login_dll: Loaded SDK library object exposing ``QTTSTextPut``.
        session_ID: Session ID obtained from ``QTTS_Session_Begin``.
        text: Text to synthesise (``str``); it is encoded as UTF-8 and the
            byte length of the encoded text is passed to the SDK.

    Returns:
        The status code returned by ``QTTSTextPut``; it equals
        ``MSP_SUCCESS`` on success, so callers can detect a failed write.
    """
    encoded = text.encode('UTF-8')
    ret = login_dll.QTTSTextPut(session_ID, encoded, len(encoded), None)
    if ret != MSP_SUCCESS:
        print("文本写入失败")
    else:
        print("文本写入成功")
    return ret
# Fetch the synthesised audio
def QTTS_Audio_Get(login_dll, session_ID, wavFile):
    """Pull synthesised audio from the SDK and write it to ``wavFile``.

    ``QTTSAudioGet`` is called repeatedly until the SDK reports an error or
    the synthesis status equals ``MSP_TTS_FLAG_DATA_END``. ``wavFile`` is
    always closed before returning, even if writing raises.

    Args:
        login_dll: Loaded SDK library object exposing ``QTTSAudioGet``.
        session_ID: Session ID obtained from ``QTTS_Session_Begin``.
        wavFile: Open writer providing ``writeframes`` and ``close``
            (the caller in this file passes a ``wave`` writer).

    Returns:
        The last status code reported by ``QTTSAudioGet``; it equals
        ``MSP_SUCCESS`` when all audio was retrieved.
    """
    # Output parameters filled in by the SDK on every call.
    audio_len, synth_status, getret = c_int(), c_int(), c_int()
    # The call returns a data pointer; ctypes would otherwise treat the
    # result as a C int, so declare it as a pointer-sized value.
    login_dll.QTTSAudioGet.restype = ctypes.c_void_p
    try:
        while True:
            pdata = login_dll.QTTSAudioGet(session_ID, byref(audio_len),
                                           byref(synth_status), byref(getret))
            if getret.value != MSP_SUCCESS:
                # Surface the failure instead of leaving the loop silently.
                print(f"获取音频失败,错误码 {getret.value}")
                break
            if pdata:
                # Copy audio_len bytes out of the SDK buffer and append them.
                wavFile.writeframes(string_at(pdata, audio_len.value))
            if synth_status.value == MSP_TTS_FLAG_DATA_END:
                break
            # Pause recommended by the vendor; it can be removed to shorten
            # the conversion time.
            time.sleep(0.1)
    finally:
        wavFile.close()
    return getret.value
# End the current speech-synthesis session
def QTTS_Session_End(login_dll, session_ID):
    """End the synthesis session and log out of the SDK.

    Args:
        login_dll: Loaded SDK library object exposing ``QTTSSessionEnd``
            and ``MSPLogout``.
        session_ID: Session ID obtained from ``QTTS_Session_Begin``.
    """
    # The hint must be a byte string: ctypes marshals a Python str as
    # wchar_t*, whereas a bytes object is passed as char*.
    login_dll.QTTSSessionEnd(session_ID, b"Normal")
    login_dll.MSPLogout()
第二板块gui界面的编写
# -*- coding: utf-8 -*-
# Form implementation generated from reading ui file 'xiaoyuzhuanhuan.ui'
#
# Created by: PyQt5 UI code generator 5.15.4
#
# WARNING: Any manual changes made to this file will be lost when pyuic5 is
# run again. Do not edit this file unless you know what you are doing.
from PyQt5 import QtCore, QtGui, QtWidgets
class Ui_MainWindow(object):
    """Widget layout of the main window (originally emitted by pyuic5).

    ``setupUi`` builds every widget and stores it as an attribute on
    ``self``; ``retranslateUi`` assigns the visible captions.
    """

    def setupUi(self, MainWindow):
        """Create all child widgets of ``MainWindow`` and attach them to ``self``."""
        MainWindow.setObjectName("小鱼语音转换")
        MainWindow.resize(590, 576)

        central = QtWidgets.QWidget(MainWindow)
        central.setObjectName("centralwidget")
        self.centralwidget = central

        # Read-only browser at the bottom plus its caption label.
        result_view = QtWidgets.QTextBrowser(central)
        result_view.setGeometry(QtCore.QRect(0, 460, 400, 60))
        result_view.setObjectName("textBrowser")
        self.textBrowser = result_view

        result_caption = QtWidgets.QLabel(central)
        result_caption.setGeometry(QtCore.QRect(0, 440, 72, 15))
        result_caption.setObjectName("label")
        self.label = result_caption

        # Large text editor at the top plus its caption label.
        text_input = QtWidgets.QTextEdit(central)
        text_input.setEnabled(True)
        text_input.setGeometry(QtCore.QRect(0, 30, 400, 400))
        text_input.setObjectName("textEdit")
        self.textEdit = text_input

        input_caption = QtWidgets.QLabel(central)
        input_caption.setGeometry(QtCore.QRect(0, 10, 72, 15))
        input_caption.setObjectName("label_2")
        self.label_2 = input_caption

        # Push button on the right-hand side.
        convert_button = QtWidgets.QPushButton(central)
        convert_button.setGeometry(QtCore.QRect(410, 270, 171, 251))
        convert_button.setStyleSheet("font: 14pt \"仿宋\";")
        convert_button.setIconSize(QtCore.QSize(100, 40))
        convert_button.setObjectName("pushButton")
        self.pushButton = convert_button

        # Vertical splitter holding the option widgets.
        splitter = QtWidgets.QSplitter(central)
        splitter.setGeometry(QtCore.QRect(410, 30, 171, 221))
        splitter.setOrientation(QtCore.Qt.Vertical)
        splitter.setObjectName("splitter")
        self.splitter = splitter

        # Children are created in this exact order; each one is parented to
        # the splitter and stored on self under its object name.
        splitter_children = (
            ("label_3", QtWidgets.QLabel),
            ("checkBox", QtWidgets.QCheckBox),
            ("checkBox_2", QtWidgets.QCheckBox),
            ("label_4", QtWidgets.QLabel),
            ("lineEdit", QtWidgets.QLineEdit),
            ("label_5", QtWidgets.QLabel),
            ("lineEdit_2", QtWidgets.QLineEdit),
        )
        for attr_name, widget_type in splitter_children:
            child = widget_type(splitter)
            child.setObjectName(attr_name)
            setattr(self, attr_name, child)

        MainWindow.setCentralWidget(central)

        menubar = QtWidgets.QMenuBar(MainWindow)
        menubar.setGeometry(QtCore.QRect(0, 0, 590, 26))
        menubar.setObjectName("menubar")
        self.menubar = menubar
        MainWindow.setMenuBar(menubar)

        statusbar = QtWidgets.QStatusBar(MainWindow)
        statusbar.setObjectName("statusbar")
        self.statusbar = statusbar
        MainWindow.setStatusBar(statusbar)

        self.retranslateUi(MainWindow)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)

    def retranslateUi(self, MainWindow):
        """Set the window title and the caption of every labelled widget."""
        tr = QtCore.QCoreApplication.translate
        MainWindow.setWindowTitle(tr("MainWindow", "小鱼语音转换"))
        captions = (
            (self.label, "转换结果"),
            (self.label_2, "文本输入"),
            (self.pushButton, "转换"),
            (self.label_3, "音色"),
            (self.checkBox, "xiaoyan"),
            (self.checkBox_2, "xiaofeng"),
            (self.label_4, "语速"),
            (self.label_5, "音量"),
        )
        for widget, caption in captions:
            widget.setText(tr("MainWindow", caption))
第三板块主函数的编写
from PyQt5.QtWidgets import QApplication, QMainWindow, QFileDialog
import sys
import xiaoyuzhuanhuan # UI文件
import yuyin_Y
class MainCode(QMainWindow, xiaoyuzhuanhuan.Ui_MainWindow):
    """Main window that wires the generated UI to the speech-synthesis wrapper."""

    def __init__(self):
        """Build the UI and connect the push button to ``zhuanhuan``."""
        QMainWindow.__init__(self)
        xiaoyuzhuanhuan.Ui_MainWindow.__init__(self)
        self.setupUi(self)
        # Run zhuanhuan() whenever pushButton is clicked.
        self.pushButton.clicked.connect(self.zhuanhuan)

    def zhuanhuan(self):
        """Validate the form, synthesise the text to ``yuyin_Y.wav`` and report the result.

        The outcome (a validation error or the success message) is appended
        to the text browser. Nothing is sent to the SDK when validation fails.
        """
        self.textBrowser.clear()
        zhuanhuan_text = self.textEdit.toPlainText()  # text to synthesise
        zhuanhuan_yusu = self.lineEdit.text().strip()  # speed
        zhuanhuan_yinliang = self.lineEdit_2.text().strip()  # volume

        # Pick the voice and its resource files. When both boxes are ticked
        # the second one (xiaofeng) wins, as before.
        voice_name = ""
        yuyin_path = ""
        if self.checkBox.isChecked():
            voice_name = "xiaoyan"
            yuyin_path = "fo | D:\\opencv\\test_yuyin\\Windows_aisound_exp1226_6e05e282\\bin\\msc\\res\\tts\\xiaoyan.jet;fo|D:\\opencv\\test_yuyin\\Windows_aisound_exp1226_6e05e282\\bin\\msc\\res\\tts\\common.jet"
        if self.checkBox_2.isChecked():
            voice_name = "xiaofeng"
            yuyin_path = "fo | D:\\opencv\\test_yuyin\\Windows_aisound_exp1226_6e05e282\\bin\\msc\\res\\tts\\xiaofeng.jet;fo|D:\\opencv\\test_yuyin\\Windows_aisound_exp1226_6e05e282\\bin\\msc\\res\\tts\\common.jet"

        # The Qt getters return str (never None), so missing input has to be
        # detected as an empty string. Stop at the first problem found.
        error_message = None
        if not zhuanhuan_text.strip():
            error_message = "未输入文本"
        elif not zhuanhuan_yusu:
            error_message = "未输入语速大小"
        elif not zhuanhuan_yinliang:
            error_message = "未输入音量大小"
        elif not voice_name:
            error_message = "未选择音色"
        if error_message is not None:
            self.textBrowser.append(error_message)
            return

        # Load the iFlytek offline SDK.
        login_dll = yuyin_Y.cdll.LoadLibrary(r"/opencv\test_yuyin\Windows_aisound_exp1226_6e05e282\bin\msc_x64.dll")
        # Account login parameters.
        login_params = b"appid = 6e05e282, work_dir = ."
        # Session parameters; see the vendor documentation for their meaning:
        # https://www.xfyun.cn/doc/mscapi/Windows&Linux/wlapi.html#qtts-h-%E8%AF%AD%E9%9F%B3%E5%90%88%E6%88%90
        session_begin_params = "engine_type=local, voice_name=%s, text_encoding=utf8, tts_res_path=%s, sample_rate=8000, speed=%s, volume=%s, pitch=50, rdn=2, effect=0, speed_increase=1, rcn=1"%(voice_name,yuyin_path,zhuanhuan_yusu,zhuanhuan_yinliang)
        yuyin_Y.login(login_dll, login_params)
        session_ID = yuyin_Y.QTTS_Session_Begin(login_dll, session_begin_params)
        yuyin_Y.QTTS_Text_PUT(login_dll, session_ID, zhuanhuan_text)

        # 1. Open the output WAV file.
        wavFile = yuyin_Y.wave.open(r"yuyin_Y.wav", "wb")
        # 2. Configure channel count, sample width and frame rate.
        wavFile.setnchannels(1)
        wavFile.setsampwidth(2)
        wavFile.setframerate(yuyin_Y.frame_rate)
        # QTTS_Audio_Get writes the audio and closes wavFile.
        yuyin_Y.QTTS_Audio_Get(login_dll, session_ID, wavFile)
        yuyin_Y.QTTS_Session_End(login_dll, session_ID)

        # Show the result in the bottom text browser (must be a string).
        self.textBrowser.append("转换成功")
if __name__ == "__main__":
    # Start the Qt application, show the main window and hand the event
    # loop's exit status back to the interpreter.
    application = QApplication(sys.argv)
    main_window = MainCode()
    main_window.show()
    exit_status = application.exec_()
    sys.exit(exit_status)
总结
该程序还存在一些问题,比如第一次语音转换之后,第二次使用时输出框依然显示"转换成功",无法确定第二次转换成功的确切时间;输出的文件名无法在GUI中修改,文件也无法保存到使用者指定的文件夹。这些问题将在以后的文章里进一步完善。