程序说明

本程序是在我上一次调用科大讯飞离线语音合成SDK、用Python编写的文本转语音程序的基础上所做的进一步开发:这次使用PyQt5模块及其配套的Qt Designer制作了一个GUI界面。

程序展示

该程序分为三大板块进行编写:第一板块是对科大讯飞离线语音合成SDK的调用,第二板块是使用PyQt5编写GUI界面,第三板块是主函数的编写。

第一板块SDK的调用

import ctypes
import wave
import time
from ctypes import cdll, c_int, byref, string_at

frame_rate = 8000                 # sampling frequency
MSP_SUCCESS = 0                   # status code meaning "success"
MSP_TTS_FLAG_STILL_HAVE_DATA = 1  # presumably "more audio is pending" - confirm against the SDK header
MSP_TTS_FLAG_DATA_END = 2         # end-of-data marker
MSP_TTS_FLAG_CMD_CANCELED = 4     # presumably "synthesis was cancelled" - confirm against the SDK header
"""
LoadLibrary: 将指定的模块加载到调用进程的地址空间中(C++)
MSPLogin: 初始化msc,用户登录
QTTSSessionBegin: 开始一次语音合成,分配语音合成资源
QTTSTextPut: 写入要合成的文本
QTTSAudioGet: 获取合成音频
QTTSSessionEnd: 结束本次语音合成
MSPLogout:  退出登录
"""

def login(login_dll, login_params):
    """Log in to the iFLYTEK offline speech-synthesis SDK.

    Args:
        login_dll: Loaded SDK library object exposing ``MSPLogin``.
        login_params: Login parameter string handed to ``MSPLogin`` as its
            third argument (e.g. ``b"appid = ..., work_dir = ."``).

    Returns:
        The status code returned by ``MSPLogin``; it equals ``MSP_SUCCESS``
        when the login succeeded.
    """
    ret = login_dll.MSPLogin(None, None, login_params)
    if ret != MSP_SUCCESS:
        print("登入失败")
        print(ret)
    else:
        print("登入成功")
    # Hand the status back so callers can react to a failed login instead of
    # having to parse the console output.
    return ret
def QTTS_Session_Begin(login_dll, session_begin_params):
    """Open one synthesis session and return its session ID.

    Args:
        login_dll: Loaded SDK library object exposing ``QTTSSessionBegin``.
        session_begin_params: Session parameters as a ``str``; encoded to
            UTF-8 bytes before being passed to the SDK.

    Returns:
        Whatever ``QTTSSessionBegin`` returns, with the return type declared
        as ``c_char_p``. The value is returned even when the SDK reported an
        error; the outcome is only printed.
    """
    # Out-parameter that receives the SDK error code.
    status = c_int()
    encoded_params = bytes(session_begin_params, 'UTF-8')

    begin_session = login_dll.QTTSSessionBegin
    # Declare the result as a C string instead of the default C int.
    begin_session.restype = ctypes.c_char_p
    session = begin_session(encoded_params, byref(status))

    if status.value == 0:
        print("调用成功")
    else:
        print(f'调用失败,错误码 {status.value}')
    return session
def QTTS_Text_PUT(login_dll, session_ID, text):
    """Submit the text to synthesize to an open session.

    Args:
        login_dll: Loaded SDK library object exposing ``QTTSTextPut``.
        session_ID: Session ID obtained from ``QTTS_Session_Begin``.
        text: Text to synthesize, as a ``str``; it is sent UTF-8 encoded.

    Returns:
        The status code returned by ``QTTSTextPut``; it equals
        ``MSP_SUCCESS`` when the text was accepted.
    """
    encoded = text.encode('UTF-8')
    # The length argument is the byte length of the encoded text, not the
    # number of characters.
    ret = login_dll.QTTSTextPut(session_ID, encoded, len(encoded), None)
    if ret != MSP_SUCCESS:
        print("文本写入失败")
    else:
        print("文本写入成功")
    # Return the status so callers can stop early when the text was rejected.
    return ret
def QTTS_Audio_Get(login_dll, session_ID, wavFile):
    """Pull the synthesized audio from the SDK and write it to ``wavFile``.

    ``QTTSAudioGet`` is polled until it reports an error or the synthesis
    status equals ``MSP_TTS_FLAG_DATA_END``. ``wavFile`` is always closed
    before this function returns, even if writing a chunk raises.

    Args:
        login_dll: Loaded SDK library object exposing ``QTTSAudioGet``.
        session_ID: Session ID obtained from ``QTTS_Session_Begin``.
        wavFile: Open, already configured ``wave`` writer; closed here.

    Returns:
        The last error code reported by ``QTTSAudioGet``; it equals
        ``MSP_SUCCESS`` when all audio was retrieved.
    """
    # Out-parameters filled in by every QTTSAudioGet call.
    audio_len, synth_status, getret = c_int(), c_int(), c_int()
    dll = login_dll
    # The result is a raw pointer to the audio buffer (None/0 when empty).
    dll.QTTSAudioGet.restype = ctypes.c_void_p
    try:
        while True:
            pdata = dll.QTTSAudioGet(session_ID, byref(audio_len),
                                     byref(synth_status), byref(getret))
            if getret.value != MSP_SUCCESS:
                # Report the failure instead of silently producing a
                # truncated file.
                print(f'音频获取失败,错误码 {getret.value}')
                break
            if pdata:
                # Copy audio_len bytes out of the SDK buffer into the file.
                wavFile.writeframes(string_at(pdata, audio_len.value))

            if synth_status.value == MSP_TTS_FLAG_DATA_END:
                break
            # Pause between polls, as recommended by the vendor; it can be
            # removed to shorten the conversion time.
            time.sleep(0.1)
    finally:
        wavFile.close()
    return getret.value
def QTTS_Session_End(login_dll, session_ID):
    """End the synthesis session and log out of the SDK.

    Args:
        login_dll: Loaded SDK library object exposing ``QTTSSessionEnd`` and
            ``MSPLogout``.
        session_ID: Session ID obtained from ``QTTS_Session_Begin``.
    """
    dll = login_dll
    # The hint must be a bytes object: ctypes passes ``bytes`` as ``char *``
    # but ``str`` as ``wchar_t *``, which a C function reading a narrow
    # string would see as a truncated hint.
    dll.QTTSSessionEnd(session_ID, b"Normal")
    dll.MSPLogout()

第二板块gui界面的编写

# -*- coding: utf-8 -*-

# Form implementation generated from reading ui file 'xiaoyuzhuanhuan.ui'
#
# Created by: PyQt5 UI code generator 5.15.4
#
# WARNING: Any manual changes made to this file will be lost when pyuic5 is
# run again.  Do not edit this file unless you know what you are doing.


from PyQt5 import QtCore, QtGui, QtWidgets


class Ui_MainWindow(object):
    """Widget layout of the main window.

    ``setupUi`` creates the widgets and attaches them to the window passed
    in; ``retranslateUi`` assigns the visible texts.
    """

    def setupUi(self, MainWindow):
        """Create all widgets, place them on ``MainWindow`` and set their texts.

        Args:
            MainWindow: The window object that receives the central widget,
                menu bar and status bar.
        """
        MainWindow.setObjectName("小鱼语音转换")
        MainWindow.resize(590, 576)
        self.centralwidget = QtWidgets.QWidget(MainWindow)
        self.centralwidget.setObjectName("centralwidget")
        # Result area at the bottom left, with its caption label above it.
        self.textBrowser = QtWidgets.QTextBrowser(self.centralwidget)
        self.textBrowser.setGeometry(QtCore.QRect(0, 460, 400, 60))
        self.textBrowser.setObjectName("textBrowser")
        self.label = QtWidgets.QLabel(self.centralwidget)
        self.label.setGeometry(QtCore.QRect(0, 440, 72, 15))
        self.label.setObjectName("label")
        # Text input area at the top left, with its caption label above it.
        self.textEdit = QtWidgets.QTextEdit(self.centralwidget)
        self.textEdit.setEnabled(True)
        self.textEdit.setGeometry(QtCore.QRect(0, 30, 400, 400))
        self.textEdit.setObjectName("textEdit")
        self.label_2 = QtWidgets.QLabel(self.centralwidget)
        self.label_2.setGeometry(QtCore.QRect(0, 10, 72, 15))
        self.label_2.setObjectName("label_2")
        # Large "convert" button at the bottom right.
        self.pushButton = QtWidgets.QPushButton(self.centralwidget)
        self.pushButton.setGeometry(QtCore.QRect(410, 270, 171, 251))
        self.pushButton.setStyleSheet("font: 14pt \"仿宋\";")
        self.pushButton.setIconSize(QtCore.QSize(100, 40))
        self.pushButton.setObjectName("pushButton")
        # Vertical splitter at the top right holding the option widgets; the
        # children appear in the order in which they are created below.
        self.splitter = QtWidgets.QSplitter(self.centralwidget)
        self.splitter.setGeometry(QtCore.QRect(410, 30, 171, 221))
        self.splitter.setOrientation(QtCore.Qt.Vertical)
        self.splitter.setObjectName("splitter")
        # Voice caption followed by the two voice check boxes.
        self.label_3 = QtWidgets.QLabel(self.splitter)
        self.label_3.setObjectName("label_3")
        self.checkBox = QtWidgets.QCheckBox(self.splitter)
        self.checkBox.setObjectName("checkBox")
        self.checkBox_2 = QtWidgets.QCheckBox(self.splitter)
        self.checkBox_2.setObjectName("checkBox_2")
        # Speed caption and its input field.
        self.label_4 = QtWidgets.QLabel(self.splitter)
        self.label_4.setObjectName("label_4")
        self.lineEdit = QtWidgets.QLineEdit(self.splitter)
        self.lineEdit.setObjectName("lineEdit")
        # Volume caption and its input field.
        self.label_5 = QtWidgets.QLabel(self.splitter)
        self.label_5.setObjectName("label_5")
        self.lineEdit_2 = QtWidgets.QLineEdit(self.splitter)
        self.lineEdit_2.setObjectName("lineEdit_2")
        MainWindow.setCentralWidget(self.centralwidget)
        self.menubar = QtWidgets.QMenuBar(MainWindow)
        self.menubar.setGeometry(QtCore.QRect(0, 0, 590, 26))
        self.menubar.setObjectName("menubar")
        MainWindow.setMenuBar(self.menubar)
        self.statusbar = QtWidgets.QStatusBar(MainWindow)
        self.statusbar.setObjectName("statusbar")
        MainWindow.setStatusBar(self.statusbar)

        self.retranslateUi(MainWindow)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)

    def retranslateUi(self, MainWindow):
        """Set the window title and the visible text of every labelled widget.

        Args:
            MainWindow: The window whose title is set.
        """
        _translate = QtCore.QCoreApplication.translate
        MainWindow.setWindowTitle(_translate("MainWindow", "小鱼语音转换"))
        self.label.setText(_translate("MainWindow", "转换结果"))
        self.label_2.setText(_translate("MainWindow", "文本输入"))
        self.pushButton.setText(_translate("MainWindow", "转换"))
        self.label_3.setText(_translate("MainWindow", "音色"))
        self.checkBox.setText(_translate("MainWindow", "xiaoyan"))
        self.checkBox_2.setText(_translate("MainWindow", "xiaofeng"))
        self.label_4.setText(_translate("MainWindow", "语速"))
        self.label_5.setText(_translate("MainWindow", "音量"))

第三板块主函数的编写

from PyQt5.QtWidgets import QApplication, QMainWindow, QFileDialog
import sys
import xiaoyuzhuanhuan  # UI文件

import yuyin_Y

class MainCode(QMainWindow, xiaoyuzhuanhuan.Ui_MainWindow):
    """Main window: converts the entered text to a WAV file on button click."""

    # Hard-coded locations of the SDK library, its voice resources and the
    # output file.
    _MSC_DLL_PATH = r"/opencv\test_yuyin\Windows_aisound_exp1226_6e05e282\bin\msc_x64.dll"
    _TTS_RES_DIR = "D:\\opencv\\test_yuyin\\Windows_aisound_exp1226_6e05e282\\bin\\msc\\res\\tts"
    _OUTPUT_WAV = r"yuyin_Y.wav"

    def __init__(self):
        """Build the UI and connect the convert button to ``zhuanhuan``."""
        QMainWindow.__init__(self)
        xiaoyuzhuanhuan.Ui_MainWindow.__init__(self)
        self.setupUi(self)

        # Run zhuanhuan whenever pushButton is clicked.
        self.pushButton.clicked.connect(self.zhuanhuan)

    def zhuanhuan(self):
        """Validate the form, synthesize the text and report the outcome.

        The outcome message is shown in ``textBrowser``. Nothing is
        synthesized when the text, speed, volume or voice is missing, when
        the SDK library cannot be loaded, or when no session is created.
        """
        self.textBrowser.clear()

        zhuanhuan_text = self.textEdit.toPlainText()
        zhuanhuan_yusu = self.lineEdit.text().strip()
        zhuanhuan_yinliang = self.lineEdit_2.text().strip()

        # Qt returns an empty string (never None) for an empty widget, so the
        # inputs are tested for emptiness; a failed check aborts the
        # conversion instead of only changing the final message.
        if not zhuanhuan_text.strip():
            self.textBrowser.append("未输入文本")
            return
        if not zhuanhuan_yusu:
            self.textBrowser.append("未输入语速大小")
            return
        if not zhuanhuan_yinliang:
            self.textBrowser.append("未输入音量大小")
            return

        # xiaofeng takes precedence when both boxes are ticked.
        if self.checkBox_2.isChecked():
            voice_name = "xiaofeng"
        elif self.checkBox.isChecked():
            voice_name = "xiaoyan"
        else:
            self.textBrowser.append("未选择音色")
            return
        # Resource list: the voice-specific file plus the shared common file.
        yuyin_path = "fo | %s\\%s.jet;fo|%s\\common.jet" % (
            self._TTS_RES_DIR, voice_name, self._TTS_RES_DIR)

        # Load the iFLYTEK offline SDK; a missing or unloadable library is
        # reported in the window instead of escaping from the slot.
        try:
            login_dll = yuyin_Y.cdll.LoadLibrary(self._MSC_DLL_PATH)
        except OSError as exc:
            self.textBrowser.append("SDK加载失败: %s" % exc)
            return

        # Account login parameters.
        login_params = b"appid = 6e05e282, work_dir = ."
        # Session parameters; see the vendor reference for their meaning:
        # https://www.xfyun.cn/doc/mscapi/Windows&Linux/wlapi.html
        session_begin_params = "engine_type=local, voice_name=%s, text_encoding=utf8, tts_res_path=%s, sample_rate=8000, speed=%s, volume=%s, pitch=50, rdn=2, effect=0, speed_increase=1, rcn=1"%(voice_name,yuyin_path,zhuanhuan_yusu,zhuanhuan_yinliang)

        yuyin_Y.login(login_dll, login_params)

        session_ID = yuyin_Y.QTTS_Session_Begin(login_dll, session_begin_params)
        if not session_ID:
            # No session was created (this also covers a failed login), so
            # there is nothing to synthesize: log out and report the failure
            # rather than writing an empty file and claiming success.
            login_dll.MSPLogout()
            self.textBrowser.append("转换失败")
            return

        yuyin_Y.QTTS_Text_PUT(login_dll, session_ID, zhuanhuan_text)

        # 1. Open the output WAV file.
        wavFile = yuyin_Y.wave.open(self._OUTPUT_WAV, "wb")
        # 2. Configure channel count, sample width and sampling frequency.
        wavFile.setnchannels(1)
        wavFile.setsampwidth(2)
        wavFile.setframerate(yuyin_Y.frame_rate)
        yuyin_Y.QTTS_Audio_Get(login_dll, session_ID, wavFile)

        yuyin_Y.QTTS_Session_End(login_dll, session_ID)

        # Show the outcome in the result box at the bottom (must be a string).
        self.textBrowser.append("转换成功")


if __name__ == '__main__':
    # Script entry point: create the Qt application, show the main window and
    # exit the process with the status of the Qt event loop.
    application = QApplication(sys.argv)
    window = MainCode()
    window.show()
    exit_code = application.exec_()
    sys.exit(exit_code)

总结

该程序还存在一些问题。比如第一次语音转换之后,第二次使用时输出框依然只显示"转换成功",无法确定第二次转换成功的确切时间;输出的文件名无法在GUI中修改,文件也无法保存到使用者指定的文件夹。这些问题将在以后的文章里进一步完善。