1. 程式人生 > >Unity整合百度語音識別和合成--REST API

Unity整合百度語音識別和合成--REST API

直接上 Unity 的 C# 指令碼程式碼。

百度語音識別

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using System.Xml;
using LitJson;
using System.Text;
using System;
using UnityEngine.UI;
using System.IO;

/// <summary>
/// Records audio from the default microphone, uploads it to the Baidu speech
/// recognition REST endpoint and exposes the recognized text through
/// <see cref="audioToString"/>, which is mirrored into <see cref="debugText"/> every frame.
/// Call <see cref="StartMic"/> to begin recording and <see cref="EndMic"/> to stop and upload.
/// </summary>
public class showVoiceResult1 : MonoBehaviour {

    /// <summary>Maximum recording length, in seconds, requested from the microphone.</summary>
    private const int MaxRecordSeconds = 10;

    private string token;                           // access_token returned by the OAuth endpoint
    private string cuid = "liang";                  // user identifier sent with every request
    // NOTE(review): the payload built by GetClipData is header-less 16-bit PCM while the
    // declared format is "wav" — confirm against the Baidu documentation whether "pcm" is required.
    private string format = "wav";                  // audio format declared to the service
    private int rate = 8000;                        // sampling rate in Hz (used for recording and declared to the service)
    private int channel = 1;                        // channel count declared to the service
    private string speech;                          // audio payload, base64 encoded
    private int len;                                // byte length of the audio before base64 encoding

    private string grant_Type = "client_credentials";
    private string client_ID = "********";          // Baidu app key
    private string client_Secret = "******";        // Baidu secret key

    private string baiduAPI = "http://vop.baidu.com/server_api";
    private string getTokenAPIPath = "https://openapi.baidu.com/oauth/2.0/token";

    private byte[] clipByte;
    public Text debugText;

    /// <summary>
    /// Text produced by the most recent successful recognition.
    /// </summary>
    public static string audioToString;

    private AudioSource aud;
    private int audioLength;                        // length of the recording, in whole seconds
    private int recordedSamples;                    // samples captured before EndMic; 0 means "use the whole clip"

    void Start () {
        EnsureAudioSource();
    }

    // Update is called once per frame
    void Update () {
        if (debugText != null) {
            debugText.text = audioToString;
        }
    }

    /// <summary>
    /// Makes sure <c>aud</c> refers to an AudioSource on this GameObject, adding one if none exists.
    /// The field is private and was never assigned before, which made every use of it throw.
    /// </summary>
    private void EnsureAudioSource() {
        if (aud != null) {
            return;
        }
        aud = GetComponent<AudioSource>();
        if (aud == null) {
            aud = gameObject.AddComponent<AudioSource>();
        }
    }

    /// <summary>
    /// Returns true when <paramref name="data"/> is a JSON object containing <paramref name="key"/>.
    /// </summary>
    private static bool HasKey(JsonData data, string key) {
        return data != null && data.IsObject && ((IDictionary)data).Contains(key);
    }

    /// <summary>
    /// Requests an access token and, on success, starts the recognition request.
    /// </summary>
    /// <param name="url">URL of the token endpoint.</param>
    /// <returns>Coroutine enumerator.</returns>
    private IEnumerator GetToken(string url) {
        WWWForm getTForm = new WWWForm();
        getTForm.AddField("grant_type", grant_Type);
        getTForm.AddField("client_id", client_ID);
        getTForm.AddField("client_secret", client_Secret);

        WWW getTW = new WWW(url, getTForm);
        yield return getTW;

        if (!string.IsNullOrEmpty(getTW.error)) {
            Debug.LogError(getTW.error);
            yield break;
        }

        JsonData tokenJson = JsonMapper.ToObject(getTW.text);
        if (!HasKey(tokenJson, "access_token")) {
            // Report the raw response instead of throwing on the missing key.
            Debug.LogError("token response has no access_token: " + getTW.text);
            yield break;
        }

        token = tokenJson["access_token"].ToString();
        StartCoroutine(GetAudioString(baiduAPI));
    }

    /// <summary>
    /// Posts the recorded audio as a JSON document and stores the first recognition
    /// result in <see cref="audioToString"/>.
    /// </summary>
    /// <param name="url">URL of the recognition endpoint.</param>
    /// <returns>Coroutine enumerator.</returns>
    private IEnumerator GetAudioString(string url) {
        JsonWriter jw = new JsonWriter();
        jw.WriteObjectStart();
        jw.WritePropertyName("format");
        jw.Write(format);
        jw.WritePropertyName("rate");
        jw.Write(rate);
        jw.WritePropertyName("channel");
        jw.Write(channel);
        jw.WritePropertyName("token");
        jw.Write(token);
        jw.WritePropertyName("cuid");
        jw.Write(cuid);
        jw.WritePropertyName("len");
        jw.Write(len);
        jw.WritePropertyName("speech");
        jw.Write(speech);
        jw.WriteObjectEnd();

        // Encoding.Default depends on the machine's code page; JSON is sent as UTF-8.
        byte[] body = Encoding.UTF8.GetBytes(jw.ToString());
        Dictionary<string, string> headers = new Dictionary<string, string>();
        headers["Content-Type"] = "application/json";

        WWW getASW = new WWW(url, body, headers);
        yield return getASW;

        if (!string.IsNullOrEmpty(getASW.error)) {
            Debug.LogError(getASW.error);
            yield break;
        }

        JsonData getASWJson = JsonMapper.ToObject(getASW.text);
        bool succeeded = HasKey(getASWJson, "err_msg")
            && getASWJson["err_msg"].ToString() == "success."
            && HasKey(getASWJson, "result")
            && getASWJson["result"].IsArray
            && getASWJson["result"].Count > 0;
        if (!succeeded) {
            // Previously a failed recognition was dropped without any trace.
            Debug.LogError("speech recognition failed: " + getASW.text);
            yield break;
        }

        string recognized = getASWJson["result"][0].ToString();
        // Drop a single trailing comma; EndsWith is safe on an empty string, unlike Substring(Length - 1).
        if (recognized.EndsWith(",", StringComparison.Ordinal)) {
            recognized = recognized.Substring(0, recognized.Length - 1);
        }
        audioToString = recognized;
        Debug.Log(audioToString);
    }

    /// <summary>
    /// Starts recording from the default microphone for up to <c>MaxRecordSeconds</c> seconds.
    /// </summary>
    public void StartMic() {
        if (Microphone.devices.Length == 0) {
            Debug.Log("no devices");
            return;
        }

        EnsureAudioSource();
        Microphone.End(null);
        Debug.Log("Start");
        Debug.Log(string.Join(", ", Microphone.devices));

        recordedSamples = 0;
        // null selects the default device, matching the End/GetPosition/IsRecording calls in
        // this class; the former hard-coded "Built-in Microphone" name only exists on some machines.
        aud.clip = Microphone.Start(null, false, MaxRecordSeconds, rate);
    }

    /// <summary>
    /// Stops recording, converts the captured audio and starts the token + recognition requests.
    /// </summary>
    public void EndMic() {
        // The position must be read before Microphone.End.
        int lastPos = Microphone.GetPosition(null);
        if (Microphone.IsRecording(null)) {
            audioLength = lastPos / rate;           // recording length in whole seconds
            recordedSamples = lastPos;
        } else {
            // Recording already stopped by itself: the whole buffer was filled.
            audioLength = MaxRecordSeconds;
            recordedSamples = 0;
        }
        Debug.Log("Stop");
        Microphone.End(null);

        clipByte = GetClipData();
        if (clipByte == null) {
            // GetClipData has already logged the reason.
            return;
        }

        len = clipByte.Length;
        speech = Convert.ToBase64String(clipByte);
        StartCoroutine(GetToken(getTokenAPIPath));
        Debug.Log(len);
        Debug.Log(audioLength);
    }

    /// <summary>
    /// Converts the recorded clip to little-endian 16-bit PCM bytes. When EndMic captured a
    /// recording position, only the samples recorded up to that position are converted.
    /// </summary>
    /// <returns>The PCM bytes, or null when there is no recorded data.</returns>
    public byte[] GetClipData() {
        if (aud == null || aud.clip == null) {
            Debug.LogError("錄音資料為空");
            return null;
        }

        // NOTE(review): assumes a mono clip, as declared by the channel field — confirm for multi-channel devices.
        int sampleCount = aud.clip.samples;
        if (recordedSamples > 0 && recordedSamples < sampleCount) {
            sampleCount = recordedSamples;
        }
        if (sampleCount <= 0) {
            Debug.LogError("錄音資料為空");
            return null;
        }

        float[] samples = new float[sampleCount];
        aud.clip.GetData(samples, 0);

        byte[] outData = new byte[sampleCount * 2];
        const int rescaleFactor = 32767;            // scales a [-1, 1] float sample to Int16
        for (int i = 0; i < sampleCount; i++) {
            short pcm = (short)(Mathf.Clamp(samples[i], -1f, 1f) * rescaleFactor);
            outData[i * 2] = (byte)(pcm & 0xFF);
            outData[i * 2 + 1] = (byte)((pcm >> 8) & 0xFF);
        }
        return outData;
    }
}

百度語音識別部分主要借鑑了另一篇文章,原文地址忘了。

百度語音合成部分

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using System.Xml;
using LitJson;
using System.Text;
using System;
using UnityEngine.UI;
using System.IO;

/// <summary>
/// Sends the content of <see cref="debugText"/> to the Baidu text-to-speech REST endpoint,
/// stores the returned MP3 under <c>Application.persistentDataPath</c> and loads it into an
/// AudioSource. Call <see cref="startTTS"/> to synthesize and <see cref="playAud"/> to play.
/// Progress messages are appended to <see cref="debugMsg"/>.
/// </summary>
public class showTextTTSResult : MonoBehaviour {

    private string token;                           // access_token returned by the OAuth endpoint
    private string cuid = "***";                    // current user id
    private int ctp = 1;                            // client type; 1 is the only value for web clients
    private string lan = "zh";                      // value of the "lan" request field
    private int spd = 5;                            // value of the "spd" request field
    private int pit = 5;                            // value of the "pit" request field
    private int vol = 5;                            // value of the "vol" request field
    private int per = 3;                            // person voice

    private string grant_Type = "client_credentials";
    private string client_ID = "****";              // Baidu app key
    private string client_Secret = "****";          // Baidu secret key

    private string baiduAPI = "http://tsn.baidu.com/text2audio";
    private string getTokenAPIPath = "https://openapi.baidu.com/oauth/2.0/token";

    public Text debugText;
    public Text debugMsg;

    /// <summary>
    /// Not written by this class; kept because it is part of the public surface.
    /// </summary>
    public static string audioToString;

    private AudioSource aud;
    private string filePath;

    void Start () {
        EnsureAudioSource();
    }

    /// <summary>
    /// Makes sure <c>aud</c> refers to an AudioSource on this GameObject, adding one if none exists.
    /// The field is private and was never assigned before, which made every use of it throw.
    /// </summary>
    private void EnsureAudioSource() {
        if (aud != null) {
            return;
        }
        aud = GetComponent<AudioSource>();
        if (aud == null) {
            aud = gameObject.AddComponent<AudioSource>();
        }
    }

    /// <summary>
    /// Returns true when <paramref name="data"/> is a JSON object containing <paramref name="key"/>.
    /// </summary>
    private static bool HasKey(JsonData data, string key) {
        return data != null && data.IsObject && ((IDictionary)data).Contains(key);
    }

    /// <summary>
    /// Decides whether a finished request carries audio. The Content-Type header is looked up
    /// case-insensitively; when it is absent, a body starting with '{' is treated as a JSON error.
    /// </summary>
    private static bool IsAudioResponse(WWW response, byte[] body) {
        Dictionary<string, string> headers = response.responseHeaders;
        if (headers != null) {
            foreach (KeyValuePair<string, string> header in headers) {
                if (string.Equals(header.Key, "Content-Type", StringComparison.OrdinalIgnoreCase)) {
                    return header.Value != null
                        && header.Value.TrimStart().StartsWith("audio", StringComparison.OrdinalIgnoreCase);
                }
            }
        }
        return body != null && body.Length > 0 && body[0] != (byte)'{';
    }

    /// <summary>
    /// Requests an access token and, on success, starts the synthesis request.
    /// </summary>
    /// <param name="url">URL of the token endpoint.</param>
    /// <returns>Coroutine enumerator.</returns>
    private IEnumerator GetToken(string url) {
        WWWForm getTForm = new WWWForm();
        getTForm.AddField("grant_type", grant_Type);
        getTForm.AddField("client_id", client_ID);
        getTForm.AddField("client_secret", client_Secret);

        WWW getTW = new WWW(url, getTForm);
        yield return getTW;

        Debug.Log(getTW.text);
        if (!string.IsNullOrEmpty(getTW.error)) {
            Debug.LogError(getTW.error);
            yield break;
        }

        JsonData tokenJson = JsonMapper.ToObject(getTW.text);
        if (!HasKey(tokenJson, "access_token")) {
            // Report the raw response instead of throwing on the missing key.
            Debug.LogError("token response has no access_token: " + getTW.text);
            yield break;
        }

        token = tokenJson["access_token"].ToString();
        Debug.Log(token);
        debugMsg.text += "token:" + token + "\n";
        StartCoroutine(GetTextAudio(baiduAPI));
    }

    /// <summary>
    /// Posts the text to synthesize, saves the returned audio as "1.mp3" in the persistent
    /// data folder and loads that file into the AudioSource clip.
    /// </summary>
    /// <param name="url">URL of the text-to-audio endpoint.</param>
    /// <returns>Coroutine enumerator.</returns>
    private IEnumerator GetTextAudio(string url) {
        // Request fields: lan ctp cuid tok tex vol per spd pit
        WWWForm getTForm = new WWWForm();
        getTForm.AddField("lan", lan);
        getTForm.AddField("ctp", ctp);
        getTForm.AddField("cuid", cuid);
        getTForm.AddField("tok", token);
        getTForm.AddField("tex", debugText.text);
        getTForm.AddField("vol", vol);
        getTForm.AddField("per", per);
        getTForm.AddField("spd", spd);
        getTForm.AddField("pit", pit);

        WWW getTW = new WWW(url, getTForm);
        yield return getTW;

        // Check the outcome before touching the file system; previously the body was
        // written to disk even when the request had failed.
        if (!string.IsNullOrEmpty(getTW.error)) {
            Debug.LogError(getTW.error);
            debugMsg.text = "fail";
            yield break;
        }

        byte[] audioBytes = getTW.bytes;
        if (!IsAudioResponse(getTW, audioBytes)) {
            Debug.LogError("text2audio did not return audio: " + getTW.text);
            debugMsg.text = "fail";
            yield break;
        }

        filePath = Application.persistentDataPath + "/1.mp3";
        if (!writeFile(audioBytes, filePath)) {
            debugMsg.text = "fail";
            yield break;
        }
        debugMsg.text += "success to translate txt to voice\n";
        debugMsg.text += "the voice byte[] length:" + audioBytes.Length + "\n";

        // The file load is asynchronous as well: wait for it before reading the clip.
        WWW clipLoader = new WWW("file://" + filePath);
        yield return clipLoader;
        if (!string.IsNullOrEmpty(clipLoader.error)) {
            Debug.LogError(clipLoader.error);
            debugMsg.text = "fail";
            yield break;
        }

        EnsureAudioSource();
        aud.clip = clipLoader.GetAudioClip(false, false, AudioType.MPEG);

        Debug.Log(debugText.text);
        Debug.Log(audioBytes.Length);
    }

    /// <summary>
    /// Writes <paramref name="readByte"/> to <paramref name="fileName"/>, replacing any existing file.
    /// </summary>
    /// <param name="readByte">Bytes to write.</param>
    /// <param name="fileName">Destination path.</param>
    /// <returns>True when the file was written; false on invalid input or any write failure.</returns>
    private bool writeFile(byte[] readByte, string fileName) {
        if (readByte == null || string.IsNullOrEmpty(fileName)) {
            return false;
        }
        try {
            // WriteAllBytes truncates an existing file, so no stale tail can survive a shorter write.
            File.WriteAllBytes(fileName, readByte);
            return true;
        } catch (Exception e) {
            // Still best-effort, but the reason is no longer swallowed silently.
            Debug.LogError("failed to write " + fileName + ": " + e.Message);
            return false;
        }
    }

    /// <summary>
    /// Clears the message area and starts the token + synthesis requests.
    /// </summary>
    public void startTTS() {
        debugMsg.text = "";
        StartCoroutine(GetToken(getTokenAPIPath));
    }

    /// <summary>
    /// Plays the most recently loaded clip and appends the playback state to the message area.
    /// </summary>
    public void playAud() {
        EnsureAudioSource();
        aud.Play();
        debugMsg.text += "play the audio:" + aud.isPlaying + "\n";
        debugMsg.text += "the audio useful:" + aud.enabled + "\n";
    }
}

本指令碼仿照上面的百度語音識別指令碼寫成。重點在於:取得的音訊無法在 Unity 中直接播放,主要原因是資料夾許可權——Unity 可讀寫的資料夾與 Android 原生不同,有固定的對應路徑,而 Application.persistentDataPath 就是一個可讀寫的資料夾(相關知識有部落格可自行搜尋,地址忘了)。因此要先把返回的 MP3 寫入該資料夾,再透過 aud.clip = w.GetAudioClip (false, false, AudioType.MPEG); 把 MP3 檔案賦給 Unity 的音訊物件。

時隔幾個月,現在使用此指令碼的時候報錯,¥_¥上週在專案中還能用
補充一下目前的情況:在評論區給同學答疑時發現,麥克風裝置只能有一個,如果有兩個就無法 Start。不過現在麥克風雖然能啟動,卻獲取不到資料,解決中。。。@_@

最近事情多,找到解決方案後再進行說明。。。