Unity整合百度語音識別和合成--REST API

阿新 • • 發佈：2019-01-01

直接上unity的C#指令碼程式碼

百度語音識別

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using System.Xml;
using LitJson;
using System.Text;
using System;
using UnityEngine.UI;
using System.IO;

/// <summary>
/// Records a short clip from the default microphone, uploads it to the Baidu
/// speech-recognition REST endpoint and publishes the recognised text through
/// <see cref="audioToString"/> (mirrored into <see cref="debugText"/> every frame).
/// Typical flow: <see cref="StartMic"/> -> <see cref="EndMic"/> -> GetToken -> GetAudioString.
/// </summary>
public class showVoiceResult1 : MonoBehaviour {

    /// <summary>Length in seconds of the recording buffer requested from the microphone.</summary>
    private const int MaxRecordSeconds = 10;

    private string token;                           // access_token returned by the OAuth endpoint

    private string cuid = "liang";                  // user identifier sent with each request
    private string format = "wav";                  // audio format declared to the service
    private int rate = 8000;                        // sampling rate (also used when recording)
    private int channel = 1;                        // number of channels
    private string speech;                          // audio data, base64 encoded
    private int len;                                // length in bytes of the raw (un-encoded) audio
    private string lan = "zh";                      // language (currently not sent with the request)

    private string grant_Type = "client_credentials";
    private string client_ID = "********";                       // Baidu app key
    private string client_Secret = "******";                   // Baidu secret key


    private string baiduAPI = "http://vop.baidu.com/server_api";
    private string getTokenAPIPath = "https://openapi.baidu.com/oauth/2.0/token";

    private byte[] clipByte;
    public Text debugText;

    /// <summary>
    /// Text produced by the most recent successful recognition.
    /// </summary>
    public static string audioToString;

    private AudioSource aud;
    private int audioLength;        // duration of the recording, in whole seconds
    private int recordedSamples;    // samples captured before EndMic; 0 means "use the whole clip"

    void Start ()
    {
        EnsureAudioSource();
    }

    // Update is called once per frame
    void Update ()
    {
        // debugText is wired in the inspector; skip quietly when it is not assigned.
        if (debugText != null)
            debugText.text = audioToString;
    }

    /// <summary>
    /// Makes sure <c>aud</c> points at an AudioSource on this GameObject,
    /// adding one when the object does not have it yet.
    /// </summary>
    private void EnsureAudioSource()
    {
        if (aud != null)
            return;

        aud = GetComponent<AudioSource>();
        if (aud == null)
            aud = gameObject.AddComponent<AudioSource>();
    }

    /// <summary>
    /// Requests a Baidu access token and, on success, starts the recognition request.
    /// </summary>
    /// <param name="url">Token endpoint URL.</param>
    /// <returns>Coroutine enumerator.</returns>
    private IEnumerator GetToken(string url)
    {
        WWWForm getTForm = new WWWForm();
        getTForm.AddField("grant_type", grant_Type);
        getTForm.AddField("client_id", client_ID);
        getTForm.AddField("client_secret", client_Secret);

        WWW getTW = new WWW(url, getTForm);
        yield return getTW;

        if (!string.IsNullOrEmpty(getTW.error))
        {
            Debug.LogError(getTW.error);
            yield break;
        }

        string accessToken = ReadAccessToken(getTW.text);
        if (string.IsNullOrEmpty(accessToken))
            yield break;   // ReadAccessToken already logged the reason

        token = accessToken;
        StartCoroutine(GetAudioString(baiduAPI));
    }

    /// <summary>
    /// Extracts <c>access_token</c> from the token endpoint reply.
    /// </summary>
    /// <param name="json">Raw response body.</param>
    /// <returns>The token, or null when the reply is not JSON or lacks the field.</returns>
    private static string ReadAccessToken(string json)
    {
        try
        {
            return JsonMapper.ToObject(json)["access_token"].ToString();
        }
        catch (Exception e)
        {
            // An error reply (bad key/secret) has no access_token field; do not crash the coroutine.
            Debug.LogError("Unexpected token response (" + e.Message + "): " + json);
            return null;
        }
    }

    /// <summary>
    /// Posts the recorded audio as a JSON document and stores the recognised
    /// text in <see cref="audioToString"/> when the service reports success.
    /// </summary>
    /// <param name="url">Recognition endpoint URL.</param>
    /// <returns>Coroutine enumerator.</returns>
    private IEnumerator GetAudioString(string url)
    {
        JsonWriter jw = new JsonWriter();
        jw.WriteObjectStart();
        jw.WritePropertyName("format");
        jw.Write(format);
        jw.WritePropertyName("rate");
        jw.Write(rate);
        jw.WritePropertyName("channel");
        jw.Write(channel);
        jw.WritePropertyName("token");
        jw.Write(token);
        jw.WritePropertyName("cuid");
        jw.Write(cuid);
        jw.WritePropertyName("len");
        jw.Write(len);
        jw.WritePropertyName("speech");
        jw.Write(speech);
        jw.WriteObjectEnd();

        // UTF-8 instead of Encoding.Default so the bytes do not depend on the OS code page.
        byte[] payload = Encoding.UTF8.GetBytes(jw.ToString());

        // NOTE(review): the Baidu REST docs describe the JSON upload mode as
        // Content-Type: application/json - confirm against the current API docs.
        Dictionary<string, string> headers = new Dictionary<string, string>();
        headers["Content-Type"] = "application/json";

        WWW getASW = new WWW(url, payload, headers);
        yield return getASW;

        if (!string.IsNullOrEmpty(getASW.error))
        {
            Debug.LogError(getASW.error);
            yield break;
        }

        string recognized = ExtractRecognizedText(getASW.text);
        if (recognized != null)
        {
            audioToString = recognized;
            Debug.Log(audioToString);
        }
    }

    /// <summary>
    /// Pulls the first recognition candidate out of the service reply and strips
    /// a trailing full-width comma.
    /// </summary>
    /// <param name="json">Raw response body.</param>
    /// <returns>The recognised text, or null when the reply reports a failure or cannot be parsed.</returns>
    private static string ExtractRecognizedText(string json)
    {
        try
        {
            JsonData reply = JsonMapper.ToObject(json);
            if (reply["err_msg"].ToString() != "success.")
            {
                Debug.LogWarning("Speech recognition failed: " + json);
                return null;
            }

            string text = reply["result"][0].ToString();
            // EndsWith is safe on an empty string, unlike Substring(Length - 1).
            if (text.EndsWith("，", StringComparison.Ordinal))
                text = text.Substring(0, text.Length - 1);
            return text;
        }
        catch (Exception e)
        {
            Debug.LogError("Unexpected recognition response (" + e.Message + "): " + json);
            return null;
        }
    }

    /// <summary>
    /// Starts recording from the default microphone into the AudioSource clip.
    /// </summary>
    public void StartMic()
    {
        if (Microphone.devices.Length == 0)
        {
            Debug.Log ("no devices");
            return;
        }

        EnsureAudioSource();
        Microphone.End(null);
        recordedSamples = 0;
        Debug.Log("Start");
        Debug.Log(string.Join(", ", Microphone.devices));

        // Pass null (default device) so that this matches the null device used by
        // GetPosition / IsRecording / End in EndMic; a hard-coded device name only
        // works on machines that happen to expose a device with exactly that name.
        aud.clip = Microphone.Start(null, false, MaxRecordSeconds, rate);
    }

    /// <summary>
    /// Stops recording, converts the captured audio and kicks off recognition.
    /// </summary>
    public void EndMic()
    {
        int lastPos = Microphone.GetPosition(null);
        if (Microphone.IsRecording(null))
        {
            audioLength = lastPos / rate;   // recording duration in seconds
            recordedSamples = lastPos;
        }
        else
        {
            // Not recording any more: treat the buffer as completely filled.
            audioLength = MaxRecordSeconds;
            recordedSamples = 0;
        }
        Debug.Log("Stop");
        Microphone.End(null);

        clipByte = GetClipData();
        if (clipByte == null)
            return;   // GetClipData already logged why there is nothing to send

        len = clipByte.Length;
        speech = Convert.ToBase64String(clipByte);
        StartCoroutine(GetToken(getTokenAPIPath));
        Debug.Log(len);
        Debug.Log(audioLength);
    }

    /// <summary>
    /// Converts the recorded clip to 16-bit little-endian PCM bytes. Only the
    /// samples captured before <see cref="EndMic"/> are returned when that count
    /// is known; otherwise the whole clip is converted.
    /// </summary>
    /// <returns>PCM bytes, or null when there is no recorded data.</returns>
    public byte[] GetClipData()
    {
        if (aud == null || aud.clip == null)
        {
            Debug.LogError("錄音資料為空");
            return null;
        }

        int sampleCount = aud.clip.samples;
        if (recordedSamples > 0 && recordedSamples < sampleCount)
            sampleCount = recordedSamples;   // drop the unused tail of the buffer

        float[] samples = new float[sampleCount];
        aud.clip.GetData(samples, 0);

        byte[] outData = new byte[samples.Length * 2];

        const int rescaleFactor = 32767; // to convert float to Int16

        for (int i = 0; i < samples.Length; i++)
        {
            short pcm = (short)(samples[i] * rescaleFactor);

            // Low byte first (little-endian), written directly to avoid a temp array per sample.
            outData[i * 2] = (byte)(pcm & 0xFF);
            outData[i * 2 + 1] = (byte)((pcm >> 8) & 0xFF);
        }

        if (outData.Length <= 0)
        {
            Debug.LogError("錄音資料為空");
            return null;
        }

        return outData;
    }
}

百度語音識別部分主要借鑑了另一篇文章，地址忘了

百度語音合成部分

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using System.Xml;
using LitJson;
using System.Text;
using System;
using UnityEngine.UI;
using System.IO;

/// <summary>
/// Sends the text shown in <see cref="debugText"/> to the Baidu text-to-speech
/// REST endpoint, stores the returned audio as <c>1.mp3</c> under
/// <c>Application.persistentDataPath</c> and loads it into an AudioSource.
/// Typical flow: <see cref="startTTS"/> -> GetToken -> GetTextAudio -> <see cref="playAud"/>.
/// </summary>
public class showTextTTSResult : MonoBehaviour {

    private string text;  //user input text
    private string token;                           //access_token
    private string cuid = "***";                  //current user id
    private int ctp = 1; // client type choose, web is only value 1
    private string lan = "zh";
    private int spd = 5;
    private int pit = 5;
    private int vol = 5;
    private int per = 3;    //person voice

    private string grant_Type = "client_credentials";
    private string client_ID = "****";                       // Baidu app key
    private string client_Secret = "****";                   // Baidu secret key

    private string baiduAPI = "http://tsn.baidu.com/text2audio";
    private string getTokenAPIPath = "https://openapi.baidu.com/oauth/2.0/token";

    private byte[] clipByte;
    public Text debugText;
    public Text debugMsg;

    /// <summary>
    /// Converted text (not written by this class).
    /// </summary>
    public static string audioToString;

    private AudioSource aud;
    private int audioLength;    // length of the recording (not used by this class)
    private string filePath;

    void Start ()
    {
        EnsureAudioSource();
    }

    // Update is called once per frame
    void Update ()
    {
    }

    /// <summary>
    /// Makes sure <c>aud</c> points at an AudioSource on this GameObject,
    /// adding one when the object does not have it yet.
    /// </summary>
    private void EnsureAudioSource()
    {
        if (aud != null)
            return;

        aud = GetComponent<AudioSource>();
        if (aud == null)
            aud = gameObject.AddComponent<AudioSource>();
    }

    /// <summary>Appends a line to the on-screen debug text when it is assigned.</summary>
    private void AppendMessage(string message)
    {
        if (debugMsg != null)
            debugMsg.text += message;
    }

    /// <summary>Replaces the on-screen debug text when it is assigned.</summary>
    private void SetMessage(string message)
    {
        if (debugMsg != null)
            debugMsg.text = message;
    }

    /// <summary>
    /// get token
    /// </summary>
    /// <param name="url">url</param>
    /// <returns>Coroutine enumerator.</returns>
    private IEnumerator GetToken(string url)
    {
        WWWForm getTForm = new WWWForm();
        getTForm.AddField("grant_type", grant_Type);
        getTForm.AddField("client_id", client_ID);
        getTForm.AddField("client_secret", client_Secret);

        WWW getTW = new WWW(url, getTForm);
        yield return getTW;

        Debug.Log (getTW.text);
        if (!string.IsNullOrEmpty(getTW.error))
        {
            Debug.LogError(getTW.error);
            yield break;
        }

        string accessToken = ReadAccessToken(getTW.text);
        if (string.IsNullOrEmpty(accessToken))
            yield break;   // ReadAccessToken already logged the reason

        token = accessToken;
        Debug.Log (token);
        AppendMessage("token:"+token+"\n");
        StartCoroutine(GetTextAudio(baiduAPI));
    }

    /// <summary>
    /// Extracts <c>access_token</c> from the token endpoint reply.
    /// </summary>
    /// <param name="json">Raw response body.</param>
    /// <returns>The token, or null when the reply is not JSON or lacks the field.</returns>
    private static string ReadAccessToken(string json)
    {
        try
        {
            return JsonMapper.ToObject(json)["access_token"].ToString();
        }
        catch (Exception e)
        {
            // An error reply (bad key/secret) has no access_token field; do not crash the coroutine.
            Debug.LogError("Unexpected token response (" + e.Message + "): " + json);
            return null;
        }
    }

    /// <summary>
    /// Requests synthesized speech for the text in <see cref="debugText"/>, saves
    /// it to disk and loads the saved file into the AudioSource clip.
    /// </summary>
    /// <param name="url">Text-to-speech endpoint URL.</param>
    /// <returns>Coroutine enumerator.</returns>
    private IEnumerator GetTextAudio(string url){
        string sourceText = debugText != null ? debugText.text : null;
        if (string.IsNullOrEmpty(sourceText))
        {
            Debug.LogError("No text to synthesize");
            SetMessage("fail");
            yield break;
        }

        //url?lan ctp  cuid  tok tex vol per spd pit
        WWWForm getTForm = new WWWForm();
        getTForm.AddField ("lan", lan);
        getTForm.AddField ("ctp", ctp);
        getTForm.AddField ("cuid", cuid);
        getTForm.AddField ("tok", token);
        getTForm.AddField ("tex", sourceText);
        getTForm.AddField ("vol",vol);
        getTForm.AddField ("per", per);
        getTForm.AddField ("spd", spd);
        getTForm.AddField ("pit", pit);

        WWW getTW = new WWW (url,getTForm);
        yield return getTW;

        // Check the transport result before touching the payload, so a failed
        // request is never written to disk as if it were audio.
        if (!string.IsNullOrEmpty(getTW.error))
        {
            Debug.Log (getTW.error);
            SetMessage("fail");
            yield break;
        }

        // NOTE(review): per the Baidu TTS docs a failed synthesis is answered with a
        // JSON body instead of audio - confirm against the current API docs.
        if (IsJsonResponse(getTW))
        {
            Debug.LogError("Text-to-speech request was rejected: " + getTW.text);
            SetMessage("fail");
            yield break;
        }

        byte[] s = getTW.bytes;
        filePath = Application.persistentDataPath+"/1.mp3";
        if (!writeFile (s, filePath))
        {
            SetMessage("fail");
            yield break;
        }
        AppendMessage("success to translate txt to voice\n");
        AppendMessage("the voice byte[] length:"+s.Length+"\n");

        // The file load is asynchronous as well: wait for it before asking for the clip.
        WWW w = new WWW ("file://"+filePath);
        yield return w;
        if (!string.IsNullOrEmpty(w.error))
        {
            Debug.LogError(w.error);
            SetMessage("fail");
            yield break;
        }

        EnsureAudioSource();
        // NOTE(review): MPEG decoding through WWW may not be available on every
        // platform (e.g. desktop players) - verify on the target platform.
        aud.clip = w.GetAudioClip (false, false, AudioType.MPEG);

        Debug.Log (sourceText);
        Debug.Log (s.Length);
    }

    /// <summary>
    /// Tells whether the response declares a JSON content type.
    /// Header names are compared case-insensitively.
    /// </summary>
    private static bool IsJsonResponse(WWW response)
    {
        Dictionary<string, string> headers = response.responseHeaders;
        if (headers == null)
            return false;

        foreach (KeyValuePair<string, string> header in headers)
        {
            if (string.Equals(header.Key, "Content-Type", StringComparison.OrdinalIgnoreCase))
            {
                return header.Value != null
                    && header.Value.IndexOf("json", StringComparison.OrdinalIgnoreCase) >= 0;
            }
        }
        return false;
    }

    /// <summary>
    /// Writes <paramref name="readByte"/> to <paramref name="fileName"/>, replacing
    /// any existing file.
    /// </summary>
    /// <param name="readByte">Bytes to store.</param>
    /// <param name="fileName">Destination path.</param>
    /// <returns>true when the file was written; false (after logging) otherwise.</returns>
    private bool writeFile(byte[] readByte,string fileName){
        if (readByte == null)
        {
            Debug.LogError("Nothing to write to " + fileName);
            return false;
        }

        try
        {
            // WriteAllBytes creates or truncates the file and closes it for us.
            File.WriteAllBytes(fileName, readByte);
            return true;
        }
        catch (Exception e)
        {
            Debug.LogError("Failed to write " + fileName + ": " + e.Message);
            return false;
        }
    }

    /// <summary>
    /// Clears the debug output and starts the token -> synthesis chain.
    /// </summary>
    public void startTTS()
    {
        SetMessage("");
        StartCoroutine(GetToken(getTokenAPIPath));
    }

    /// <summary>
    /// Plays whatever clip is currently loaded and reports the AudioSource state.
    /// </summary>
    public void playAud(){
        EnsureAudioSource();

        aud.Play ();
        AppendMessage("play the audio:"+aud.isPlaying+"\n");
        AppendMessage("the audio useful:"+aud.enabled+"\n");
    }
}

仿照百度語音識別指令碼寫的，裡面重點主要是獲取的音訊無法在unity直接播放，主要是資料夾許可權問題，unity可讀寫資料夾和Android不一樣，有固定的對應資料夾，Application.persistentDataPath是一個可讀寫資料夾，相關知識有部落格，可自行搜尋，地址忘了。aud.clip = w.GetAudioClip (false, false, AudioType.MPEG);是將MP3檔案賦給unity的音訊物件。

時隔幾個月，現在使用此指令碼的時候報錯，￥_￥上週在專案中還能用
補充下暫時的情況：經過給評論區同學答疑，發現麥克風裝置只能有一個，如果是兩個就無法Start。但現在麥克風能啟動，但資料獲取不到，解決中。。。@_@

最近事情多，找到解決方案後再進行說明。。。

Unity整合百度語音識別和合成--REST API

直接上unity的C#指令碼程式碼百度語音識別 using System.Collections; using System.Collections.Generic; using UnityEngine; using System.Xml; using

Android studio整合百度語音識別api

今天專案中要用到語音功能，所以看了下百度語音api 1，根據百度語音開發文件，建立應用，下載相應的jar包，新增到你的應用 2，清單檔案配置資訊，

百度語音識別REST API使用方法（含C++程式碼）——不需要整合SDK的方法

本文程式碼為C++版，可以用於C環境的應用開發中，下面介紹其中重要的程式碼。下面程式碼是一個可以使用該方式進行語音識別功能的例項程式碼 #include <stdio.h> #include <stdlib.h> #include "curl

百度語音識別API的使用樣例（python實現）

百度給的樣例程式，不論C還是Java版，都分為method1和method2兩種前者稱為隱式（post的是json串，音訊資料編碼到json裡），後者稱為顯式（post的就是音訊資料）一開始考慮到python wave包處理的都是“字串”，擔心跟C語言的陣列不一致，所

Qt：使用百度語音識別REST API，做全平臺語音識別

百度語音開發介紹文件：使用語音識別，需要在百度申請一個應用，然後拿到API Key和Secret Key，然後才可以使用語音識別 ps：我的示例裡面有放了一組可用的Key，但是僅供各位測試使用，有需要開發App的請自行申請，放在示例中的Key我可能隨時會撤銷。 ps：

python呼叫百度語音識別 api

#!/usr/bin/env python # -*- coding: utf-8 -*- #####################################################

百度語音識別api呼叫 python

最近在處理語音檢索相關的事。其中用到語音識別，呼叫的是訊飛與百度的api，前者使用js是實現，後者用python3實現（因為自己使用python）環境： python3.5 centos 7 流程整個百度語音識別rest api 使用分

百度語音識別--示例

終於找到能跑通的，copy下~~import requests import json import os import base64 #設定應用資訊 baidu_server = "https://openapi.baidu.com/oauth/2.0/token?"

android整合百度語音使用離線語音是報（-111）錯誤是解決辦法

android6.0以上在讀寫sd時需要加上執行時許可權判斷，使用百度語音demo出現上述錯誤很可能就是沒有加上執行時許可權判斷，導致百度的 bd_etts_speech_female.dat，bd_etts_speech_male.dat，bd_etts_text.dat

node百度語音識別

絕對乾貨，直接程式碼詳解 1. 擷取一段音訊檔案（wav） eg:audio.wav 2. 驗證語音識別賬戶 var config_data= { apiKey: "4eymBfpmT4Laaaaaaaa", secretKey: "da

mui 百度語音識別轉換文字

前言　　用mui混合開發的APP，現有一個功能需求就是語音轉換成文字，並把語音進行儲存。對此考慮兩種選擇訊飛和百度。最終選擇了百度語音。百度語音　　通過官方文件我們大致可以確定如果想要實現語音識別，要做到以下幾點： 1.獲取Access Token 2.獲取錄音 REST

在Android Studio下使用百度語音識別的一個簡單例子

一、引言利用百度語音識別和百度語音合成可以很方便的設計一個語音互動應用，這裡記錄一下百度線上語音識別的簡單例子以便快速上手。我所用的語音識別包的版本是Baidu-Voice-SDK-Android-1.6.2.zip，開發平臺用的是Android Studio

javaMP3轉pcm 百度語音識別

不多說直接貼程式碼import com.baidu.aip.speech.AipSpeech; import javazoom.spi.mpeg.sampled.file.MpegAudioFileReader; import org.json.JSONObject; imp

【百度語音識別】JavaAPI方式語音識別示例 MP3轉PCM檔案Java實現

【百度語音識別】JavaAPI方式語音識別示例MP3轉PCM Java-API合成語音示例:http://ai.baidu.com/forum/topic/show/496727REST-API文

python pyaudio 百度語音識別api 圖靈api 語音聊天機器人

最近有點頹，還是寫寫python緩解一下（不務正業x idea 之前看到有拆筆記本螢幕做魔鏡的覺得特別神奇，當時記得是要樹莓派來著。現在自己搞搞發現百度的語音識別api還不錯，加上以前註冊的圖靈ai大概也能搞個語音聊天機器人，主要還是筆記本上終於帶了一個

Android開發學習之使用百度語音識別SDK實現語音識別(上)

作為移動網際網路殺手級的互動方式，語音識別從問世以來就一直備受人們的關注，從IOS的Siri到國內的訊飛語音，語音識別技術在移動開發領域是最為充滿前景和希望的技術。Android作為一個移動作業系統，其本身就繼承了Google天生的搜尋基因，因此Androi

ROS下百度語音識別

看到一個好玩的東西，百度的語音識別，大家可以自己DIY一些小東西了。參考：http://www.rosclub.cn/post-1032.html 1. 前面的下載，安裝都沒問題，故跳過不用理會。 2. 測試下載之後：CMakeList.t

基於java的百度語音識別示例

最近一直在搞java，就選擇了java工程。將程式碼拷過去。同時複製檔案“test.pcm”到工程目錄下。就基本上可以了。注：test.pcm是語音檔案，可以用audacity軟體開啟，選擇檔案->匯入->裸資料。設定取樣率為8000Hz。點選播放就能聽見聲音

【ROS總結】ROS下的百度語音識別應用

前言今天閒來無事檢視下語音識別在ROS中的應用，之前在ROS中玩過一段時間的Pocket Sphinx，關於Pocket Sphinx的學習過程以後會介紹，或者可以去網上搜索一些教程，都是比較不錯

python調用百度語音識別接口實時識別

bsp 鼠標 -m pyw lan 語音識別 .com 實時代碼 1、本文直接上幹貨　　奉獻代碼：https://github.com/wuzaipei/audio_discern/tree/master/%E8%AF%AD%E9%9F%B3%E8%AF%86%E5%

Unity整合百度語音識別和合成--REST API

最近事情多，找到解決方案後再進行說明。。。

相關推薦