一個超簡單的語音識別編程，聽寫程序

阿新 • • 發佈：2019-01-23

ngram 語音 art article bob ext version font blog

CSpeechRecognition類封裝了語音識別操作所需調用的幾個接口，使用它進行語音識別編程很方便，也很簡潔。

CSpeechRecognition類的定義如下：

///////////////////////////////////////////////////////////////

// active speech engine

#include <atlbase.h>

extern CComModule _Module;

#include <atlcom.h>

#include <sapi.h>

#include <sphelper.h>

#include <spuihelp.h>

///////////////////////////////////////////////////////////////

// speech message

// Window message used to signal a speech-recognition event to the
// notification window. Parenthesised so the macro expands safely inside
// larger expressions.
#define WM_SREVENT (WM_USER + 102)

class CSpeechRecognition

{

public:

CSpeechRecognition();

virtual ~CSpeechRecognition();

// initialize

BOOL Initialize(HWND hWnd = NULL, BOOL bIsShared = TRUE);

void Destroy();

// start and stop

BOOL Start();

BOOL Stop();

BOOL IsDictationOn()

{

return m_bOnDictation;

}

// event handler

void GetText(WCHAR **ppszCoMemText, ULONG ulStart = 0, ULONG nlCount = -1);

// voice training

HRESULT VoiceTraining(HWND hWndParent);

// microphone setup

HRESULT MicrophoneSetup(HWND hWndParent);

// token list

HRESULT InitTokenList(HWND hWnd, BOOL bIsComboBox = FALSE);

// error string

CString GetErrorString()

{

return m_sError;

}

// interface

CComPtr<ISpRecognizer> m_cpRecoEngine; // SR engine

CComPtr<ISpRecoContext> m_cpRecoCtxt; //Recognition contextfor dictation

CComPtr<ISpRecoGrammar> m_cpDictationGrammar; // Dictation grammar

private:

CString m_sError;

BOOL m_bOnDictation;

};

其中定義的消息WM_SREVENT用於指示語音識別事件，該消息將通知到初始化函數指定的響應窗口。

類中定義了3個接口指針m_cpRecoEngine，m_cpRecoCtxt和m_cpDictationGrammar，分別用於引用語音識別引擎的3個重要接口ISpRecognizer，ISpRecoContext和ISpRecoGrammar。

初始化函數Initialize設定了語音識別引擎的基本工作環境，包括引擎、識別上下文、語法、音頻和事件等的初始化：

// Sets up COM, the recognition engine, its context, the window notification,
// the audio input and the dictation grammar.
//
// hWnd      - window that receives WM_SREVENT when a recognition event occurs.
// bIsShared - TRUE to create the shared recognizer, FALSE for an in-process one.
//
// Returns TRUE only when every step returned S_OK. On failure the error text
// is available through GetErrorString(). Each step runs only if all previous
// steps succeeded, so a failure is never masked by a later call and no
// interface pointer is used before it has been created.
//
// NOTE(review): the engine is put into SPRST_ACTIVE here but m_bOnDictation
// is not touched; confirm against the constructor / callers that this is intended.
BOOL CSpeechRecognition::Initialize(HWND hWnd, BOOL bIsShared)
{
    // com library
    if (FAILED(CoInitialize(NULL)))
    {
        m_sError = _T("Error intialization COM");
        return FALSE;
    }

    // SR engine
    HRESULT hr = S_OK;
    if (bIsShared)
    {
        // Shared reco engine.
        // For a shared reco engine, the audio gets setup automatically
        hr = m_cpRecoEngine.CoCreateInstance(CLSID_SpSharedRecognizer);
    }
    else
    {
        hr = m_cpRecoEngine.CoCreateInstance(CLSID_SpInprocRecognizer);
    }

    // RecoContext
    if (SUCCEEDED(hr))
    {
        hr = m_cpRecoEngine->CreateRecoContext(&m_cpRecoCtxt);
    }

    // Set recognition notification for dictation
    if (SUCCEEDED(hr))
    {
        hr = m_cpRecoCtxt->SetNotifyWindowMessage(hWnd, WM_SREVENT, 0, 0);
    }

    if (SUCCEEDED(hr))
    {
        // when the engine has recognized something
        const ULONGLONG ullInterest = SPFEI(SPEI_RECOGNITION);
        hr = m_cpRecoCtxt->SetInterest(ullInterest, ullInterest);
    }

    // An in-process engine needs its audio input set explicitly; the shared
    // engine manages its own input, so it is left alone.
    if (SUCCEEDED(hr) && !bIsShared)
    {
        // create default audio object
        CComPtr<ISpAudio> cpAudio;
        hr = SpCreateDefaultObjectFromCategoryId(SPCAT_AUDIOIN, &cpAudio);

        // set the input for the engine
        if (SUCCEEDED(hr))
        {
            hr = m_cpRecoEngine->SetInput(cpAudio, TRUE);
        }
    }

    if (SUCCEEDED(hr))
    {
        hr = m_cpRecoEngine->SetRecoState(SPRST_ACTIVE);
    }

    // grammar
    if (SUCCEEDED(hr))
    {
        // Specifies that the grammar we want is a dictation grammar.
        // Initializes the grammar (m_cpDictationGrammar)
        hr = m_cpRecoCtxt->CreateGrammar(0, &m_cpDictationGrammar);
    }

    if (SUCCEEDED(hr))
    {
        hr = m_cpDictationGrammar->LoadDictation(NULL, SPLO_STATIC);
    }

    if (SUCCEEDED(hr))
    {
        hr = m_cpDictationGrammar->SetDictationState(SPRS_ACTIVE);
    }

    if (FAILED(hr))
    {
        m_cpDictationGrammar.Release();
    }

    if (hr != S_OK)
    {
        // Record the failing HRESULT so GetErrorString() is meaningful.
        m_sError.Format(_T("Error initializing speech recognition (hr=0x%08lX)"), hr);
        return FALSE;
    }

    return TRUE;
}

釋放函數Destroy被類的析構函數調用，釋放了類所引用的所有接口：

// Drops the grammar, the recognition context and the engine (in that order)
// and then uninitializes COM.
void CSpeechRecognition::Destroy()
{
    // CComPtr::Release() does nothing on an empty pointer, so no explicit
    // null tests are required before each call.
    m_cpDictationGrammar.Release();
    m_cpRecoCtxt.Release();
    m_cpRecoEngine.Release();

    CoUninitialize();
}

函數Start和Stop用來控制開始和停止接受及識別語音，它們通過調用引擎接口的SetRecoState方法來實現：

// Switches the engine to SPRST_ACTIVE so that it accepts and recognizes speech.
// Returns TRUE if dictation is already on or the state change succeeded,
// FALSE if the engine has not been created or the state change failed.
BOOL CSpeechRecognition::Start()
{
    if (m_bOnDictation)
        return TRUE;

    // Guard against use before a successful Initialize().
    if (!m_cpRecoEngine)
    {
        m_sError = _T("Speech recognition engine is not initialized");
        return FALSE;
    }

    HRESULT hr = m_cpRecoEngine->SetRecoState(SPRST_ACTIVE);
    if (FAILED(hr))
        return FALSE;

    m_bOnDictation = TRUE;
    return TRUE;
}

// Switches the engine to SPRST_INACTIVE so that it stops recognizing speech.
// Returns TRUE if dictation is already off or the state change succeeded,
// FALSE if the engine has not been created or the state change failed.
BOOL CSpeechRecognition::Stop()
{
    if (!m_bOnDictation)
        return TRUE;

    // Guard against use before a successful Initialize().
    if (!m_cpRecoEngine)
    {
        m_sError = _T("Speech recognition engine is not initialized");
        return FALSE;
    }

    HRESULT hr = m_cpRecoEngine->SetRecoState(SPRST_INACTIVE);
    if (FAILED(hr))
        return FALSE;

    m_bOnDictation = FALSE;
    return TRUE;
}

函數GetText是獲取從語音中已識別出的文字的關鍵，應該在響應識別事件/消息的響應函數中調用，其代碼如下所示。

// Drains the pending events of the recognition context and hands back the
// text of the most recent successful recognition.
//
// ppszCoMemText - receives the text buffer produced by ISpRecoResult::GetText;
//                 left untouched when no recognition text was obtained.
//                 NOTE(review): per the SAPI documentation the buffer is
//                 CoTaskMem-allocated and the caller frees it with
//                 CoTaskMemFree - confirm against the caller.
// ulStart       - first phrase element to return (ignored for whole phrase).
// nlCount       - number of elements; the default (all bits set) requests the
//                 whole phrase with replacements applied.
void CSpeechRecognition::GetText(WCHAR **ppszCoMemText, ULONG ulStart, ULONG nlCount)
{
    if (ppszCoMemText == NULL || !m_cpRecoCtxt)
        return;

    const bool bWholePhrase = (nlCount == static_cast<ULONG>(-1));
    WCHAR *pszLatest = NULL;  // newest text obtained during this call
    CSpEvent event;

    // Process all of the recognition events
    while (event.GetFrom(m_cpRecoCtxt) == S_OK)
    {
        switch (event.eEventId)
        {
        case SPEI_RECOGNITION:
            // There may be multiple recognition results, so get all of them
            {
                ISpRecoResult *pResult = event.RecoResult();
                if (pResult == NULL)
                    break;

                WCHAR *pszText = NULL;
                HRESULT hr = S_OK;
                if (bWholePhrase)
                {
                    hr = pResult->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE,
                                          TRUE, &pszText, NULL);
                }
                else
                {
                    ASSERT(nlCount > 0);
                    hr = pResult->GetText(ulStart, nlCount, FALSE, &pszText, NULL);
                }

                if (SUCCEEDED(hr) && pszText != NULL)
                {
                    // Only the newest text is returned; free the one it
                    // supersedes so queued results do not leak.
                    if (pszLatest != NULL)
                        ::CoTaskMemFree(pszLatest);
                    pszLatest = pszText;
                }
                break;
            }

        default:
            break;
        }
    }

    if (pszLatest != NULL)
        *ppszCoMemText = pszLatest;
}

函數InitTokenList調用SpInitTokenComboBox或SpInitTokenListBox函數，把可用的語音識別引擎（語言）填入組合框或列表框中，供顯示和選擇：

// Populates the control identified by hWnd with the SPCAT_RECOGNIZERS tokens,
// using the combo-box helper when bIsComboBox is set and the list-box helper
// otherwise. Returns the helper's HRESULT.
HRESULT CSpeechRecognition::InitTokenList(HWND hWnd, BOOL bIsComboBox)
{
    return bIsComboBox
        ? SpInitTokenComboBox(hWnd, SPCAT_RECOGNIZERS)
        : SpInitTokenListBox(hWnd, SPCAT_RECOGNIZERS);
}

語音識別涉及語音的輸入，通常用話筒來輸入語音。進行語音識別前，需要判斷話筒的位置和設置是否合理，以保證語音識別引擎能獲得有效的語音輸入。函數MicrophoneSetup調用語音識別引擎接口的DisplayUI方法來顯示一個設置話筒的向導，如圖11-4所示。示例代碼如下所示：

// Asks the engine to display its SPDUI_MicTraining UI with hWndParent as the
// parent window. Returns the HRESULT of DisplayUI, or E_UNEXPECTED when the
// engine has not been created yet.
HRESULT CSpeechRecognition::MicrophoneSetup(HWND hWndParent)
{
    // Guard against use before a successful Initialize().
    if (!m_cpRecoEngine)
        return E_UNEXPECTED;

    return m_cpRecoEngine->DisplayUI(hWndParent, NULL, SPDUI_MicTraining, NULL, 0);
}

語音訓練是語音識別的重要基礎，為了獲得期望的識別效果，必須進行語音訓練，以讓語音識別引擎熟悉說話者的口音。函數VoiceTraining調用語音識別引擎接口的DisplayUI方法來顯示一個語音訓練向導，如圖11-5所示。示例代碼如下所示：

// Asks the engine to display its SPDUI_UserTraining UI with hWndParent as the
// parent window. Returns the HRESULT of DisplayUI, or E_UNEXPECTED when the
// engine has not been created yet.
HRESULT CSpeechRecognition::VoiceTraining(HWND hWndParent)
{
    // Guard against use before a successful Initialize().
    if (!m_cpRecoEngine)
        return E_UNEXPECTED;

    return m_cpRecoEngine->DisplayUI(hWndParent, NULL, SPDUI_UserTraining, NULL, 0);
}

與CText2Speech類似，CSpeechRecognition類也提供錯誤處理機制，由GetErrorString函數可以獲得錯誤信息。

11.3.2 示例：用CSpeechRecognition類編制聽寫程序

使用CSpeechRecognition類來編寫語音識別程序很簡單，下面讓我們實現一個聽寫程序Stenotypist，其界面如圖11-6所示。

用VisualC++編制Stenotypist的步驟和要點如下：

1）使用AppWizard生成一個基於對話框的項目Stenotypist；

2）將SpeechRecognition.H，SpeechRecognition.CPP增加到Stenotypist項目中；

3）在資源編輯器中編輯好相應的控件；

4）用ClassWizard……（原文於此處中斷，後續步驟缺失。）

再分享一下我老師大神的人工智能教程吧。零基礎！通俗易懂！風趣幽默！還帶黃段子！希望你也加入到我們人工智能的隊伍中來！https://blog.csdn.net/jiangjunshow

一個超簡單的語音識別編程，聽寫程序

ngram 語音 art article bob ext version font blog CSpeechRecognition類封裝了語音識別操作所需調用的幾個接口，使用它進行語音識別編程很方便，也很簡潔。 CSpeechRecognition類的定義如下： ///

一個超簡單的語音識別編程，聽寫程序

11.3.2 示例：用CSpeechRecognition類編制聽寫程序

一個超簡單的語音識別編程，聽寫程序

python網絡編程，通過服務名稱和會話類型（tcp，udp）獲取端口號，簡單的異常處理

一個整合微軟語音識別技術與語音朗讀的類，基於Microsoft SpeechAPI5.1的開發

《基礎網頁製作》一個超簡單的div佈局，製作靜態網頁很簡單

自學Java編程，如何才能混到一個7K薪資實習生的崗位

極快瑞的函數式編程，Jquery涉及的一些函數

python函數式編程，列表生成式

python學習筆記9：面向對象編程，類

gulp安裝+一個超簡單入門小demo

溫故而知新---淺析三層架構（一個超簡單的系統登錄三層架構實例）

學習編程，技術那麽多，如何選擇呢？

Windows 編程，程序編譯使用的命令行工具。

09.javaweb簡單標簽編程

shell編程，跨服務器備份文件

IO編程，相關概念

簡單的TCP編程2

簡單的UDP編程1

【轉】關於編程，你的練習是不是有效的？

14套java精品高級架構課，Dubbo分布式Restful 服務，並發原理編程，SpringBoot，SpringCloud，RocketMQ中間件視頻教程

使用Web Scraper 插件，不需要編程，也能爬網

一個超簡單的語音識別編程，聽寫程序

11.3.2 示例：用CSpeechRecognition類編制聽寫程序

相關推薦