1. 程式人生 > 使用微軟的語音識別引擎Microsoft Speech API進行語音控制

使用微軟的語音識別引擎Microsoft Speech API進行語音控制


本人僅作提取:

SREngine語音識別引擎封裝類(SREngine.h 與 SREngine.cpp):

#pragma once

/************************************************************************/
/* Notice: this project is used to support speech recognition of my		*/
/* ImageSpirit project. Please see the corresponding paper for more		*/
/* details. The CORE part of ImageSpirit system will be made public		*/
/* available soon. More resource: http://mmcheng.net/imagespirit/		*/
/* ImageSpirit: Verbal Guided Image Parsing. M.-M. Cheng, S. Zheng,		*/
/* W.-Y. Lin, V. Vineet, P. Sturgess, N. Crook, N. Mitra, P. Torr,		*/
/* ACM TOG, 2014.														*/
/************************************************************************/

#include <sphelper.h> // Microsoft Speech API
#pragma comment(lib,"SAPI.lib")

/**
 * Thin wrapper around the Microsoft Speech API (SAPI) objects used for
 * command-and-control speech recognition.
 *
 * Typical use: call InitializeSapi() once with the window that should receive
 * recognition notifications, then toggle listening with SetRuleState().
 * The COM pointers are public so that the owner can pull events from
 * m_cpRecoContext inside its message handler.
 */
class SREngine
{
public:
	// Speech objects. Declaration order matters: members are destroyed in
	// reverse order, so the grammar goes away before its context, and the
	// context before the recognizer that created it.
	CComPtr <ISpRecognizer> m_cpRecognizer;   // recognition engine instance
	CComPtr <ISpRecoContext> m_cpRecoContext; // context that delivers events
	CComPtr <ISpRecoGrammar> m_cpCmdGrammar;  // command grammar, NULL until loaded

	// Audio input object handed to the recognizer.
	CComPtr <ISpAudio> m_cpAudio;

	// Default window message used for recognition notifications.
	static const UINT WM_RECOEVENT = WM_USER+100;
	// Identifier passed to ISpRecoContext::CreateGrammar for the command grammar.
	static const UINT MYGRAMMARID = 101;

public:
	// Activates (fActivate != FALSE) or deactivates the rule called pszRuleName;
	// the name is forwarded unchanged, so NULL is passed through to SAPI.
	// The default is spelled TRUE rather than SPRS_ACTIVE because the
	// parameter is a BOOL, not an SPRULESTATE (both have the value 1).
	HRESULT SetRuleState(const WCHAR * pszRuleName = NULL, BOOL fActivate = TRUE);

	// Creates the command grammar and loads it from the given XML file.
	HRESULT LoadCmdFromFile(const WCHAR * xmlFileName);

	// Creates recognizer, audio input and context, routes notifications to
	// hWnd as message Msg, and optionally loads a grammar file.
	HRESULT InitializeSapi(HWND hWnd, UINT Msg = WM_RECOEVENT, const WCHAR *xmlFileName = NULL);
};
#include "stdafx.h"
#include "SREngine.h"


/**
 * Sets up speech recognition: creates an in-process recognizer, attaches the
 * default audio input, creates a recognition context and asks it to post
 * message Msg to hWnd whenever one of the subscribed events is queued.
 *
 * @param hWnd        Window that receives the notification message.
 * @param Msg         Message id posted to hWnd for every queued event.
 * @param xmlFileName Optional grammar file; when non-NULL it is loaded through
 *                    LoadCmdFromFile() and that call's result is returned.
 * @return S_OK on success, otherwise the HRESULT of the first failing step.
 *
 * NOTE(review): V_RETURN is defined outside this file; presumably it stores
 * the call's result in `hr` and returns it on failure - confirm.
 */
HRESULT SREngine::InitializeSapi(HWND hWnd, UINT Msg, const WCHAR *xmlFileName)
{
	HRESULT hr = S_OK;
	const ULONGLONG ullInterest = SPFEI(SPEI_SOUND_START) | SPFEI(SPEI_SOUND_END) | SPFEI(SPEI_PHRASE_START) | 
		SPFEI(SPEI_RECOGNITION) | SPFEI(SPEI_FALSE_RECOGNITION) | SPFEI(SPEI_HYPOTHESIS) | 
		SPFEI(SPEI_INTERFERENCE) | SPFEI(SPEI_RECO_OTHER_CONTEXT) | SPFEI(SPEI_REQUEST_UI) | 
		SPFEI(SPEI_RECO_STATE_CHANGE) | SPFEI(SPEI_PROPERTY_NUM_CHANGE) | SPFEI(SPEI_PROPERTY_STRING_CHANGE);

	// Make repeated initialization well defined. CComPtr::CoCreateInstance and
	// CComPtr::operator& both require an empty pointer, so a second call would
	// otherwise assert in debug builds and leak the old interfaces in release
	// builds. Release() is a no-op on an empty pointer. Dropping the grammar
	// also lets LoadCmdFromFile() bind a grammar to the new context.
	m_cpCmdGrammar.Release();
	m_cpRecoContext.Release();
	m_cpAudio.Release();
	m_cpRecognizer.Release();

	V_RETURN(m_cpRecognizer.CoCreateInstance(CLSID_SpInprocRecognizer));
	V_RETURN(SpCreateDefaultObjectFromCategoryId(SPCAT_AUDIOIN, &m_cpAudio));
	V_RETURN(m_cpRecognizer->SetInput(m_cpAudio, TRUE));
	V_RETURN(m_cpRecognizer->CreateRecoContext(&m_cpRecoContext));
	V_RETURN(m_cpRecoContext->SetNotifyWindowMessage(hWnd, Msg, 0, 0));
	// The same mask is used for "notify me" and "queue the event".
	V_RETURN(m_cpRecoContext->SetInterest(ullInterest, ullInterest));

	if (xmlFileName != NULL)
		return LoadCmdFromFile(xmlFileName);
	return hr;
}

/**
 * Creates the command-and-control grammar on the recognition context and
 * loads its rules from an XML grammar file.
 *
 * @param xmlFileName Path of the grammar file.
 * @return S_OK if a grammar is already loaded (the call is then a no-op and
 *         xmlFileName is ignored), E_INVALIDARG for a NULL file name,
 *         SPERR_UNINITIALIZED if no recognition context exists yet,
 *         otherwise the result of creating or loading the grammar.
 */
HRESULT SREngine::LoadCmdFromFile(const WCHAR *xmlFileName)
{
	// A grammar that is already present is kept as is.
	if (m_cpCmdGrammar != NULL)
		return S_OK;
	if (xmlFileName == NULL)
		return E_INVALIDARG;
	// Without a context there is nothing to create the grammar on.
	if (m_cpRecoContext == NULL)
		return SPERR_UNINITIALIZED;

	HRESULT hr = m_cpRecoContext->CreateGrammar(MYGRAMMARID, &m_cpCmdGrammar);  // Command and control (C&C)
	if (FAILED(hr))
		return hr;

	hr = m_cpCmdGrammar->LoadCmdFromFile(xmlFileName, SPLO_DYNAMIC);
	// Drop the empty grammar when loading fails. Leaving it in place would make
	// every later call take the "already loaded" early return above and report
	// success although no rules were ever loaded.
	if (FAILED(hr))
		m_cpCmdGrammar.Release();
	return hr;
}

/**
 * Switches a grammar rule on or off.
 *
 * @param pszRuleName Rule name, forwarded unchanged to
 *                    ISpRecoGrammar::SetRuleState (NULL is passed through).
 * @param fActivate   Non-zero selects SPRS_ACTIVE, zero selects SPRS_INACTIVE.
 * @return SPERR_UNINITIALIZED if no grammar has been loaded, otherwise the
 *         result reported by SAPI.
 */
HRESULT SREngine::SetRuleState(const WCHAR *pszRuleName, BOOL fActivate)
{
	// The grammar stays NULL when initialization or grammar loading failed;
	// fail cleanly instead of dereferencing a null COM pointer.
	if (m_cpCmdGrammar == NULL)
		return SPERR_UNINITIALIZED;

	const SPRULESTATE newState = fActivate ? SPRS_ACTIVE : SPRS_INACTIVE;
	return m_cpCmdGrammar->SetRuleState(pszRuleName, NULL, newState);
}

介面操作展示類(SpRecoUI.h 與 SpRecoUI.cpp):
#ifndef SPRECOUI_H
#define SPRECOUI_H

//#include "ui_SpRecoUI.h"
#include "ui_ImageSpirit.h"

// Main window of the demo: owns an SREngine, receives its notification
// message through nativeEvent() and prints every recognized phrase into the
// text edit of the generated UI.
class SpRecoUI : public QMainWindow
{
	Q_OBJECT

public:
	// Builds the UI, wires the voice button and initializes the speech engine.
	SpRecoUI(QWidget *parent = 0, Qt::WindowFlags flags = 0);
	~SpRecoUI();

	// Drains the queued speech events of the recognition context.
	// Returns FALSE when no context exists, TRUE otherwise.
	LRESULT OnRecoEvent();


	// Native message hook; dispatches SREngine::WM_RECOEVENT to OnRecoEvent().
	bool nativeEvent(const QByteArray &eventType, void *message, long *result);

private slots:
	// Connected to the voice button's pressed() / released() signals.
	void onVoiceStart();
	void onVoiceStop();

private:
	Ui::SpRecoUIClass ui;

	// Speech recognition state: the two flags record that a sound-start and a
	// sound-end event have been seen; recognitions are only reported once
	// both are set.
	bool m_bSoundEnd, m_bSoundStart;
	SREngine m_SREngine;

	// Formats one recognition result and appends it to the text edit.
	void Recognized(CSpEvent &spEvent);
};

#endif // SPRECOUI_H
#include "stdafx.h"
#include "SpRecoUI.h"

#include <QtWidgets/QMessageBox>


// Builds the window: sets up the generated UI, connects the voice button so
// that pressing it starts and releasing it stops listening, and initializes
// the speech engine with this window as the notification target and
// "./SpeechGrammar.xml" as the command grammar. A message box reports an
// initialization failure; the window is still created in that case.
SpRecoUI::SpRecoUI(QWidget *parent, Qt::WindowFlags flags)
	: QMainWindow(parent, flags)
{
	ui.setupUi(this);
	connect(ui.pbVoiceInput, SIGNAL(pressed()), this, SLOT(onVoiceStart()));
	connect(ui.pbVoiceInput, SIGNAL(released()), this, SLOT(onVoiceStop()));

	// SAPI init
	m_bSoundStart = false;
	m_bSoundEnd = false;
	const HWND hNotifyWnd = reinterpret_cast<HWND>(this->winId());
	if (FAILED(m_SREngine.InitializeSapi(hNotifyWnd, SREngine::WM_RECOEVENT, L"./SpeechGrammar.xml"))) {
		// QMessageBox expects its own button enum; the Win32 constant MB_OK
		// that was passed here belongs to ::MessageBox and only worked by
		// accident because its value is zero.
		QMessageBox::information(NULL, "Error", "Initialize speech engine failed!", QMessageBox::Ok);
	}
}

// Nothing to clean up by hand: the destructor body is intentionally empty and
// the members (including m_SREngine) are destroyed automatically afterwards.
SpRecoUI::~SpRecoUI() {}

// Slot for the voice button's pressed() signal: asks the engine to activate
// rules (NULL rule name, activate flag TRUE) and shows the new state in the
// window title.
// NOTE(review): VERIFY_RES is defined outside this file; presumably it checks
// the returned HRESULT - confirm.
void SpRecoUI::onVoiceStart()
{
	VERIFY_RES(m_SREngine.SetRuleState(NULL, TRUE));
	setWindowTitle("Sound started");
}

// Slot for the voice button's released() signal: asks the engine to
// deactivate rules (NULL rule name, activate flag FALSE) and shows the new
// state in the window title.
// NOTE(review): VERIFY_RES is defined outside this file; presumably it checks
// the returned HRESULT - confirm.
void SpRecoUI::onVoiceStop()
{
	VERIFY_RES(m_SREngine.SetRuleState(NULL, FALSE));
	setWindowTitle("Sound stopped");
}

bool SpRecoUI::nativeEvent(const QByteArray &eventType, void *message, long *result)
{
	MSG* pMsg = (MSG*) message;
	setWindowTitle("Control - Debug: winEvent");
	if(pMsg->message == SREngine::WM_RECOEVENT)
		*result = this->OnRecoEvent();

	return false;
}

// Speech Recognition Event Process
// Speech recognition event pump: pulls events from the recognition context
// until CSpEvent::GetFrom stops returning S_OK. Sound-start and sound-end
// events set their flag; a recognition event is forwarded to Recognized()
// only when both flags are set. The flags are never cleared here.
// Returns FALSE if there is no recognition context, TRUE otherwise.
LRESULT SpRecoUI::OnRecoEvent()
{
	if (m_SREngine.m_cpRecoContext == NULL)
		return FALSE;

	CSpEvent recoEvent;
	for (;;) {
		if (recoEvent.GetFrom(m_SREngine.m_cpRecoContext) != S_OK)
			break;

		setWindowTitle("Control - Debug");

		if (recoEvent.eEventId == SPEI_SOUND_START) {
			m_bSoundStart = true;
		} else if (recoEvent.eEventId == SPEI_SOUND_END) {
			m_bSoundEnd = true;
		} else if (recoEvent.eEventId == SPEI_RECOGNITION) {
			if (m_bSoundStart && m_bSoundEnd)
				Recognized(recoEvent);
		}
	}
	return TRUE;
}

void SpRecoUI::Recognized(CSpEvent &spEvent)
{
	USES_CONVERSION;
	CComPtr<ISpRecoResult> cpResult = spEvent.RecoResult();
	CSpDynamicString dstrText;
	cpResult->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, &dstrText, NULL);
	QString strResult = dstrText.CopyToChar(); //  W2T(dstrText);
	SPPHRASE* pPhrase = NULL;
	if (SUCCEEDED(cpResult->GetPhrase(&pPhrase))){
		strResult += tr(" RuleName:") + QString::fromStdWString(pPhrase->Rule.pszName);
		strResult += tr(" PropName:") + QString::fromStdWString(pPhrase->pProperties->pszName);		
		if (pPhrase->pProperties->pNextSibling)
			strResult += tr(" Sibling:") + QString::fromStdWString(pPhrase->pProperties->pNextSibling->pszName);
		if (pPhrase->pProperties->pFirstChild)
			strResult += tr(" Child:") + QString::fromStdWString(pPhrase->pProperties->pFirstChild->pszName);
	}

	if (pPhrase)
		::CoTaskMemFree(pPhrase);	
	ui.textEdit->insertPlainText(strResult+"\n");
}

SpeechGrammar.xml 語音命令字配置:
<?xml version="1.0"?>
<!-- Refer to `Grammar XML Schema' for details about how to generate this file.  -->
<!-- Command grammar for the speech demo. LANGID 409 (hexadecimal) selects US English.
     In phrase text a leading + asks for high recognition confidence on that word and a
     leading - accepts low confidence (SAPI text grammar format). -->
<GRAMMAR LANGID="409">
  <!-- Numeric identifiers of the top-level command rules. -->
  <DEFINE>
    <ID NAME="Activate" VAL="1010"/>
    <ID NAME="MkDeform" VAL="1011"/>
    <ID NAME="ChangeMatCmd" VAL="1012"/>
    <ID NAME="ChangeClrCmd" VAL="1013"/>
    <ID NAME="MoveCmd" VAL="1014"/>
  </DEFINE>
  <!-- "Activate the <object>" -->
  <RULE NAME="Activate" ID="Activate" TOPLEVEL="ACTIVE">
    <P>+Activate -the</P>
    <RULEREF NAME="ObjectAndDescription" PROPNAME="ObjDes"/>
  </RULE>
  <!-- "Make the <object> <deformation>" -->
  <RULE NAME="MkDeform" ID="MkDeform" TOPLEVEL="ACTIVE">
    <P>+Make -the</P>
    <RULEREF NAME="ObjectAndDescription" PROPNAME="ObjDes"/>
    <RULEREF NAME="DeformType" PROPNAME="DType"/>
  </RULE>
  <!-- "Change the <object> [from] <color> to <color>" -->
  <RULE NAME="ChangeClrCmd" ID="ChangeClrCmd" TOPLEVEL="ACTIVE">
    <P>+Change -the</P>
    <RULEREF NAME="ObjectAndDescription" PROPNAME="ObjDes"/>
    <OPT>-from</OPT>
    <RULEREF NAME="ColorAtri" PROPNAME="Color1"/>
    <P>-to</P>
    <RULEREF NAME="ColorAtri" PROPNAME="Color2"/>
  </RULE>
  <!-- "Change the <object> [from] <material> to <material>" -->
  <RULE NAME="ChangeMatCmd" ID="ChangeMatCmd" TOPLEVEL="ACTIVE">
    <P>+Change -the</P>
    <RULEREF NAME="ObjectAndDescription" PROPNAME="ObjDes"/>
    <OPT>-from</OPT>
    <RULEREF NAME="MaterialAtri" PROPNAME="Material1"/>
    <P>-to</P>
    <RULEREF NAME="MaterialAtri" PROPNAME="Material2"/>
  </RULE>
  <!-- "Move|Repeat the <object> <direction> [along the <object>]" -->
  <RULE NAME="MoveCmd" ID="MoveCmd" TOPLEVEL="ACTIVE">
    <L>
      <P>Move</P>
      <P>Repeat</P>
    </L>
    <P>-the</P>
    <RULEREF NAME="ObjectAndDescription" PROPNAME="ObjDes"/>
    <RULEREF NAME="MoveDirection" PROPNAME="MvDir"/>
    <OPT>
      <P>along -the</P>
      <RULEREF NAME="ObjectAndDescription" PROPNAME="ObjDes2"/>
    </OPT>
  </RULE>
  <!-- Object reference: color, material, object name, position, in that order.
       The color, material and position rules consist of an optional part only. -->
  <RULE NAME="ObjectAndDescription">
    <RULEREF NAME="ColorAtri" PROPNAME="ClrAtri"/>
    <RULEREF NAME="MaterialAtri" PROPNAME="MaterAtri"/>
    <RULEREF NAME="ObjectName" PROPNAME="ObjName"/>
    <RULEREF NAME="PositionAtri" PROPNAME="PosAtri"/>
  </RULE>
  <!-- Object names; the rule is marked DYNAMIC. -->
  <RULE NAME="ObjectName" DYNAMIC="TRUE">
    <L>
      <P PROPNAME="NUMBER" VAL="0">wall</P>
      <P PROPNAME="NUMBER" VAL="1">floor</P>
      <P PROPNAME="NUMBER" VAL="2">picture</P>
      <P PROPNAME="NUMBER" VAL="3">cabinet</P>
      <P PROPNAME="NUMBER" VAL="4">chair</P>
      <P PROPNAME="NUMBER" VAL="5">table</P>
      <P PROPNAME="NUMBER" VAL="6">window</P>
      <P PROPNAME="NUMBER" VAL="7">door</P>
      <P PROPNAME="NUMBER" VAL="8">ceiling</P>
      <P PROPNAME="NUMBER" VAL="9">lamp</P>
      <P PROPNAME="NUMBER" VAL="10">bed</P>
      <P PROPNAME="NUMBER" VAL="11">desk</P>
      <P PROPNAME="NUMBER" VAL="12">monitor</P>
      <P PROPNAME="NUMBER" VAL="13">car</P>
      <P PROPNAME="NUMBER" VAL="14">blinds</P>
      <P PROPNAME="NUMBER" VAL="15">television</P>
      <P PROPNAME="NUMBER" VAL="16">sofa</P>
      <P PROPNAME="NUMBER" VAL="17">counter</P>
    </L>
  </RULE>
  <!-- Optional color attribute. -->
  <RULE NAME="ColorAtri">
    <OPT>
      <L>
        <P PROPNAME="NUMBER" VAL="0">black</P>
        <P PROPNAME="NUMBER" VAL="1">blue</P>
        <P PROPNAME="NUMBER" VAL="2">brown</P>
        <P PROPNAME="NUMBER" VAL="3">gray</P>
        <P PROPNAME="NUMBER" VAL="4">green</P>
        <P PROPNAME="NUMBER" VAL="5">orange</P>
        <P PROPNAME="NUMBER" VAL="6">pink</P>
        <P PROPNAME="NUMBER" VAL="7">purple</P>
        <P PROPNAME="NUMBER" VAL="8">red</P>
        <P PROPNAME="NUMBER" VAL="9">white</P>
        <P PROPNAME="NUMBER" VAL="10">yellow</P>
      </L>
    </OPT>
  </RULE>
  <!-- Optional position attribute: "in" followed by one cell of a 3x3 layout. -->
  <RULE NAME="PositionAtri">
    <OPT>
      <P>-in</P>
      <L>
        <P PROPNAME="NUMBER" VAL="0">top-left</P>
        <P PROPNAME="NUMBER" VAL="1">top-middle</P>
        <P PROPNAME="NUMBER" VAL="2">top-right</P>
        <P PROPNAME="NUMBER" VAL="3">center-left</P>
        <P PROPNAME="NUMBER" VAL="4">center-middle</P>
        <P PROPNAME="NUMBER" VAL="5">center-right</P>
        <P PROPNAME="NUMBER" VAL="6">bottom-left</P>
        <P PROPNAME="NUMBER" VAL="7">bottom-middle</P>
        <P PROPNAME="NUMBER" VAL="8">bottom-right</P>
      </L>
    </OPT>
  </RULE>
  <!-- Optional material attribute. -->
  <RULE NAME="MaterialAtri">
    <OPT>
      <L>
        <P PROPNAME="NUMBER" VAL="0">wood</P>
        <P PROPNAME="NUMBER" VAL="1">painted</P>
        <P PROPNAME="NUMBER" VAL="2">cotton</P>
        <P PROPNAME="NUMBER" VAL="3">paper</P>
        <P PROPNAME="NUMBER" VAL="4">glass</P>
        <P PROPNAME="NUMBER" VAL="5">brick</P>
        <P PROPNAME="NUMBER" VAL="6">metal</P>
        <P PROPNAME="NUMBER" VAL="7">leather</P>
        <P PROPNAME="NUMBER" VAL="8">plastic</P>
      </L>
    </OPT>
  </RULE>
  <!-- Direction words for the move command. -->
  <RULE NAME="MoveDirection">
    <L>
      <P>up</P>
      <P>down</P>
      <P>left</P>
      <P>right</P>
    </L>
  </RULE>
  <!-- Deformation words for the make command.
       NOTE(review): "lower" was spelled "lowwer"; any consumer that compares the
       recognized text against the old spelling must be updated as well. -->
  <RULE NAME="DeformType">
    <L>
      <P>lower</P>
      <P>taller</P>
      <P>smaller</P>
      <P>larger</P>
    </L>
  </RULE>
</GRAMMAR>