1. 程式人生 > >iOS中 語音識別功能/語音轉文字教程具體解釋 韓俊強的博客

iOS中 語音識別功能/語音轉文字教程具體解釋 韓俊強的博客

rdd tex forkey dex errortype map pin ins prop


原文地址http://blog.csdn.net/qq_31810357/article/details/51111702

前言:最近研究了一下語音識別,從百度語音識別到訊飛語音識別。首先說一下個人對兩者的看法:訊飛毫無疑問比較專業,識別率也非常高,語音識別比較精準;可是很多開發者和我一樣期望離線識別,而訊飛的離線識別是收費的。就請求次數來講,兩者都可以申請高配額,對於用戶較多的應用幾乎沒有差別。

基於免費而且支持離線,我選擇了百度離線語音識別。它比較簡單,只是UI設計的工作多一點,下面寫一下教程:

1.首先:需要的庫

技術分享

2.我用的是自定義的UI,所以以功能實現為主(頭文件)

// 頭文件
#import "BDVRCustomRecognitonViewController.h"
#import "BDVRClientUIManager.h"
#import "WBVoiceRecordHUD.h"
#import "BDVRViewController.h"
#import "MyViewController.h"
#import "BDVRSConfig.h"

3.需要知道的功能,能用到的如下:

//-------------------Class methods------------------------
// Creates the voice recognition client object; this object is a singleton
+ (BDVoiceRecognitionClient *)sharedInstance;

// Releases the voice recognition client object
+ (void)releaseInstance;


//-------------------Recognition methods-----------------------
// Checks whether recording is possible
- (BOOL)isCanRecorder;

// Starts voice recognition. The MVoiceRecognitionClientDelegate methods must be implemented,
// and the implementing object is passed in to listen for events.
// For the return value see TVoiceRecognitionStartWorkResult
- (int)startVoiceRecognition:(id<MVoiceRecognitionClientDelegate>)aDelegate;

// Done speaking; call this when the user actively finishes recording
- (void)speakFinish;

// Ends the current voice recognition session
- (void)stopVoiceRecognition;

/**
 * @brief Gets the sample rate of the current recognition
 *
 * @return The sample rate (16000/8000)
 */
- (int)getCurrentSampleRate;

/**
 * @brief Gets the current recognition mode (deprecated)
 *
 * @return The current recognition mode
 */
- (int)getCurrentVoiceRecognitionMode __attribute__((deprecated));

/**
 * @brief Sets the current recognition mode (deprecated). Use -(void)setProperty:(TBDVoiceRecognitionProperty)property; instead
 *
 * @param aMode The recognition mode
 *
 * @return Whether the mode was set successfully
 *         NOTE(review): the declaration returns void, so this @return line looks stale — confirm against the SDK header
 */
- (void)setCurrentVoiceRecognitionMode:(int)aMode __attribute__((deprecated));

// Sets the recognition type
// NOTE(review): marked deprecated here although the comment above recommends it — confirm against the SDK header
- (void)setProperty:(TBDVoiceRecognitionProperty)property __attribute__((deprecated));

// Gets the current recognition type
- (int)getRecognitionProperty __attribute__((deprecated));

// Sets the list of recognition types. Except for EVoiceRecognitionPropertyInput and
// EVoiceRecognitionPropertySong, recognition types can be combined
- (void)setPropertyList: (NSArray*)prop_list;

// cityID only takes effect for the EVoiceRecognitionPropertyMap recognition type
- (void)setCityID: (NSInteger)cityID;

// Gets the current list of recognition types
- (NSArray*)getRecognitionPropertyList;

//-------------------Prompt tones-----------------------
// Plays prompt tones. By default the recording-start and recording-end tones are played
// BDVoiceRecognitionClientResources/Tone
// record_start.caf   sound file for the start of recording
// record_end.caf     sound file for the end of recording
// The sound resources must be added to the project. Users may replace the resource files, but the file names must not change; prompt tones should not be too long, about 0.5 seconds is recommended.
// For aTone values see TVoiceRecognitionPlayTones; returns NO if the file is not found
- (BOOL)setPlayTone:(int)aTone isPlay:(BOOL)aIsPlay;

4.錄音按鈕相關動畫(我自定義的,大家可以借鑒)


// Record button related
@property (nonatomic, weak, readonly) UIButton *holdDownButton;// The hold-to-talk button
/**
 *  Whether the recording has been cancelled
 */
@property (nonatomic, assign, readwrite) BOOL isCancelled;

/**
 *  Whether recording is in progress
 */
@property (nonatomic, assign, readwrite) BOOL isRecording;
/**
 *  Fired when the record button is pressed down; this is when recording starts
 */
- (void)holdDownButtonTouchDown;

/**
 *  Fired when the finger leaves the screen outside the record button; this cancels the recording
 */
- (void)holdDownButtonTouchUpOutside;

/**
 *  Fired when the finger leaves the screen inside the record button; this completes the recording
 */
- (void)holdDownButtonTouchUpInside;

/**
 *  Fired when the finger slides outside the bounds of the record button
 */
- (void)holdDownDragOutside;

5.初始化系統UI

#pragma mark - layout subViews UI

/**
 *  Creates a button object from a normal-state image and a highlighted-state image
 *
 *  @param image   Image shown in the normal state
 *  @param hlImage Image shown in the highlighted state
 *
 *  @return The button object
 */
- (UIButton *)createButtonWithImage:(UIImage *)image HLImage:(UIImage *)hlImage ;
// Presumably fired when the finger drags back inside the record button — inferred from the name; TODO confirm
- (void)holdDownDragInside;
- (void)createInitView; // Creates the initial view; used while the prompt tone is playing
- (void)createRecordView;  // Creates the recording view
- (void)createRecognitionView; // Creates the recognition view
- (void)createErrorViewWithErrorType:(int)aStatus; // Shows detailed error information in the recognition view
- (void)createRunLogWithStatus:(int)aStatus; // Shows detailed status information in the status view

- (void)finishRecord:(id)sender; // Action for the user tapping "finish"
- (void)cancel:(id)sender; // Action for the user tapping "cancel"

// Start / tear down the timer used for the voice level meter (bodies not shown here — TODO confirm details)
- (void)startVoiceLevelMeterTimer;
- (void)freeVoiceLevelMeterTimerTimer;

6.最重要的部分


// Recording finished: tell the client the user is done speaking
 [[BDVoiceRecognitionClient sharedInstance] speakFinish];

// Cancel recording: end the current recognition session
[[BDVoiceRecognitionClient sharedInstance] stopVoiceRecognition];

7.兩個代理方法

/**
 *  MVoiceRecognitionClientDelegate callback reporting a change in the recognizer's work status.
 *
 *  @param aStatus One of the EVoiceRecognitionClientWork* status values handled below.
 *  @param aObj    Status-dependent payload. It is read only for the FlushData status (treated as
 *                 an array whose first element is the partial text) and for the Finish status
 *                 (an array of candidates outside input mode, otherwise handed to
 *                 -[BDVRSConfig composeInputModeResult:]).
 */
- (void)VoiceRecognitionClientWorkStatus:(int)aStatus obj:(id)aObj
{
    // Tag used to find the partial-result label again, so that one label is reused for the
    // whole session instead of a new one being stacked on top for every intermediate result.
    static const NSInteger kFlushLabelTag = 51111702;

    switch (aStatus)
    {
        case EVoiceRecognitionClientWorkStatusFlushData: // Intermediate results streamed to the screen
        {
            // -firstObject returns nil for an empty array, where -objectAtIndex:0 would raise.
            NSString *text = nil;
            if ([aObj isKindOfClass:[NSArray class]])
            {
                text = [(NSArray *)aObj firstObject];
            }

            if ([text length] > 0)
            {
                // Disabled in the original:
                // [clientSampleViewController logOutToContinusManualResut:text];

                UILabel *flushLabel = (UILabel *)[self.view viewWithTag:kFlushLabelTag];
                if (![flushLabel isKindOfClass:[UILabel class]])
                {
                    flushLabel = [[UILabel alloc] initWithFrame:CGRectMake(kScreenWidth/2 - 100,64,200,60)];
                    flushLabel.tag = kFlushLabelTag;
                    flushLabel.textAlignment = NSTextAlignmentCenter;
                    flushLabel.font = [UIFont systemFontOfSize:18.0f];
                    flushLabel.numberOfLines = 0;
                    flushLabel.backgroundColor = [UIColor whiteColor];
                    [self.view addSubview:flushLabel];
                }
                flushLabel.text = text;
                // Keep the label on top, as a freshly added label would have been.
                [self.view bringSubviewToFront:flushLabel];
            }

            break;
        }
        case EVoiceRecognitionClientWorkStatusFinish: // Recognition completed normally with a result
        {
            [self createRunLogWithStatus:aStatus];

            if ([[BDVoiceRecognitionClient sharedInstance] getRecognitionProperty] != EVoiceRecognitionPropertyInput)
            {
                // In search mode the result is an array, for example
                // ["公園", "公元"]
                NSArray *audioResultData = aObj;
                NSMutableString *tmpString = [NSMutableString string];

                // One candidate per line, each terminated by CRLF.
                for (id candidate in audioResultData)
                {
                    [tmpString appendFormat:@"%@\r\n", candidate];
                }

                clientSampleViewController.resultView.text = nil;
                [clientSampleViewController logOutToManualResut:tmpString];
            }
            else
            {
                NSString *tmpString = [[BDVRSConfig sharedInstance] composeInputModeResult:aObj];
                [clientSampleViewController logOutToContinusManualResut:tmpString];
            }

            if (self.view.superview)
            {
                [self.view removeFromSuperview];
            }

            break;
        }
        case EVoiceRecognitionClientWorkStatusReceiveData:
        {
            // This status is only used in input mode.
            // In input mode the result carries confidence values, for example:
            //  [
            //      [
            //         {
            //             "百度" = "0.6055192947387695";
            //         },
            //         {
            //             "擺渡" = "0.3625582158565521";
            //         },
            //      ]
            //      [
            //         {
            //             "一下" = "0.7665404081344604";
            //         }
            //      ],
            //   ]
            // Temporarily disabled in the original, because it interfered with navigating to the result:
            //            NSString *tmpString = [[BDVRSConfig sharedInstance] composeInputModeResult:aObj];
            //            [clientSampleViewController logOutToContinusManualResut:tmpString];

            break;
        }
        case EVoiceRecognitionClientWorkStatusEnd: // User finished speaking; waiting for the server's result
        {
            [self createRunLogWithStatus:aStatus];
            if ([BDVRSConfig sharedInstance].voiceLevelMeter)
            {
                [self freeVoiceLevelMeterTimerTimer];
            }

            [self createRecognitionView];

            break;
        }
        case EVoiceRecognitionClientWorkStatusCancel:
        {
            if ([BDVRSConfig sharedInstance].voiceLevelMeter)
            {
                [self freeVoiceLevelMeterTimerTimer];
            }

            [self createRunLogWithStatus:aStatus];

            if (self.view.superview)
            {
                [self.view removeFromSuperview];
            }
            break;
        }
        case EVoiceRecognitionClientWorkStatusStartWorkIng: // Recognizer started working; the user may speak
        {
            if ([BDVRSConfig sharedInstance].playStartMusicSwitch) // If a start tone was played, prompt the user to speak only now
            {
                [self createRecordView];
            }

            if ([BDVRSConfig sharedInstance].voiceLevelMeter)  // Start monitoring the voice level
            {
                [self startVoiceLevelMeterTimer];
            }

            [self createRunLogWithStatus:aStatus];

            break;
        }
        case EVoiceRecognitionClientWorkStatusNone:
        case EVoiceRecognitionClientWorkPlayStartTone:
        case EVoiceRecognitionClientWorkPlayStartToneFinish:
        case EVoiceRecognitionClientWorkStatusStart:
        case EVoiceRecognitionClientWorkPlayEndToneFinish:
        case EVoiceRecognitionClientWorkPlayEndTone:
        {
            // These statuses are only logged.
            [self createRunLogWithStatus:aStatus];
            break;
        }
        case EVoiceRecognitionClientWorkStatusNewRecordData:
        {
            // Nothing to do for this status.
            break;
        }
        default:
        {
            // Any other status: log it, stop the level meter timer and dismiss this view.
            [self createRunLogWithStatus:aStatus];
            if ([BDVRSConfig sharedInstance].voiceLevelMeter)
            {
                [self freeVoiceLevelMeterTimerTimer];
            }
            if (self.view.superview)
            {
                [self.view removeFromSuperview];
            }

            break;
        }
    }
}

/**
 *  Reacts to a change in the recognizer's network status: logs the status and shows the
 *  status-bar network activity indicator while a request is in flight.
 *
 *  @param aStatus Only EVoiceRecognitionClientNetWorkStatusStart and
 *                 EVoiceRecognitionClientNetWorkStatusEnd are handled; any other value is ignored.
 */
- (void)VoiceRecognitionClientNetWorkStatus:(int) aStatus
{
    BOOL networkStarted = (aStatus == EVoiceRecognitionClientNetWorkStatusStart);
    BOOL networkEnded = (aStatus == EVoiceRecognitionClientNetWorkStatusEnd);

    if (!networkStarted && !networkEnded)
    {
        return;
    }

    [self createRunLogWithStatus:aStatus];

    // Visible on start, hidden on end.
    UIApplication *application = [UIApplication sharedApplication];
    [application setNetworkActivityIndicatorVisible:networkStarted];
}

8.錄音按鈕的一些操作


#pragma mark ------ 關於button操作的一些事情-------
/**
 *  Touch-down handler for the record button: starts the ripple animation, resets the
 *  cancelled/recording flags, starts voice recognition and shows the hint image.
 *  If recognition cannot be started, the animation is stopped again and
 *  -firstStartError: is scheduled with the status code as a string.
 */
- (void)holdDownButtonTouchDown {
    // A display link retains its target and keeps firing until it is invalidated. Stop any
    // link left over from an earlier touch-down before replacing it, otherwise it would run
    // (and keep adding ripple layers) with nothing left that could stop it.
    [_disPlayLink invalidate];

    // Start the animation: -delayAnimation is called once every 40 frames.
    _disPlayLink = [CADisplayLink displayLinkWithTarget:self selector:@selector(delayAnimation)];
    _disPlayLink.frameInterval = 40;
    [_disPlayLink addToRunLoop:[NSRunLoop currentRunLoop] forMode:NSDefaultRunLoopMode];

    self.isCancelled = NO;
    self.isRecording = NO;

    // Start voice recognition. The VoiceRecognitionClientWorkStatus:obj: method of the
    // MVoiceRecognitionClientDelegate protocol must be implemented before this is called.
    int startStatus = [[BDVoiceRecognitionClient sharedInstance] startVoiceRecognition:self];
    if (startStatus != EVoiceRecognitionStartWorking) // Starting failed: report the error
    {
        // Recognition never started, so do not leave the ripple animation running.
        [_disPlayLink invalidate];
        _disPlayLink = nil;

        NSString *statusString = [NSString stringWithFormat:@"%d",startStatus];
        // Delay by 0.3 s so the view can be removed normally when an error occurs.
        [self performSelector:@selector(firstStartError:) withObject:statusString afterDelay:0.3];
        return;
    }

    // "Hold to talk - release to search" hint image; replaces any hint shown earlier.
    [voiceImageStr removeFromSuperview];
    voiceImageStr = [[UIImageView alloc]initWithFrame:CGRectMake(kScreenWidth/2 - 40, kScreenHeight - 153, 80, 33)];
    voiceImageStr.backgroundColor = [UIColor colorWithPatternImage:[UIImage imageNamed:@"searchVoice"]];
    [self.view addSubview:voiceImageStr];
}

/**
 *  Touch-up-outside handler for the record button: stops the animation, cancels the
 *  recognition session and removes this controller's view from its superview.
 */
- (void)holdDownButtonTouchUpOutside {
    // Stop the animation and the display link that drives it.
    CALayer *rootLayer = self.view.layer;
    [rootLayer removeAllAnimations];
    [_disPlayLink invalidate];
    _disPlayLink = nil;

    // Releasing outside the button cancels the recording.
    BDVoiceRecognitionClient *recognitionClient = [BDVoiceRecognitionClient sharedInstance];
    [recognitionClient stopVoiceRecognition];

    UIView *containerView = self.view;
    if (containerView.superview != nil) {
        [containerView removeFromSuperview];
    }
}

/**
 *  Touch-up-inside handler for the record button: stops the animation and tells the
 *  recognition client that the user has finished speaking.
 */
- (void)holdDownButtonTouchUpInside {
    // Stop the animation and the display link that drives it.
    CALayer *rootLayer = self.view.layer;
    [rootLayer removeAllAnimations];
    [_disPlayLink invalidate];
    _disPlayLink = nil;

    BDVoiceRecognitionClient *recognitionClient = [BDVoiceRecognitionClient sharedInstance];
    [recognitionClient speakFinish];
}

/**
 *  Drag-outside handler for the record button. While a recording is in progress nothing
 *  happens (the delegate notification for this case is disabled); otherwise the
 *  isCancelled flag is set so that recording is not started.
 */
- (void)holdDownDragOutside {
    if (self.isRecording) {
        // Already recording: the original drag-outside delegate call is disabled.
        //        if ([self.delegate respondsToSelector:@selector(didDragOutsideAction)]) {
        //            [self.delegate didDragOutsideAction];
        //        }
        return;
    }

    // Not recording yet: just flip the flag.
    self.isCancelled = YES;
}


#pragma mark - layout subViews UI

/**
 *  Builds a 72x72 button positioned from kScreenWidth / kScreenHeight and applies the
 *  given background images.
 *
 *  @param image   Background image for the normal state; skipped when nil.
 *  @param hlImage Background image for the highlighted state; skipped when nil.
 *
 *  @return The newly created button (not yet added to any view).
 */
- (UIButton *)createButtonWithImage:(UIImage *)image HLImage:(UIImage *)hlImage {
    CGRect buttonFrame = CGRectMake(kScreenWidth/2 -36, kScreenHeight - 120, 72, 72);
    UIButton *recordButton = [[UIButton alloc] initWithFrame:buttonFrame];

    if (image != nil) {
        [recordButton setBackgroundImage:image forState:UIControlStateNormal];
    }
    if (hlImage != nil) {
        [recordButton setBackgroundImage:hlImage forState:UIControlStateHighlighted];
    }

    return recordButton;
}

#pragma mark ----------- 動畫部分 -----------
/**
 *  Adds one randomly coloured circular layer to the view and animates it growing from
 *  scale 0 to 1 while fading from 0.8 to 0 opacity over 2 seconds. The layer is removed
 *  again after 1.5 seconds via -removeLayer:.
 */
- (void)startAnimation
{
    const CFTimeInterval rippleDuration = 2;
    CGSize screenSize = [UIScreen mainScreen].bounds.size;

    // Circle whose diameter equals the screen width, centred 84pt above the bottom edge.
    CALayer *rippleLayer = [[CALayer alloc] init];
    CGFloat radius = screenSize.width/2;
    rippleLayer.cornerRadius = radius;
    rippleLayer.frame = CGRectMake(0, 0, radius * 2, radius * 2);
    rippleLayer.position = CGPointMake(screenSize.width/2, screenSize.height - 84);

    // Each colour component is a random multiple of 0.1 in the range 0.0 ... 0.9.
    CGFloat red = arc4random()%10*0.1;
    CGFloat green = arc4random()%10*0.1;
    CGFloat blue = arc4random()%10*0.1;
    rippleLayer.backgroundColor = [UIColor colorWithRed:red green:green blue:blue alpha:1].CGColor;
    [self.view.layer addSublayer:rippleLayer];

    // Grow from nothing to full size.
    CABasicAnimation *growAnimation = [CABasicAnimation animationWithKeyPath:@"transform.scale.xy"];
    growAnimation.duration = rippleDuration;
    growAnimation.fromValue = @0.0;
    growAnimation.toValue = @1.0;

    // Fade out in two steps.
    CAKeyframeAnimation *fadeAnimation = [CAKeyframeAnimation animationWithKeyPath:@"opacity"];
    fadeAnimation.duration = rippleDuration;
    fadeAnimation.keyTimes = @[@0,@0.5,@1];
    fadeAnimation.values = @[@0.8,@0.4,@0];
    fadeAnimation.removedOnCompletion = YES;

    // Run both animations together with the default timing curve.
    CAAnimationGroup *rippleGroup = [CAAnimationGroup animation];
    rippleGroup.delegate = self;
    rippleGroup.duration = rippleDuration;
    rippleGroup.removedOnCompletion = YES;
    rippleGroup.timingFunction = [CAMediaTimingFunction functionWithName:kCAMediaTimingFunctionDefault];
    rippleGroup.animations = @[growAnimation, fadeAnimation];
    _animaTionGroup = rippleGroup;
    [rippleLayer addAnimation:_animaTionGroup forKey:nil];

    [self performSelector:@selector(removeLayer:) withObject:rippleLayer afterDelay:1.5];
}

/**
 *  Detaches the given layer from its superlayer. Scheduled by -startAnimation with a
 *  1.5 second delay to clean up the layer it added.
 *
 *  @param layer The layer to remove.
 */
- (void)removeLayer:(CALayer *)layer
{
    [layer removeFromSuperlayer];
}


/**
 *  Display-link callback (set as the selector in -holdDownButtonTouchDown); each call
 *  starts one more animation via -startAnimation.
 */
- (void)delayAnimation
{
    [self startAnimation];
}

完成以上操作,就大功告成了!

溫馨提示:

1.因為是語音識別,需要用到麥克風相關權限。模擬器會報12個錯誤,使用真機可以解決;

2.授權文件相關的配置並不複雜,工程的 Bundle Identifier 只需要設置成與百度離線授權一致即可,如下圖:

技術分享

技術分享


終於效果例如以下:

技術分享

技術分享

技術分享

技術分享


有不懂或不明白的地方可以微博聯繫我:


技術分享

iOS開發人員交流群:446310206





iOS中 語音識別功能/語音轉文字教程具體解釋 韓俊強的博客