程式人生 > 音頻自動增益 與 靜音檢測 算法 附完整C代碼

音頻自動增益 與 靜音檢測 算法 附完整C代碼

標籤:kron gmm input del led sampler 文件 += frame

前面分享過一個算法《音頻增益響度分析 ReplayGain 附完整C代碼示例》

主要用於評估一定長度音頻的音量強度,

而分析之後,很多類似的需求,肯定是做音頻增益,提高音量諸如此類做法。

不過在項目實測的時候,其實真的很難定標準,

到底在什麽樣的環境下,要增大音量,還是降低。

在通訊行業一般的做法就是采用靜音檢測,

一旦檢測為靜音或者噪音,則不做處理,反之通過一定的策略進行處理。

這裏就涉及到兩個算法,一個是靜音檢測,一個是音頻增益。

增益其實沒什麽好說的,類似於數據歸一化拉伸的做法。

靜音檢測 在WebRTC中 是采用計算GMM (Gaussian Mixture Model,高斯混合模型)進行特征提取的。

在很長一段時間裏面,音頻特征 有3個主要的方法,

GMM ,Spectrogram (聲譜圖), MFCC 即 Mel-Frequency Cepstral Coefficients(Mel頻率倒譜係數)

恕我直言,GMM 提取的特征,其魯棒性 不如後兩者。

也不多做介紹,感興趣的同學,翻翻 維基百科 ,補補課。

當然在實際使用算法時,會由此延伸出來一些小技巧。

例如,用靜音檢測 來做音頻裁剪,或者搭配音頻增益做一些音頻增強之類的操作。

自動增益在WebRTC 源代碼文件是:analog_agc.c 和 digital_agc.c

靜音檢測 源代碼文件是: webrtc_vad.c

這個命名,有一定的歷史原因了。

經過梳理後,

增益算法為 agc.c agc.h

靜音檢測為 vad.c vad.h

增益算法的完整示例代碼:

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
//采用https://github.com/mackron/dr_libs/blob/master/dr_wav.h 解碼
#define DR_WAV_IMPLEMENTATION
#include "dr_wav.h"
#include "agc.h"

// Fallback so the example can spell null pointers as `nullptr`;
// only defined when no macro of that name already exists.
#ifndef nullptr
#define nullptr 0
#endif

#ifndef MIN
#define MIN(A, B) ((A) < (B) ? (A) : (B)) #endif //寫wav文件 void wavWrite_int16(char *filename, int16_t *buffer, size_t sampleRate, size_t totalSampleCount) { drwav_data_format format = {}; format.container = drwav_container_riff; // <-- drwav_container_riff = normal WAV files, drwav_container_w64 = Sony Wave64. format.format = DR_WAVE_FORMAT_PCM; // <-- Any of the DR_WAVE_FORMAT_* codes. format.channels = 1; format.sampleRate = (drwav_uint32) sampleRate; format.bitsPerSample = 16; drwav *pWav = drwav_open_file_write(filename, &format); if (pWav) { drwav_uint64 samplesWritten = drwav_write(pWav, totalSampleCount, buffer); drwav_uninit(pWav); if (samplesWritten != totalSampleCount) { fprintf(stderr, "ERROR\n"); exit(1); } } } //讀取wav文件 int16_t *wavRead_int16(char *filename, uint32_t *sampleRate, uint64_t *totalSampleCount) { unsigned int channels; int16_t *buffer = drwav_open_and_read_file_s16(filename, &channels, sampleRate, totalSampleCount); if (buffer == nullptr) { printf("讀取wav文件失敗."); } //僅僅處理單通道音頻 if (channels != 1) { drwav_free(buffer); buffer = nullptr; *sampleRate = 0; *totalSampleCount = 0; } return buffer; } //分割路徑函數 void splitpath(const char *path, char *drv, char *dir, char *name, char *ext) { const char *end; const char *p; const char *s; if (path[0] && path[1] == :) { if (drv) { *drv++ = *path++; *drv++ = *path++; *drv = \0; } } else if (drv) *drv = \0; for (end = path; *end && *end != :;) end++; for (p = end; p > path && *--p != \\ && *p != /;) if (*p == .) 
{ end = p; break; } if (ext) for (s = end; (*ext = *s++);) ext++; for (p = end; p > path;) if (*--p == \\ || *p == /) { p++; break; } if (name) { for (s = p; s < end;) *name++ = *s++; *name = \0; } if (dir) { for (s = path; s < p;) *dir++ = *s++; *dir = \0; } } int agcProcess(int16_t *buffer, uint32_t sampleRate, size_t samplesCount, int16_t agcMode) { if (buffer == nullptr) return -1; if (samplesCount == 0) return -1; WebRtcAgcConfig agcConfig; agcConfig.compressionGaindB = 9; // default 9 dB agcConfig.limiterEnable = 1; // default kAgcTrue (on) agcConfig.targetLevelDbfs = 3; // default 3 (-3 dBOv) int minLevel = 0; int maxLevel = 255; size_t samples = MIN(160, sampleRate / 100); if (samples == 0) return -1; const int maxSamples = 320; int16_t *input = buffer; size_t nTotal = (samplesCount / samples); void *agcInst = WebRtcAgc_Create(); if (agcInst == NULL) return -1; int status = WebRtcAgc_Init(agcInst, minLevel, maxLevel, agcMode, sampleRate); if (status != 0) { printf("WebRtcAgc_Init fail\n"); WebRtcAgc_Free(agcInst); return -1; } status = WebRtcAgc_set_config(agcInst, agcConfig); if (status != 0) { printf("WebRtcAgc_set_config fail\n"); WebRtcAgc_Free(agcInst); return -1; } size_t num_bands = 1; int inMicLevel, outMicLevel = -1; int16_t out_buffer[maxSamples]; int16_t *out16 = out_buffer; uint8_t saturationWarning = 1; //是否有溢出發生,增益放大以後的最大值超過了65536 int16_t echo = 0; //增益放大是否考慮回聲影響 for (int i = 0; i < nTotal; i++) { inMicLevel = 0; int nAgcRet = WebRtcAgc_Process(agcInst, (const int16_t *const *) &input, num_bands, samples, (int16_t *const *) &out16, inMicLevel, &outMicLevel, echo, &saturationWarning); if (nAgcRet != 0) { printf("failed in WebRtcAgc_Process\n"); WebRtcAgc_Free(agcInst); return -1; } memcpy(input, out_buffer, samples * sizeof(int16_t)); input += samples; } WebRtcAgc_Free(agcInst); return 1; } void auto_gain(char *in_file, char *out_file) { //音頻采樣率 uint32_t sampleRate = 0; //總音頻采樣數 uint64_t inSampleCount = 0; int16_t *inBuffer = 
wavRead_int16(in_file, &sampleRate, &inSampleCount); //如果加載成功 if (inBuffer != nullptr) { // kAgcModeAdaptiveAnalog 模擬音量調節 // kAgcModeAdaptiveDigital 自適應增益 // kAgcModeFixedDigital 固定增益 agcProcess(inBuffer, sampleRate, inSampleCount, kAgcModeAdaptiveDigital); wavWrite_int16(out_file, inBuffer, sampleRate, inSampleCount); free(inBuffer); } } int main(int argc, char *argv[]) { printf("WebRTC Automatic Gain Control\n"); printf("博客:http://cpuimage.cnblogs.com/\n"); printf("音頻自動增益\n"); if (argc < 2) return -1; char *in_file = argv[1]; char drive[3]; char dir[256]; char fname[256]; char ext[256]; char out_file[1024]; splitpath(in_file, drive, dir, fname, ext); sprintf(out_file, "%s%s%s_out%s", drive, dir, fname, ext); auto_gain(in_file, out_file); printf("按任意鍵退出程序 \n"); getchar(); return 0; }

靜音檢測完整示例代碼:

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
//采用https://github.com/mackron/dr_libs/blob/master/dr_wav.h 解碼
#define DR_WAV_IMPLEMENTATION

#include "dr_wav.h"
#include "vad.h"

// Fallback so the example can spell null pointers as `nullptr`;
// only defined when no macro of that name already exists.
#ifndef nullptr
#define nullptr 0
#endif

// Smaller of A and B. Each argument may be evaluated twice, so do not pass
// expressions with side effects.
#ifndef MIN
#define  MIN(A, B)        ((A) < (B) ? (A) : (B))
#endif

// Larger of A and B. Each argument may be evaluated twice, so do not pass
// expressions with side effects.
#ifndef MAX
#define  MAX(A, B)        ((A) > (B) ? (A) : (B))
#endif


//讀取wav文件
/**
 * Load a WAV file as 16-bit PCM. Only single-channel audio is accepted.
 *
 * @param filename         Path of the WAV file.
 * @param sampleRate       Out: sample rate; set to 0 on failure.
 * @param totalSampleCount Out: number of samples; set to 0 on failure.
 * @return Buffer allocated by dr_wav (release with drwav_free), or NULL
 *         when loading fails or the file is not mono.
 */
int16_t *wavRead_int16(char *filename, uint32_t *sampleRate, uint64_t *totalSampleCount) {
    unsigned int channels = 0;
    int16_t *buffer = drwav_open_and_read_file_s16(filename, &channels, sampleRate, totalSampleCount);
    if (buffer == NULL) {
        printf("讀取wav文件失敗.");
        // Return here: channels is not meaningful after a failed load, and
        // there is no buffer to release.
        *sampleRate = 0;
        *totalSampleCount = 0;
        return NULL;
    }
    // Only single-channel audio is processed.
    if (channels != 1) {
        drwav_free(buffer);
        *sampleRate = 0;
        *totalSampleCount = 0;
        return NULL;
    }
    return buffer;
}


/**
 * Run the WebRTC voice activity detector over a mono 16-bit PCM buffer and
 * print one decision per frame to stdout (per the accompanying article,
 * 1 means non-silence and 0 means silence).
 *
 * The buffer is consumed in whole frames; a trailing partial frame is
 * ignored.
 *
 * @param buffer        PCM samples to analyse.
 * @param sampleRate    Sample rate in Hz, passed to WebRtcVad_Process.
 * @param samplesCount  Number of samples in buffer.
 * @param vad_mode      Aggressiveness mode passed to WebRtcVad_set_mode.
 * @param per_ms_frames Frame length in milliseconds; clamped to [10, 30].
 * @return 1 on success, -1 on invalid input or any WebRtcVad_* failure.
 */
int vadProcess(int16_t *buffer, uint32_t sampleRate, size_t samplesCount, int16_t vad_mode, int per_ms_frames) {
    if (buffer == NULL) return -1;
    if (samplesCount == 0) return -1;
    // Original author's notes on what the detector accepts:
    //   kValidRates : 8000, 16000, 32000, 48000
    //   10, 20 or 30 ms frames
    per_ms_frames = MAX(MIN(30, per_ms_frames), 10);
    size_t samples = sampleRate * per_ms_frames / 1000;
    if (samples == 0) return -1;
    int16_t *input = buffer;
    size_t nTotal = samplesCount / samples;

    void *vadInst = WebRtcVad_Create();
    if (vadInst == NULL) return -1;
    int status = WebRtcVad_Init(vadInst);
    if (status != 0) {
        printf("WebRtcVad_Init fail\n");
        WebRtcVad_Free(vadInst);
        return -1;
    }
    status = WebRtcVad_set_mode(vadInst, vad_mode);
    if (status != 0) {
        printf("WebRtcVad_set_mode fail\n");
        WebRtcVad_Free(vadInst);
        return -1;
    }

    printf("Activity : \n");
    // size_t index: nTotal is a size_t, so an int counter would be compared
    // across signedness and could overflow on very long inputs.
    for (size_t i = 0; i < nTotal; i++) {
        int nVadRet = WebRtcVad_Process(vadInst, sampleRate, input, samples);
        if (nVadRet == -1) {
            printf("failed in WebRtcVad_Process\n");
            WebRtcVad_Free(vadInst);
            return -1;
        }
        // Output the decision for this frame.
        printf(" %d \t", nVadRet);
        input += samples;
    }
    printf("\n");
    WebRtcVad_Free(vadInst);
    return 1;
}

/**
 * Load a mono WAV file and print its voice-activity decisions using
 * aggressiveness mode 1 and 30 ms frames. Does nothing when the file
 * cannot be loaded.
 *
 * @param in_file Path of the WAV file to analyse.
 */
void vad(char *in_file) {
    // Audio sample rate.
    uint32_t sampleRate = 0;
    // Total number of samples.
    uint64_t inSampleCount = 0;
    int16_t *inBuffer = wavRead_int16(in_file, &sampleRate, &inSampleCount);
    // Continue only when loading succeeded.
    if (inBuffer != NULL) {
        // Aggressiveness mode (0, 1, 2, or 3)
        int16_t mode = 1;
        int per_ms = 30;
        vadProcess(inBuffer, sampleRate, (size_t) inSampleCount, mode, per_ms);
        // The buffer was allocated by dr_wav, so it is released with
        // drwav_free, matching wavRead_int16.
        drwav_free(inBuffer);
    }
}

/**
 * Entry point: prints the banner, runs voice activity detection on the WAV
 * file named by argv[1], then waits for a key press.
 *
 * @return 0 after processing, -1 when no input path was supplied.
 */
int main(int argc, char *argv[]) {
    fputs("WebRTC Voice Activity Detector\n", stdout);
    fputs("博客:http://cpuimage.cnblogs.com/\n", stdout);
    fputs("靜音檢測\n", stdout);

    if (argc >= 2) {
        vad(argv[1]);
        fputs("按任意鍵退出程序 \n", stdout);
        (void) getchar();
        return 0;
    }
    return -1;
}

自動增益項目地址:https://github.com/cpuimage/WebRTC_AGC

具體流程為:

加載wav(拖放wav文件到可執行文件上)->增益處理->保存為_out.wav文件

靜音檢測項目地址:https://github.com/cpuimage/WebRTC_VAD

具體流程為:

加載wav(拖放wav文件到可執行文件上)->輸出靜音檢測結果

備註 :1 為非靜音,0 為靜音

該註意的地方和參數,見代碼註釋。

用cmake即可進行編譯示例代碼,詳情見CMakeLists.txt。

若有其他相關問題或者需求也可以郵件聯系俺探討。

郵箱地址是:
[email protected]

音頻自動增益 與 靜音檢測 算法 附完整C代碼