1. 程式人生 > >Android語音轉文字一使用AudioRecord錄音

Android語音轉文字一使用AudioRecord錄音

  參考:https://www.cnblogs.com/sowhat4999/p/4439837.html

 為什麼不直接使用谷歌封裝好的 MediaRecorder,而要使用 AudioRecord 錄音呢?因為專案中需要將語音轉成文字,而訊飛語音聽寫只支援 PCM 和 WAV 格式的音訊;另外,把錄音轉成 WAV 格式也方便直接播放語音。

下面開始上程式碼吧:

public class AudioRecorder {
  // Sample rate: 8000 Hz, the rate used for telephony; sufficient for human speech.
  public static final int LONG_SAMPLE_RATE = 8000;
  // Sample format: 16-bit PCM, which devices are guaranteed to support
  // (8-bit PCM is not necessarily supported).
  public static final int ENCODING_PCM_16_BIT = AudioFormat.ENCODING_PCM_16BIT;
  // Recording channel configuration: CHANNEL_IN_STEREO is two-channel,
  // CHANNEL_CONFIGURATION_MONO is single-channel.
  public static final int CHANNEL_IN_STEREO = AudioFormat.CHANNEL_IN_STEREO;

  // Capture buffer size in bytes, as reported by AudioRecord.getMinBufferSize().
  private int bufferSizeInBytes;
  // Path of the raw (headerless PCM) capture file.
  private String mRawFilePath;

  // Path of the converted WAV file.
  private String mWavFilePath;

  // Cleared by stop() on the caller's thread while the pool task may still be reading it,
  // hence volatile.
  private volatile AudioRecord mAudioRecord;
  // Whether the recording was cancelled. Written by cancel(), read by the pool task.
  private volatile boolean mCancel;
  // Whether a recording is in progress. Written by start()/stop(), read by the pool task.
  private volatile boolean mRunning;
  private Context mContext;
  // Singleton instance. Volatile so that the double-checked locking in getInstance()
  // can never publish a partially constructed object.
  private static volatile AudioRecorder mAudioRecorder;
  private final ThreadPoolProxy mThreadPoolProxy;
  private OnRecordCompleteListener mCompleteListener;
  // System.currentTimeMillis() at the moment the current recording started.
  private long mStartTimes;
  // Duration of the last recording in whole seconds, computed by stop().
  private int mLen;

  private AudioRecorder(Context context) {
    mContext = context;
    //初始化執行緒池,避免執行緒過多建立,回收困難,記憶體消耗過多.當然也可以new Thread建立執行緒
    mThreadPoolProxy = ThreadPoolProxy.getInstance();
    //建立一個buffer緩衝區
    bufferSizeInBytes =
        AudioRecord.getMinBufferSize(LONG_SAMPLE_RATE, CHANNEL_IN_STEREO,
            ENCODING_PCM_16_BIT);
  }

  /**
   * Starts capturing from the microphone and schedules the background task that drains
   * the recorder to disk. Does nothing if a recording is already in progress.
   *
   * @throws IllegalStateException if the underlying {@link AudioRecord} could not be started
   */
  public void start() {
    if (mRunning) {
      return;
    }

    if (mAudioRecord == null) {
      mAudioRecord = new AudioRecord(MediaRecorder.AudioSource.MIC, LONG_SAMPLE_RATE,
          CHANNEL_IN_STEREO, ENCODING_PCM_16_BIT, bufferSizeInBytes);
    }
    try {
      mAudioRecord.startRecording();
    } catch (IllegalStateException e) {
      // Do not keep a recorder that failed to start: it would be reused (and fail again)
      // on every later start(). Release it so the next attempt builds a fresh one.
      mAudioRecord.release();
      mAudioRecord = null;
      throw e;
    }
    mStartTimes = System.currentTimeMillis();
    // Both flags must be in place before the worker is scheduled, because it reads them.
    mRunning = true;
    mCancel = false;
    mThreadPoolProxy.excute(mRunnable);
  }



  /** Task handed to the thread pool by {@code start()}. */
  private Runnable mRunnable = this::captureAndConvert;

  /**
   * Writes the captured audio to the raw file, converts that file to WAV and then reports
   * the WAV path to the registered listener, if there is one.
   */
  private void captureAndConvert() {
    writeDateTOFile();
    rawToWav(mRawFilePath, mWavFilePath);
    if (mCompleteListener == null) {
      return;
    }
    mCompleteListener.onRecordComplete(mWavFilePath);
  }
  
  /**
   * Stops the current recording, reports its duration in whole seconds to the listener
   * (when one is registered) and releases the recorder. Does nothing if no recorder exists.
   */
  public void stop() {
    final AudioRecord recorder = mAudioRecord;
    if (recorder == null) {
      return;
    }
    // Divide while still a long, then narrow; the original cast bound before the division.
    mLen = (int) ((System.currentTimeMillis() - mStartTimes) / 1000);
    // A listener is optional, so guard against null instead of crashing.
    final OnRecordCompleteListener listener = mCompleteListener;
    if (listener != null) {
      listener.onRecordComplete(mLen);
    }
    mRunning = false;
    try {
      recorder.stop();
    } finally {
      // Always give the native resources back, even if stop() throws.
      recorder.release();
      mAudioRecord = null;
    }
  }

儲存音訊資料

 /**
  * Drains the recorder into a new raw PCM file under the app cache's "audio" directory
  * until the recording is stopped or cancelled. Also sets the raw and WAV target paths,
  * both derived from the current timestamp.
  */
  private void writeDateTOFile() {
    final File dir = new File(FileUtils.getAppCacheDir(mContext) + "/audio");
    if (!dir.exists()) {
      // mkdirs also creates missing parents; a failure surfaces when the file is opened.
      dir.mkdirs();
    }

    final String cacheDir = dir + File.separator + System.currentTimeMillis();

    mRawFilePath = cacheDir + "R.raw";

    mWavFilePath = cacheDir + "W.wav";

    // Work on a local reference: stop() sets the field to null from another thread.
    final AudioRecord recorder = mAudioRecord;
    FileOutputStream fos = null;
    try {
      fos = new FileOutputStream(mRawFilePath);
      if (recorder == null || bufferSizeInBytes <= 0) {
        // Nothing to read from; leave an empty raw file behind for the conversion step.
        return;
      }

      final byte[] audiodata = new byte[bufferSizeInBytes];
      // Finish on a normal stop as well as on cancel. The original looped on the cancel
      // flag only and left the loop after stop() through a NullPointerException.
      while (mRunning && !mCancel) {
        final int readsize = recorder.read(audiodata, 0, audiodata.length);
        if (readsize < 0) {
          // Negative values are AudioRecord error codes, not data: stop reading.
          break;
        }
        if (readsize > 0) {
          // Only the bytes actually delivered are valid audio.
          fos.write(audiodata, 0, readsize);
        }
      }
    } catch (Exception e) {
      e.printStackTrace();
    } finally {
      ioClose(fos);
    }
  }

  /**
   * Converts a raw PCM file into a WAV file by writing a 44-byte header followed by the
   * PCM data. The raw file is deleted afterwards. If the recording was cancelled, the
   * copy is abandoned and the WAV file is deleted.
   *
   * @param rawPath path of the unprocessed audio file
   * @param wavPath path of the WAV file to write
   */
  private void rawToWav(String rawPath, String wavPath) {
    if (rawPath == null || wavPath == null) {
      return;
    }
    FileInputStream fis = null;
    FileOutputStream fos = null;
    // bits per sample * sample rate * channels / 8
    long byteRate = 16 * LONG_SAMPLE_RATE * 2 / 8;
    boolean cancelled = false;

    try {
      // Fall back to a fixed size when no valid capture buffer size is available; a
      // zero-length buffer would make read() return 0 forever.
      byte[] audiodata = new byte[bufferSizeInBytes > 0 ? bufferSizeInBytes : 4096];
      fis = new FileInputStream(rawPath);
      fos = new FileOutputStream(wavPath);
      long totalAudioLen = fis.getChannel().size();
      // Audio length plus the 36 header bytes that follow the size field.
      long totalDataLen = totalAudioLen + 36;

      writeWavFileHeader(fos, totalAudioLen, totalDataLen, byteRate);
      while (true) {
        // On cancel, stop copying; the partial file is removed once it is closed.
        if (mCancel) {
          cancelled = true;
          break;
        }
        int len = fis.read(audiodata);
        if (len == -1) {
          break;
        }
        // Write only what was read; the last chunk is usually shorter than the buffer.
        fos.write(audiodata, 0, len);
      }
    } catch (Exception e) {
      e.printStackTrace();
    } finally {
      ioClose(fis);
      ioClose(fos);
    }
    if (cancelled) {
      // Delete after the stream is closed rather than while it is still open.
      FileUtils.deleteFile(wavPath);
    }
    File pcm = new File(rawPath);
    if (pcm.exists()) {
      pcm.delete();
    }
  }

  /**
   * Emits the 44-byte WAV header: "RIFF" chunk, "fmt " sub-chunk and the start of the
   * "data" sub-chunk. All multi-byte numbers are stored least significant byte first.
   *
   * @param fos stream the header is written to
   * @param totalAudioLen length of the audio data
   * @param totalDataLen audio length plus the size of the header fields
   * @param byteRate value stored in the header's byte-rate field
   * @throws IOException if writing to the stream fails
   */
  private void writeWavFileHeader(FileOutputStream fos, long totalAudioLen, long totalDataLen,
      long byteRate) throws IOException {
    final byte[] wavHeader = new byte[44];

    putTag(wavHeader, 0, "RIFF");
    putLeInt(wavHeader, 4, totalDataLen);
    putTag(wavHeader, 8, "WAVE");

    putTag(wavHeader, 12, "fmt ");
    putLeInt(wavHeader, 16, 16);                // length of the "fmt " sub-chunk
    putLeShort(wavHeader, 20, 1);               // format tag 1
    putLeShort(wavHeader, 22, 2);               // channel count
    putLeInt(wavHeader, 24, LONG_SAMPLE_RATE);
    putLeInt(wavHeader, 28, byteRate);
    putLeShort(wavHeader, 32, 2 * 16 / 8);      // channels * bits per sample / 8
    putLeShort(wavHeader, 34, 16);              // bits per sample

    putTag(wavHeader, 36, "data");
    putLeInt(wavHeader, 40, totalAudioLen);

    fos.write(wavHeader, 0, 44);
  }

  /** Copies the characters of {@code tag} into {@code dst} as single bytes from {@code at}. */
  private static void putTag(byte[] dst, int at, String tag) {
    for (int i = 0; i < tag.length(); i++) {
      dst[at + i] = (byte) tag.charAt(i);
    }
  }

  /** Stores the low 32 bits of {@code value} at {@code at}, least significant byte first. */
  private static void putLeInt(byte[] dst, int at, long value) {
    for (int i = 0; i < 4; i++) {
      dst[at + i] = (byte) ((value >> (8 * i)) & 0xff);
    }
  }

  /** Stores the low 16 bits of {@code value} at {@code at}, least significant byte first. */
  private static void putLeShort(byte[] dst, int at, int value) {
    dst[at] = (byte) (value & 0xff);
    dst[at + 1] = (byte) ((value >> 8) & 0xff);
  }

  /**
   * Cancels the recording: raises the cancel flag first, then runs the regular
   * {@link #stop()} sequence.
   */
  public void cancel() {
    this.mCancel = true;
    this.stop();
  }

  /**
   * Registers the listener that receives the recording callbacks, replacing any previous one.
   *
   * @param completeListener listener to store
   */
  public void setOnRecordCompleteListener(OnRecordCompleteListener completeListener) {
    this.mCompleteListener = completeListener;
  }

  /**
   * Closes the given resource. A {@code null} argument is ignored, and an
   * {@link IOException} raised while closing is printed rather than propagated.
   *
   * @param fos resource to close, may be {@code null}
   */
  void ioClose(Closeable fos) {
    if (fos == null) {
      return;
    }
    try {
      fos.close();
    } catch (IOException e) {
      e.printStackTrace();
    }
  }

  /**
   * Returns the single shared recorder, creating it on first use.
   *
   * <p>The whole accessor is synchronized on the class. Checking the field outside the
   * lock, as before, is only safe when the field is volatile; taking the lock on every
   * call is correct regardless of how the field is declared.
   *
   * @param context context used to construct the instance on the first call
   * @return the shared instance
   */
  public static synchronized AudioRecorder getInstance(Context context) {
    if (mAudioRecorder == null) {
      mAudioRecorder = new AudioRecorder(context);
    }
    return mAudioRecorder;
  }

  /**
   * Reports the recording flag.
   *
   * @return {@code true} between a successful {@code start()} and the following {@code stop()}
   */
  public boolean isRunning() {
    return this.mRunning;
  }
}

執行緒池管理

/**
 * Lazily created singleton wrapping one shared {@link ThreadPoolExecutor}.
 *
 * <p>The pool has 3 core threads and at most 6 threads, a bounded queue of 1024 tasks and
 * an abort policy.
 */
public class ThreadPoolProxy {

  private static final int CORE_THREADS = 3;
  private static final int MAX_THREADS = 6;
  private static final int QUEUE_CAPACITY = 1024;

  private final ThreadPoolExecutor mExecutor;

  private ThreadPoolProxy() {
    final ThreadFactory workerFactory =
        new ThreadFactoryBuilder().setNameFormat("pool-%d").build();
    final LinkedBlockingQueue<Runnable> pendingTasks =
        new LinkedBlockingQueue<>(QUEUE_CAPACITY);
    final ThreadPoolExecutor.AbortPolicy rejectWhenSaturated =
        new ThreadPoolExecutor.AbortPolicy();

    // Keep-alive of 0 ms for threads above the core size.
    mExecutor = new ThreadPoolExecutor(CORE_THREADS, MAX_THREADS,
        0L, TimeUnit.MILLISECONDS,
        pendingTasks, workerFactory, rejectWhenSaturated);
  }

  /** Returns the shared instance, created when the holder class is first initialized. */
  public static ThreadPoolProxy getInstance() {
    return InstanceHolder.threadPoolProxy;
  }

  /**
   * Submits a task to the pool.
   *
   * @param runnable task to run
   */
  public void excute(Runnable runnable) {
    mExecutor.execute(runnable);
  }

  /**
   * Asks the pool to remove a task; the executor's result is discarded.
   *
   * @param runnable task to remove
   */
  public void remove(Runnable runnable) {
    mExecutor.remove(runnable);
  }

  /** Holder whose static initializer builds the singleton. */
  static class InstanceHolder {
    private static final ThreadPoolProxy threadPoolProxy = new ThreadPoolProxy();
  }
}