
Parsing Audio/Video Frames from RTP

Extracting H264 and AAC payloads from RTP

In RTSP, audio and video are carried over RTP; this post records how to extract H264 and AAC frames from RTP packets.
For an introduction to the protocols, see https://blog.csdn.net/lostyears/article/details/51374997

After receiving an RTP packet, first strip the 12-byte RTP header, then process the payload as described below.
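As a reference point, below is a minimal sketch of locating the payload behind the RTP header. It assumes the field layout of RFC 3550 (the usual 12-byte fixed header, plus the CSRC list and the optional header extension); the function name is illustrative and not part of the original code.

    // Minimal sketch, assuming the RFC 3550 header layout. Returns the offset of the
    // payload inside the RTP packet, or -1 if the packet is too short/malformed.
    #include <cstdint>

    int RtpPayloadOffset(const uint8_t* pkt, int len)
    {
        if (len < 12) return -1;                  // fixed RTP header is 12 bytes
        const int csrcCount = pkt[0] & 0x0F;      // CC field: number of 4-byte CSRC entries
        int offset = 12 + 4 * csrcCount;
        if (pkt[0] & 0x10)                        // X bit: header extension present
        {
            if (len < offset + 4) return -1;
            const int extWords = (pkt[offset + 2] << 8) | pkt[offset + 3]; // extension length in 32-bit words
            offset += 4 + 4 * extWords;           // 4-byte extension header + extension data
        }
        return (offset <= len) ? offset : -1;
    }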

Parsing H264

An H264 NAL unit that is too large for a single packet is fragmented across several RTP packets.
Implementation:

/*
 * @par        pBufIn   RTP payload to parse (12-byte RTP header already removed)
 *             nLenIn   payload length
 *             pBufOut  output buffer for H264 (allocated by the caller; must be large enough for a complete frame plus the 4-byte start code, i.e. more than nLenIn alone for fragmented units)
 *             nLenOut  length of one H264 frame, valid when the function returns true
 *
 * @return    true     a complete frame has been assembled
 *            false    the fragmented unit is not finished yet
 */
bool UnpackRtpH264(const UInt8 *pBufIn, const Int32 nLenIn, UInt8 *pBufOut, Int32& nLenOut)
{
    bool bFinished = true;
    do
    {
        nLenOut = 0;
        const Int32 eFrameType = pBufIn[0] & 0x1F;
        if (eFrameType >= 1 && eFrameType <= 23) // single NAL unit packet
        {
            pBufOut[0] = 0x00; // prepend the 4-byte H264 start code
            pBufOut[1] = 0x00;
            pBufOut[2] = 0x00;
            pBufOut[3] = 0x01;
            memcpy(pBufOut + 4, pBufIn, nLenIn);
            nLenOut = nLenIn + 4;
        }
        else // fragmentation unit: several RTP packets are reassembled into one NAL unit
        {
            bFinished = false;
            if (pBufIn[1] & 0x80) // start bit of the fragmented NAL unit
            {
                m_nH264FrmeSize = 0;
                pBufOut[0] = 0x00;
                pBufOut[1] = 0x00;
                pBufOut[2] = 0x00;
                pBufOut[3] = 0x01;
                pBufOut[4] = ((pBufIn[0] & 0xe0) | (pBufIn[1] & 0x1f)); // high 3 bits of pBufIn[0] combined with low 5 bits of pBufIn[1]
                memcpy(pBufOut + 5, pBufIn + 2, nLenIn - 2); // skip the 2 leading bytes of the fragment
                m_nH264FrmeSize = nLenIn + 5 - 2;
            }
            else // subsequent fragments of the NAL unit
            {
                Assert(m_nH264FrmeSize + nLenIn - 2 <= MAX_FRAME_SISE);
                memcpy(pBufOut + m_nH264FrmeSize, pBufIn + 2, nLenIn - 2); // skip the 2 leading bytes of the fragment
                m_nH264FrmeSize += nLenIn - 2;
                if (pBufIn[1] & 0x40) // end bit of the fragmented NAL unit
                {
                    nLenOut = m_nH264FrmeSize;
                    m_nH264FrmeSize = 0;
                    bFinished = true;
                }
            }
        }
    } while (0);
    return bFinished;
}
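For context, here is a hedged usage sketch: feed each received RTP payload (header already stripped) into UnpackRtpH264 and write every completed Annex-B frame out. CRtpParser, m_frameBuf and OnVideoRtpPayload are illustrative names only; the assumption is that they live in the same class that owns m_nH264FrmeSize.

    // Usage sketch (illustrative names; assumes this method sits in the same
    // class as UnpackRtpH264 and m_nH264FrmeSize).
    void CRtpParser::OnVideoRtpPayload(const UInt8* pPayload, Int32 nLen, FILE* fpOut)
    {
        Int32 nFrameLen = 0;
        if (UnpackRtpH264(pPayload, nLen, m_frameBuf, nFrameLen) && nFrameLen > 0)
        {
            // m_frameBuf now holds one complete NAL unit prefixed with a 4-byte start code
            fwrite(m_frameBuf, 1, nFrameLen, fpOut);
        }
    }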

Parsing AAC

Note that one RTP packet may carry several AAC frames. I originally followed a blog post found online that assumed the RTP payload is exactly one AAC frame; that works in most scenarios, but one input source produced no sound after parsing, and it turned out a single RTP packet was carrying multiple AAC payloads. When implementing a protocol it is best to spend the time reading the specification itself; blog posts found online may be incomplete and break in some scenarios.
Implementation:

    /*
     * @par        pBufIn   RTP payload to parse (12-byte RTP header already removed)
     *             nLenIn   payload length
     *             pBufOut  output buffer for AAC (allocated by the caller; since a 7-byte ADTS header is prepended to every frame, it should be somewhat larger than nLenIn)
     *             nLenOut  total length of the ADTS/AAC data written, valid when the function returns true
     *
     *             Note: one RTP packet may contain several AAC frames; the count comes from the AU-headers-length field (length in bits divided by 16, since each AU header is 16 bits)
     *
     * @return    true     a complete frame has been produced
     *            false    the fragmented unit is not finished yet
     */
    bool UnpackRtpAAC(const UInt8 * pBufIn, const Int32 nLenIn, UInt8* pBufOut,  Int32& nLenOut)
    {
    bool bFinished = true;
    do
    {
        nLenOut = 0;

        Int32 nAuHeaderOffset = 0; // offset while scanning the AU headers, advanced 2 bytes at a time
        const UInt16 AU_HEADER_LENGTH = (((pBufIn[nAuHeaderOffset] << 8) | pBufIn[nAuHeaderOffset + 1]) >> 4); // the first 2 bytes hold the AU-headers-length in bits; each AU header is 16 bits, so dividing by 16 gives the number of AU headers (= number of AAC frames)
        nAuHeaderOffset += 2;
        Assert(nLenIn > (2 + AU_HEADER_LENGTH*2));
        std::vector<UInt32> vecAacFrameLen;
        for (int i = 0; i < AU_HEADER_LENGTH; ++i)
        {
            const UInt16 AU_HEADER = ((pBufIn[nAuHeaderOffset] << 8) | pBufIn[nAuHeaderOffset + 1]); // the next 2 bytes are one AU header
            UInt32 nAac = (AU_HEADER >> 3); // the high 13 bits hold the byte length of one AAC frame; the low 3 bits are unused here
            vecAacFrameLen.push_back(nAac);
            nAuHeaderOffset += 2;
        }

        const UInt8 *pAacPayload = pBufIn + nAuHeaderOffset; // start of the actual AAC payload
        UInt32 nAacPayloadOffset = 0;
        for (int j = 0; j < AU_HEADER_LENGTH; ++j)
        {
            const UInt32 nAac = vecAacFrameLen.at(j);
            // build the ADTS header
            SAacParam param(nAac, m_AudioInfo.nSample, m_AudioInfo.nChannel);
            CADTS adts;
            adts.Init(param);

            // write the ADTS header
            memcpy(pBufOut + nLenOut, adts.GetBuf(), adts.GetBufSize());
            nLenOut += adts.GetBufSize();

            // write the AAC payload
            memcpy(pBufOut + nLenOut, pAacPayload + nAacPayloadOffset, nAac);
            nLenOut += nAac;
            nAacPayloadOffset += nAac;
        }
        Assert((nLenIn - nAuHeaderOffset) == nAacPayloadOffset);
    } while (0);

    return bFinished;
}
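To make the AU-header layout concrete, here is a small worked example with made-up bytes (purely illustrative, not taken from a real capture):

    // Hypothetical payload start, for illustration only:
    //   2-byte AU-headers-length | one 2-byte AU header per frame | AAC frames back to back
    const UInt8 pExample[] = { 0x00, 0x20,   // AU-headers-length = 0x0020 = 32 bits -> 32/16 = 2 AU headers, i.e. 2 AAC frames
                               0x1F, 0xE0,   // AU header #1: 0x1FE0 >> 3 = 1020 -> first frame is 1020 bytes
                               0x03, 0x28 }; // AU header #2: 0x0328 >> 3 = 101  -> second frame is 101 bytes
    // The 1020-byte and 101-byte AAC frames then follow immediately after these 6 bytes.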

Building the ADTS header for AAC

CADTS.h
#ifndef max
#define max(a, b) (((a) > (b)) ? (a) : (b))
#endif
#ifndef min
#define min(a, b) (((a) < (b)) ? (a) : (b))
#endif

#define BYTE_NUMBIT 8       /* bits in byte (char) */

#define N_ADTS_SIZE 7
/*
 * AAC profile level
 */
enum eAACProfile
{
    E_AAC_PROFILE_MAIN_PROFILE = 0,
    E_AAC_PROFILE_LC,
    E_AAC_PROFILE_SSR,
    E_AAC_PROFILE_PROFILE_RESERVED,
};

enum eAACSample
{
    E_AAC_SAMPLE_96000_HZ = 0,
    E_AAC_SAMPLE_88200_HZ,
    E_AAC_SAMPLE_64000_HZ,
    E_AAC_SAMPLE_48000_HZ,
    E_AAC_SAMPLE_44100_HZ,
    E_AAC_SAMPLE_32000_HZ,
    E_AAC_SAMPLE_24000_HZ,
    E_AAC_SAMPLE_22050_HZ,
    E_AAC_SAMPLE_16000_HZ,
    E_AAC_SAMPLE_12000_HZ,
    E_AAC_SAMPLE_11025_HZ,
    E_AAC_SAMPLE_8000_HZ,
    E_AAC_SAMPLE_7350_HZ,
    E_AAC_SAMPLE_RESERVED,
};

enum eAACChannel
{
    E_AAC_CHANNEL_SPECIFC_CONFIG = 0,
    E_AAC_CHANNEL_MONO,
    E_AAC_CHANNEL_STEREO,
    E_AAC_CHANNEL_TRIPLE_TRACK,
    E_AAC_CHANNEL_4,
    E_AAC_CHANNEL_5,
    E_AAC_CHANNEL_6,
    E_AAC_CHANNEL_8,
    E_AAC_CHANNEL_RESERVED,
};

enum eMpegId
{
    E_MPEG4 = 0,
    E_MPEG_2
};


struct SAacParam
{
    SAacParam(UInt32 playod, Int32 sample, Int32 channel = 1, eAACProfile profile = E_AAC_PROFILE_LC, eMpegId id = E_MPEG4)
            :eId(id), eProfile(profile), nChannel(channel), nSample(sample), nPlayod(playod)
    {

    };
    eMpegId eId;
    eAACProfile eProfile;
    Int32 nChannel;
    Int32 nSample;
    UInt32 nPlayod; // AAC payload size in bytes (excluding the ADTS header)
};

class CADTS
{
public:
    CADTS();

public:
    /*
     * Fills in the ADTS header fields
     */
    void Init(const SAacParam& aacHead);

    /*
     * Returns a pointer to the ADTS header buffer
     */
    UInt8* GetBuf();

    /*
     * Returns the ADTS header length in bytes
     */
    UInt32 GetBufSize() const ;

private:
    int PutBit(UInt32 data, int numBit);

    int WriteByte(UInt32 data, int numBit);
    /*
     * Sampling-rate index
     */
    static eAACSample GetSampleIndex(const UInt32 nSample);

    /*
     * Channel-configuration index
     */
    static eAACChannel GetChannelIndex(const UInt32 nChannel);

private:
    UInt8                  m_pBuf[N_ADTS_SIZE]; // header buffer
    const UInt32           m_nBit;              // total number of bits
    UInt32                 m_curBit;            // bits written so far
};
CADTS.cpp
CADTS::CADTS():m_pBuf(),m_nBit(BYTE_NUMBIT*N_ADTS_SIZE),m_curBit(0)
{

}

void CADTS::Init(const SAacParam &aacHead)
{
    /* Fixed ADTS header */
    PutBit(0xFFFF, 12);// 12 bit Syncword
    PutBit(aacHead.eId, 1); //ID == 0 for MPEG4 AAC, 1 for MPEG2 AAC
    PutBit(0, 2); //layer == 0
    PutBit(1, 1); //protection absent
    PutBit(aacHead.eProfile, 2); //profile
    PutBit(CADTS::GetSampleIndex(aacHead.nSample), 4); //sampling rate
    PutBit(0, 1); //private bit
    PutBit(CADTS::GetChannelIndex(aacHead.nChannel), 3); //numChannels
    PutBit(0, 1); //original/copy
    PutBit(0, 1); // home
    /* Variable ADTS header */
    PutBit(0, 1); // copyr. id. bit
    PutBit(0, 1); // copyr. id. start
    PutBit(GetBufSize() + aacHead.nPlayod, 13); // ADTS frame length = ADTS header + raw AAC payload
    PutBit(0x7FF, 11); // buffer fullness (0x7FF for VBR)
    PutBit(0 ,2); //raw data blocks (0+1=1)
}

UInt8 *CADTS::GetBuf()
{
    return m_pBuf;
}

UInt32 CADTS::GetBufSize() const
{
    return m_nBit/BYTE_NUMBIT;
}

int CADTS::PutBit(UInt32 data, int numBit)
{
    int num,maxNum,curNum;
    unsigned long bits;

    if (numBit == 0)
        return 0;

    /* write bits in packets according to buffer byte boundaries */
    num = 0;
    maxNum = BYTE_NUMBIT - m_curBit % BYTE_NUMBIT;
    while (num < numBit) {
        curNum = min(numBit-num,maxNum);
        bits = data>>(numBit-num-curNum);
        if (WriteByte(bits, curNum)) {
            return 1;
        }
        num += curNum;
        maxNum = BYTE_NUMBIT;
    }

    return 0;
}

int CADTS::WriteByte(UInt32 data, int numBit)
{
    long numUsed,idx;

    idx = (m_curBit / BYTE_NUMBIT) % N_ADTS_SIZE;
    numUsed = m_curBit % BYTE_NUMBIT;
#ifndef DRM
    if (numUsed == 0)
        m_pBuf[idx] = 0;
#endif
    m_pBuf[idx] |= (data & ((1<<numBit)-1)) << (BYTE_NUMBIT-numUsed-numBit);
    m_curBit += numBit;

    return 0;
}


eAACSample CADTS::GetSampleIndex(const UInt32 nSample)
{
    eAACSample eSample = E_AAC_SAMPLE_RESERVED;
    static std::map<UInt32 , eAACSample> mpSample;
    if (mpSample.empty())
    {
        mpSample[96000] = E_AAC_SAMPLE_96000_HZ;
        mpSample[88200] = E_AAC_SAMPLE_88200_HZ;
        mpSample[64000] = E_AAC_SAMPLE_64000_HZ;
        mpSample[48000] = E_AAC_SAMPLE_48000_HZ;
        mpSample[44100] = E_AAC_SAMPLE_44100_HZ;
        mpSample[32000] = E_AAC_SAMPLE_32000_HZ;
        mpSample[24000] = E_AAC_SAMPLE_24000_HZ;
        mpSample[22050] = E_AAC_SAMPLE_22050_HZ;
        mpSample[16000] = E_AAC_SAMPLE_16000_HZ;
        mpSample[12000] = E_AAC_SAMPLE_12000_HZ;
        mpSample[11025] = E_AAC_SAMPLE_11025_HZ;
        mpSample[8000]  = E_AAC_SAMPLE_8000_HZ;
        mpSample[7350]  = E_AAC_SAMPLE_7350_HZ;
    };
    if (mpSample.find(nSample) != mpSample.end())
    {
        eSample = mpSample[nSample];
    }
    return eSample;
}

eAACChannel CADTS::GetChannelIndex(const UInt32 nChannel)
{
    eAACChannel eChannel = E_AAC_CHANNEL_RESERVED;
    static std::map<UInt32 , eAACChannel> mpChannel;
    if (mpChannel.empty())
    {
        mpChannel[0] = E_AAC_CHANNEL_SPECIFC_CONFIG;
        mpChannel[1] = E_AAC_CHANNEL_MONO;
        mpChannel[2] = E_AAC_CHANNEL_STEREO;
        mpChannel[3] = E_AAC_CHANNEL_TRIPLE_TRACK;
        mpChannel[4] = E_AAC_CHANNEL_4;
        mpChannel[5] = E_AAC_CHANNEL_5;
        mpChannel[6] = E_AAC_CHANNEL_6;
        mpChannel[8] = E_AAC_CHANNEL_8;
    };
    if (mpChannel.find(nChannel) != mpChannel.end())
    {
        eChannel = mpChannel[nChannel];
    }
    return eChannel;
}
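As a quick check of the class, here is a hedged usage sketch that prepends an ADTS header to one raw AAC frame; WriteAdtsFrame and the 44.1 kHz stereo parameters are illustrative values, not taken from the original code.

    // Usage sketch with illustrative parameters (44.1 kHz, stereo).
    void WriteAdtsFrame(const UInt8* pAac, UInt32 nAacLen, FILE* fp)
    {
        SAacParam param(nAacLen, 44100, 2);  // payload size, sample rate, channel count
        CADTS adts;
        adts.Init(param);                    // fills the 7-byte ADTS header

        fwrite(adts.GetBuf(), 1, adts.GetBufSize(), fp); // 7-byte ADTS header first
        fwrite(pAac, 1, nAacLen, fp);                    // then the raw AAC payload
    }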

Lessons learned

1. When parsing a protocol, first look for mature open-source code such as ffmpeg; it already implements most streaming-related protocols.
2. If no mature open-source reference exists, find the protocol specification and, if it is not too complex, implement it step by step from the document. The specification is systematic and complete, far more reliable than blog posts pieced together from the web; it will probably take less time than searching at random, and implementing from the spec leaves a much deeper understanding.

Appendix: audio packet-capture analysis

[Figure: audio RTP packet capture]