1. 程式人生 > >解複用FLV檔案(基於FFMPEG解析FLV(h264+aac))

解複用FLV檔案(基於FFMPEG解析FLV(h264+aac))

技術在於交流、溝通,轉載請註明出處並保持作品的完整性。

原文: https://blog.csdn.net/hiwubihe/article/details/82346759

[本系列相關文章]

本篇介紹基於FFMPEG解析FLV檔案,FLV由H264視訊和AAC音訊組成。FFMPEG 解複用出的音視訊AVPacket,直接寫入檔案播放不了。FLV格式最常用的封裝組合就是H264+AAC,FFMPEG解複用的結果AVPacket只包含實際的壓縮音視訊資料,不包括解碼必須的資訊,例如H264的SPS+PPS、AAC的音訊頭ADTS等。這就需要從FLV中提取SPS+PPS資訊和ADTS所需的資訊,分別加到視訊幀和音訊幀前面。

1.FLV檔案AVCDecoderConfigurationRecord

封裝FLV,一般第一個VideoTag就是AVCDecoderConfigurationRecord結構,該結構就是H264的SPS+PPS資訊

 AVCDecoderConfigurationRecord定義如下

aligned(8) class AVCDecoderConfigurationRecord {
	unsigned int(8) configurationVersion = 1;
	unsigned int(8) AVCProfileIndication;
	unsigned int(8) profile_compatibility;
	unsigned int(8) AVCLevelIndication; 
	bit(6) reserved = ‘111111’b;
	unsigned int(2) lengthSizeMinusOne; 
	bit(3) reserved = ‘111’b;
	unsigned int(5) numOfSequenceParameterSets;
	for (i=0; i< numOfSequenceParameterSets;  i++) {
		unsigned int(16) sequenceParameterSetLength ;
		bit(8*sequenceParameterSetLength) sequenceParameterSetNALUnit;
	}
	unsigned int(8) numOfPictureParameterSets;
	for (i=0; i< numOfPictureParameterSets;  i++) {
		unsigned int(16) pictureParameterSetLength;
		bit(8*pictureParameterSetLength) pictureParameterSetNALUnit;
	}
	if (AVCProfileIndication == 100 || AVCProfileIndication == 110 ||
	    AVCProfileIndication == 122 || AVCProfileIndication == 144) {
		bit(6) reserved = ‘111111’b;
		unsigned int(2) chroma_format;
		bit(5) reserved = ‘11111’b;
		unsigned int(3) bit_depth_luma_minus8;
		bit(5) reserved = ‘11111’b;
		unsigned int(3) bit_depth_chroma_minus8;
		unsigned int(8) numOfSequenceParameterSetExt;
		for (i=0; i< numOfSequenceParameterSetExt; i++) {
			unsigned int(16) sequenceParameterSetExtLength;
			bit(8*sequenceParameterSetExtLength) sequenceParameterSetExtNALUnit;
		}
	}
}

2.FLV檔案AudioSpecificConfig


封裝FLV,一般第一個AudioTag就是AudioSpecificConfig結構,該結構就是AAC封裝頭ADTS需要的資訊,取樣率,通道數,樣本深度等資訊。

 AudioSpecificConfig在所有封裝AAC的檔案結構中都有,如FLV/MP4。 AudioSpecificConfig結構比較複雜。一般結構就是2個位元組。在MP4檔案中一般叫AES,一般結構如下表                                     

總體結構
5 bits: object type
if (object type == 31)
    6 bits + 32: object type
4 bits: frequency index
if (frequency index == 15)
    24 bits: frequency
4 bits: channel configuration
var bits: AOT Specific Config
ObjectType
  • 0: Null
  • 1: AAC Main
  • 2: AAC LC (Low Complexity)
  • 3: AAC SSR (Scalable Sample Rate)
  • 4: AAC LTP (Long Term Prediction)
  • 5: SBR (Spectral Band Replication)
  • 6: AAC Scalable
  • 7: TwinVQ
  • 8: CELP (Code Excited Linear Prediction)
  • 9: HVXC (Harmonic Vector eXcitation Coding)
  • 10: Reserved
  • 11: Reserved
  • 12: TTSI (Text-To-Speech Interface)
  • 13: Main Synthesis
  • 14: Wavetable Synthesis
  • 15: General MIDI
  • 16: Algorithmic Synthesis and Audio Effects
  • 17: ER (Error Resilient) AAC LC
  • 18: Reserved
  • 19: ER AAC LTP
  • 20: ER AAC Scalable
  • 21: ER TwinVQ
  • 22: ER BSAC (Bit-Sliced Arithmetic Coding)
  • 23: ER AAC LD (Low Delay)
  • 24: ER CELP
  • 25: ER HVXC
  • 26: ER HILN (Harmonic and Individual Lines plus Noise)
  • 27: ER Parametric
  • 28: SSC (SinuSoidal Coding)
  • 29: PS (Parametric Stereo)
  • 30: MPEG Surround
  • 31: (Escape value)
  • 32: Layer-1
  • 33: Layer-2
  • 34: Layer-3
  • 35: DST (Direct Stream Transfer)
  • 36: ALS (Audio Lossless)
  • 37: SLS (Scalable LosslesS)
  • 38: SLS non-core
  • 39: ER AAC ELD (Enhanced Low Delay)
  • 40: SMR (Symbolic Music Representation) Simple
  • 41: SMR Main
  • 42: USAC (Unified Speech and Audio Coding) (no SBR)
  • 43: SAOC (Spatial Audio Object Coding)
  • 44: LD MPEG Surround
  • 45: USAC
取樣率索引
  • 0: 96000 Hz
  • 1: 88200 Hz
  • 2: 64000 Hz
  • 3: 48000 Hz
  • 4: 44100 Hz
  • 5: 32000 Hz
  • 6: 24000 Hz
  • 7: 22050 Hz
  • 8: 16000 Hz
  • 9: 12000 Hz
  • 10: 11025 Hz
  • 11: 8000 Hz
  • 12: 7350 Hz
  • 13: Reserved
  • 14: Reserved
  • 15: frequency is written explicitly
Channels
  • 0: Defined in AOT Specific Config
  • 1: 1 channel: front-center
  • 2: 2 channels: front-left, front-right
  • 3: 3 channels: front-center, front-left, front-right
  • 4: 4 channels: front-center, front-left, front-right, back-center
  • 5: 5 channels: front-center, front-left, front-right, back-left, back-right
  • 6: 6 channels: front-center, front-left, front-right, back-left, back-right, LFE-channel
  • 7: 8 channels: front-center, front-left, front-right, side-left, side-right, back-left, back-right, LFE-channel
  • 8-15: Reserved

解析AES程式碼

/* Fields of an AudioSpecificConfig that are needed to build an ADTS header. */
typedef struct
{
	int write_adts;         /* set to 1 once the fields below have been filled in */
	int objecttype;         /* audio object type minus 1 */
	int sample_rate_index;  /* 4-bit sampling frequency index */
	int channel_conf;       /* 4-bit channel configuration */
}ADTSContext;

/*
 * Parse the leading fields of an AudioSpecificConfig.
 *
 * Bit layout handled here:
 *   5 bits  object type (value 31 = escape: 6 more bits follow, type = 32 + value)
 *   4 bits  sampling frequency index (value 15 = a 24-bit explicit frequency follows)
 *   4 bits  channel configuration
 *
 * adts     receives the parsed values; it is only written on success.
 * pbuf     start of the AudioSpecificConfig bytes.
 * bufsize  number of valid bytes at pbuf.
 *
 * Returns 0 on success, -1 if an argument is NULL or the buffer is too short
 * to contain every field that has to be read.
 */
int aac_decode_extradata(ADTSContext *adts, unsigned char *pbuf, int bufsize)
{
	const unsigned char *p = pbuf;
	int aot, samfreindex, channelconfig;

	if (!adts || !pbuf || bufsize < 2)
	{
		return -1;
	}

	aot = (p[0] >> 3) & 0x1f;
	if (aot == 31)
	{
		/* Escape value: the real object type is 32 + the next 6 bits. */
		aot = 32 + (((p[0] << 3) | (p[1] >> 5)) & 0x3f);
		samfreindex = (p[1] >> 1) & 0x0f;
		if (samfreindex == 0x0f)
		{
			/* A 24-bit explicit frequency pushes the channel bits into bytes 4..5. */
			if (bufsize < 6)
			{
				return -1;
			}
			channelconfig = ((p[4] << 3) | (p[5] >> 5)) & 0x0f;
		}
		else
		{
			/* The channel bits straddle bytes 1 and 2. */
			if (bufsize < 3)
			{
				return -1;
			}
			channelconfig = ((p[1] << 3) | (p[2] >> 5)) & 0x0f;
		}
	}
	else
	{
		samfreindex = ((p[0] << 1) | (p[1] >> 7)) & 0x0f;
		if (samfreindex == 0x0f)
		{
			/* A 24-bit explicit frequency pushes the channel bits into byte 4. */
			if (bufsize < 5)
			{
				return -1;
			}
			channelconfig = (p[4] >> 3) & 0x0f;
		}
		else
		{
			channelconfig = (p[1] >> 3) & 0x0f;
		}
	}
#ifdef AOT_PROFILE_CTRL
	if (aot < 2) aot = 2;
#endif
	adts->objecttype = aot - 1;
	adts->sample_rate_index = samfreindex;
	adts->channel_conf = channelconfig;
	adts->write_adts = 1;
	return 0;
}

構造AES程式碼

// Build the audio decoder-specific info (AudioSpecificConfig) bytes from ADTS header fields.
//
// stAdtsHeadInfo              source of the profile, sample-rate index and channel values
// pBuffer                     output buffer; its first two bytes are overwritten
// lSizeOfDecoderSpecificInfo  set to the number of bytes produced (always 2)
//
// Always returns true.
// NOTE(review): bits_initwrite/bits_write are defined elsewhere; presumably
// bits_write(w, n, v) appends the low n bits of v to the writer - confirm.
bool GetDecoderSpecificInfo(AdtsHeadInfo stAdtsHeadInfo,unsigned char*pBuffer,unsigned long &lSizeOfDecoderSpecificInfo)
{
	bits_buffer_t bitWriter;

	// Report the fixed output size to the caller, then clear that many bytes.
	lSizeOfDecoderSpecificInfo = 2;
	memset(pBuffer, 0, lSizeOfDecoderSpecificInfo);

	// 5 + 4 + 4 = 13 bits are written: profile, sample-rate index, channel count.
	bits_initwrite(&bitWriter, lSizeOfDecoderSpecificInfo, pBuffer);
	bits_write(&bitWriter, 5, stAdtsHeadInfo.iProfile);
	bits_write(&bitWriter, 4, stAdtsHeadInfo.iSampleRateIndex);
	bits_write(&bitWriter, 4, stAdtsHeadInfo.iChans);

	// Copy the writer's data back into the caller's buffer.
	memcpy(pBuffer, bitWriter.p_data, lSizeOfDecoderSpecificInfo);

	return true;
}

 

3.FFMPEG處理原理

在av_read_frame函式呼叫中,如果讀到AVCDecoderConfigurationRecord或者AudioSpecificConfig結構時,會把資訊儲存在每個stream的streams[index]->codec->extradata擴充套件資料中。處理H264資料時,提供一個叫 “h264_mp4toannexb”位元流過濾器,在av_read_frame讀取到正常資料包時,需要呼叫位元流過濾器處理每個資料包,其實就是判斷是否是I幀,是I幀的話,解析AVCDecoderConfigurationRecord結構成SPS+PPS,然後把SPS+PPS儲存加到I幀前面。可以參考FFMPEG原始碼,版本3.2.4.r86064。下面程式碼就是迴圈處理接受到的資料,新增SPS/PPS。

/*
 * Bitstream-filter callback: takes one input packet made of length-prefixed
 * NAL units and rebuilds it into `out` through alloc_and_copy(), optionally
 * prepending parameter sets taken from ctx->par_out->extradata.
 *
 * ctx  filter context; ctx->priv_data holds the H264BSFContext state
 *      (length_size, sps_offset, pps_offset, new_idr, idr_sps_seen, idr_pps_seen).
 * out  packet that receives the result.
 *
 * Returns 0 on success or a negative error code. On failure `out` is
 * unreferenced; the input packet is freed on every path.
 *
 * NOTE(review): alloc_and_copy() is not visible here; it presumably appends
 * the optional extradata slice plus the NAL unit to `out` - confirm.
 */
static int h264_mp4toannexb_filter(AVBSFContext *ctx, AVPacket *out)
{
    H264BSFContext *s = ctx->priv_data;

    AVPacket *in;
    uint8_t unit_type;
    int32_t nal_size;
    uint32_t cumul_size    = 0;
    const uint8_t *buf;
    const uint8_t *buf_end;
    int            buf_size;
    int ret = 0, i;

    // Fetch the next input packet into `in`
    ret = ff_bsf_get_packet(ctx, &in);
    if (ret < 0)
        return ret;

    /* nothing to filter */
    // Extradata was not parsed: hand the packet over unchanged
    if (!s->extradata_parsed)
    {
        av_packet_move_ref(out, in);
        av_packet_free(&in);
        return 0;
    }

    buf      = in->data;
    buf_size = in->size;
    buf_end  = in->data + in->size;

    do
    {
        // Default to EINVAL so every `goto fail` below reports a malformed packet
        ret= AVERROR(EINVAL);
        // Not enough bytes left for a length prefix
        if (buf + s->length_size > buf_end)
            goto fail;
        // Read the NALU length: length_size bytes, most significant byte first
        for (nal_size = 0, i = 0; i<s->length_size; i++)
            nal_size = (nal_size << 8) | buf[i];
        // Skip past the length prefix
        buf += s->length_size;
        // NALU type is the low 5 bits of the first NALU byte
        // NOTE(review): this byte is read before nal_size is validated below;
        // if the prefix ends exactly at buf_end this reads buf_end[0] - confirm padding.
        unit_type = *buf & 0x1f;
        // Reject a NALU length that is negative or runs past the end of the packet
        if (nal_size > buf_end - buf || nal_size < 0)
            goto fail;
        // SPS (type 7): mark a new IDR; the SPS bytes are copied by the generic branch below
        if (unit_type == 7)
            s->idr_sps_seen = s->new_idr = 1;
        // PPS (type 8): mark a new IDR
        else if (unit_type == 8)
        {
            s->idr_pps_seen = s->new_idr = 1;
            /* if SPS has not been seen yet, prepend the AVCC one to PPS */
            // A PPS arrived with no SPS before it, so the SPS must come from the extradata.
            // If an SPS was already seen, fall through to the generic copy branch below.
            if (!s->idr_sps_seen)
            {
                if (s->sps_offset == -1)
                    av_log(ctx, AV_LOG_WARNING, "SPS not present in the stream, nor in AVCC, stream may be unreadable\n");
                else
                {
                    // Emit the extradata slice starting at sps_offset, followed by this PPS NALU
                    if ((ret = alloc_and_copy(out,
                                              ctx->par_out->extradata + s->sps_offset,
                                              s->pps_offset != -1 ? s->pps_offset : ctx->par_out->extradata_size - s->sps_offset,
                                              buf, nal_size)) < 0)
                        goto fail;
                    s->idr_sps_seen = 1;
                    goto next_nal;
                }
            }
        }

        /* if this is a new IDR picture following an IDR picture, reset the idr flag.
         * Just check first_mb_in_slice to be 0 as this is the simplest solution.
         * This could be checking idr_pic_id instead, but would complexify the parsing. */
        // NOTE(review): buf[1] is read without checking nal_size >= 2 - confirm.
        if (!s->new_idr && unit_type == 5 && (buf[1] & 0x80))
            s->new_idr = 1;

        /* prepend only to the first type 5 NAL unit of an IDR picture, if no sps/pps are already present */
        // New IDR slice with neither SPS nor PPS seen: prepend the whole extradata
        if (s->new_idr && unit_type == 5 && !s->idr_sps_seen && !s->idr_pps_seen)
        {
            if ((ret=alloc_and_copy(out,
                                    ctx->par_out->extradata, ctx->par_out->extradata_size,
                                    buf, nal_size)) < 0)
                goto fail;
            s->new_idr = 0;
            /* if only SPS has been seen, also insert PPS */
        }
        else if (s->new_idr && unit_type == 5 && s->idr_sps_seen && !s->idr_pps_seen)
        {
            if (s->pps_offset == -1)
            {
                av_log(ctx, AV_LOG_WARNING, "PPS not present in the stream, nor in AVCC, stream may be unreadable\n");
                if ((ret = alloc_and_copy(out, NULL, 0, buf, nal_size)) < 0)
                    goto fail;
            }
            else if ((ret = alloc_and_copy(out,
                                           ctx->par_out->extradata + s->pps_offset, ctx->par_out->extradata_size - s->pps_offset,
                                           buf, nal_size)) < 0)
                goto fail;
        }
        else
        {
            // Generic case: copy the NALU with nothing prepended
            if ((ret=alloc_and_copy(out, NULL, 0, buf, nal_size)) < 0)
                goto fail;
            // A type-1 slice re-arms the state so the next IDR gets parameter sets again
            if (!s->new_idr && unit_type == 1)
            {
                s->new_idr = 1;
                s->idr_sps_seen = 0;
                s->idr_pps_seen = 0;
            }
        }

next_nal:
        // Advance past this NALU and account for its prefix plus payload
        buf        += nal_size;
        cumul_size += nal_size + s->length_size;
    }
    while (cumul_size < buf_size);

    ret = av_packet_copy_props(out, in);
    if (ret < 0)
        goto fail;

    // Reached on success as well: only a negative ret discards the output
fail:
    if (ret < 0)
        av_packet_unref(out);
    av_packet_free(&in);

    return ret;
}

4.解析AudioSpecificConfig

AAC也會把AudioSpecificConfig儲存到streams[index]->codec->extradata擴充套件資料中,但是FFMPEG 沒有提供加ADTS的位元流過濾器,需要自己解析AudioSpecificConfig,然後新增到AAC幀前面。

/*
 * Parse the leading fields of an AudioSpecificConfig.
 *
 * Bit layout handled here:
 *   5 bits  object type (value 31 = escape: 6 more bits follow, type = 32 + value)
 *   4 bits  sampling frequency index (value 15 = a 24-bit explicit frequency follows)
 *   4 bits  channel configuration
 *
 * adts     receives the parsed values; it is only written on success.
 * pbuf     start of the AudioSpecificConfig bytes.
 * bufsize  number of valid bytes at pbuf.
 *
 * Returns 0 on success, -1 if an argument is NULL or the buffer is too short
 * to contain every field that has to be read.
 */
int aac_decode_extradata(ADTSContext *adts, unsigned char *pbuf, int bufsize)
{
	const unsigned char *p = pbuf;
	int aot, samfreindex, channelconfig;

	if (!adts || !pbuf || bufsize < 2)
	{
		return -1;
	}

	aot = (p[0] >> 3) & 0x1f;
	if (aot == 31)
	{
		/* Escape value: the real object type is 32 + the next 6 bits. */
		aot = 32 + (((p[0] << 3) | (p[1] >> 5)) & 0x3f);
		samfreindex = (p[1] >> 1) & 0x0f;
		if (samfreindex == 0x0f)
		{
			/* A 24-bit explicit frequency pushes the channel bits into bytes 4..5. */
			if (bufsize < 6)
			{
				return -1;
			}
			channelconfig = ((p[4] << 3) | (p[5] >> 5)) & 0x0f;
		}
		else
		{
			/* The channel bits straddle bytes 1 and 2. */
			if (bufsize < 3)
			{
				return -1;
			}
			channelconfig = ((p[1] << 3) | (p[2] >> 5)) & 0x0f;
		}
	}
	else
	{
		samfreindex = ((p[0] << 1) | (p[1] >> 7)) & 0x0f;
		if (samfreindex == 0x0f)
		{
			/* A 24-bit explicit frequency pushes the channel bits into byte 4. */
			if (bufsize < 5)
			{
				return -1;
			}
			channelconfig = (p[4] >> 3) & 0x0f;
		}
		else
		{
			channelconfig = (p[1] >> 3) & 0x0f;
		}
	}
#ifdef AOT_PROFILE_CTRL
	if (aot < 2) aot = 2;
#endif
	adts->objecttype = aot - 1;
	adts->sample_rate_index = samfreindex;
	adts->channel_conf = channelconfig;
	adts->write_adts = 1;
	return 0;
}

5.DEMO程式

程式運用ffmpeg把h264+aac封裝的FLV,解複用成兩個檔案,h264檔案和AAC檔案。

/*******************************************************************************
Copyright (c) wubihe Tech. Co., Ltd. All rights reserved.
--------------------------------------------------------------------------------

Date Created:	2014-10-25
Author:			wubihe QQ:1269122125 Email:[email protected]
Description:	解複用flv儲存成h264檔案和aac檔案
--------------------------------------------------------------------------------
Modification History
DATE          AUTHOR          DESCRIPTION
--------------------------------------------------------------------------------

********************************************************************************/

#include <stdio.h>

#define __STDC_CONSTANT_MACROS


extern "C"
{
#include "libavformat/avformat.h"
};



//封裝格式MKV/MP4/FLV中如果有AAC的情況,首先這些封裝格式中包含AudioSpecificConfig,
//儲存在音訊流的AVCodecContext->extradata 裡面,需要解析然後封裝ADTS即可



#define DEMUXER_AAC 1
#define DEMUXER_MP3 0

#define  ADTS_HEADER_SIZE (7)

//In FLV the first AAC tag carries an AudioSpecificConfig structure.
//The values parsed from that AudioSpecificConfig are stored in this struct.
typedef struct
{
	int write_adts;         /* set to 1 once the fields below have been filled in */
	int objecttype;         /* audio object type minus 1 */
	int sample_rate_index;  /* 4-bit sampling frequency index */
	int channel_conf;       /* 4-bit channel configuration */
}ADTSContext;


/*
 * Parse the leading fields of an AudioSpecificConfig.
 *
 * Bit layout handled here:
 *   5 bits  object type (value 31 = escape: 6 more bits follow, type = 32 + value)
 *   4 bits  sampling frequency index (value 15 = a 24-bit explicit frequency follows)
 *   4 bits  channel configuration
 *
 * adts     receives the parsed values; it is only written on success.
 * pbuf     start of the AudioSpecificConfig bytes.
 * bufsize  number of valid bytes at pbuf.
 *
 * Returns 0 on success, -1 if an argument is NULL or the buffer is too short
 * to contain every field that has to be read.
 */
int aac_decode_extradata(ADTSContext *adts, unsigned char *pbuf, int bufsize)
{
	const unsigned char *p = pbuf;
	int aot, samfreindex, channelconfig;

	if (!adts || !pbuf || bufsize < 2)
	{
		return -1;
	}

	aot = (p[0] >> 3) & 0x1f;
	if (aot == 31)
	{
		/* Escape value: the real object type is 32 + the next 6 bits. */
		aot = 32 + (((p[0] << 3) | (p[1] >> 5)) & 0x3f);
		samfreindex = (p[1] >> 1) & 0x0f;
		if (samfreindex == 0x0f)
		{
			/* A 24-bit explicit frequency pushes the channel bits into bytes 4..5. */
			if (bufsize < 6)
			{
				return -1;
			}
			channelconfig = ((p[4] << 3) | (p[5] >> 5)) & 0x0f;
		}
		else
		{
			/* The channel bits straddle bytes 1 and 2. */
			if (bufsize < 3)
			{
				return -1;
			}
			channelconfig = ((p[1] << 3) | (p[2] >> 5)) & 0x0f;
		}
	}
	else
	{
		samfreindex = ((p[0] << 1) | (p[1] >> 7)) & 0x0f;
		if (samfreindex == 0x0f)
		{
			/* A 24-bit explicit frequency pushes the channel bits into byte 4. */
			if (bufsize < 5)
			{
				return -1;
			}
			channelconfig = (p[4] >> 3) & 0x0f;
		}
		else
		{
			channelconfig = (p[1] >> 3) & 0x0f;
		}
	}
#ifdef AOT_PROFILE_CTRL
	if (aot < 2) aot = 2;
#endif
	adts->objecttype = aot - 1;
	adts->sample_rate_index = samfreindex;
	adts->channel_conf = channelconfig;
	adts->write_adts = 1;
	return 0;
}

/*
 * Fill buf with a 7-byte ADTS header describing one AAC frame.
 *
 * acfg  parsed AudioSpecificConfig values (objecttype, sample_rate_index,
 *       channel_conf); only the low 2 / 4 / 3 bits of each fit in the header.
 * buf   output buffer, must hold at least ADTS_HEADER_SIZE bytes.
 * size  length in bytes of the AAC payload that follows the header
 *       (the header length is added to it here).
 *
 * Returns 0 on success, -1 if a pointer is NULL, size is negative, or
 * header + payload does not fit the 13-bit frame-length field.
 */
int aac_set_adts_head(ADTSContext *acfg, unsigned char *buf, int size)
{
	int frame_len;

	if (!acfg || !buf || size < 0)
	{
		return -1;
	}
	/* Checked before the addition so a huge size cannot overflow the int. */
	if (size > 0x1fff - ADTS_HEADER_SIZE)
	{
		return -1;
	}
	frame_len = ADTS_HEADER_SIZE + size;

	/* 12-bit syncword 0xfff, then the bits 0001: ID=0, layer=00, protection_absent=1. */
	buf[0] = 0xff;
	buf[1] = 0xf1;
	/* profile(2) | sampling frequency index(4) | private bit(1)=0 | channel config high bit(1) */
	buf[2] = (unsigned char)(((acfg->objecttype & 0x03) << 6)
	                       | ((acfg->sample_rate_index & 0x0f) << 2)
	                       | ((acfg->channel_conf & 0x07) >> 2));
	/* channel config low 2 bits | 4 flag bits left 0 | frame length bits 12..11 */
	buf[3] = (unsigned char)(((acfg->channel_conf & 0x03) << 6)
	                       | ((frame_len >> 11) & 0x03));
	/* frame length bits 10..3 */
	buf[4] = (unsigned char)((frame_len >> 3) & 0xff);
	/* frame length bits 2..0 | top 5 bits of buffer fullness 0x7ff */
	buf[5] = (unsigned char)(((frame_len & 0x07) << 5) | ((0x7ff >> 6) & 0x1f));
	/* low 6 bits of buffer fullness 0x7ff | 2-bit raw-data-block count field = 0 */
	buf[6] = (unsigned char)((0x7ff & 0x3f) << 2);
	return 0;
}






int main(int argc, char* argv[])
{

	AVFormatContext *ifmt_ctx = NULL;
	AVPacket pkt;
	int ret, i;
	int videoindex=-1,audioindex=-1;
	#if DEMUXER_AAC
	const char *in_filename    = "titanic.flv";		//Input file URL
	const char *out_filename_v = "titanic.h264";	//Output file URL
	const char *out_filename_a = "titanic.aac";
	#endif

	#if DEMUXER_MP3
	const char *in_filename    = "titanic.flv";		//Input file URL
	const char *out_filename_v = "titanic.h264";	//Output file URL
	const char *out_filename_a = "titanic.mp3";
	#endif




	av_register_all();
	//Input
	if ((ret = avformat_open_input(&ifmt_ctx, in_filename, 0, 0)) < 0) 
	{
		printf( "Could not open input file.");
		return -1;
	}
	if ((ret = avformat_find_stream_info(ifmt_ctx, 0)) < 0) 
	{
		printf( "Failed to retrieve input stream information");
		return -1;
	}

	videoindex=-1;
	for(i=0; i<ifmt_ctx->nb_streams; i++) 
	{
		if(ifmt_ctx->streams[i]->codec->codec_type==AVMEDIA_TYPE_VIDEO)
		{
			videoindex=i;
		}else if(ifmt_ctx->streams[i]->codec->codec_type==AVMEDIA_TYPE_AUDIO)
		{
			audioindex=i;
		}
	}

	FILE *fp_audio=fopen(out_filename_a,"wb+");  
	FILE *fp_video=fopen(out_filename_v,"wb+");  

	//FLV/MP4/MKV等結構中,h264需要h264_mp4toannexb處理。新增SPS/PPS等資訊。FLV封裝時,可以把
	//多個NALU放在一個VIDEO TAG中,結構為4B NALU長度+NALU1+4B NALU長度+NALU2+...,需要做的處理把4B
	//長度換成00000001或者000001

	AVBitStreamFilterContext* h264bsfc =  av_bitstream_filter_init("h264_mp4toannexb"); 

#if DEMUXER_AAC
	ADTSContext stADTSContext;
	unsigned char pAdtsHead[7];
#endif

	while(av_read_frame(ifmt_ctx, &pkt)>=0)
	{
		if(pkt.stream_index==videoindex)
		{

			av_bitstream_filter_filter(h264bsfc, ifmt_ctx->streams[videoindex]->codec, NULL, &pkt.data, &pkt.size, pkt.data, pkt.size, 0);

			printf("Write Video Packet. size:%d\tpts:%lld\n",pkt.size,pkt.pts);
			fwrite(pkt.data,1,pkt.size,fp_video);
		}
		else if(pkt.stream_index==audioindex)
		{
			//AAC在封裝結構MKV/FLV/MP4結構中,需要手動新增ADTS
	#if DEMUXER_AAC
			aac_decode_extradata(&stADTSContext, ifmt_ctx->streams[audioindex]->codec->extradata, ifmt_ctx->streams[audioindex]->codec->extradata_size);
			aac_set_adts_head(&stADTSContext, pAdtsHead, pkt.size);
			fwrite(pAdtsHead, 1, 7, fp_audio);
	#endif
			//一般結構如MP3直接寫檔案即可
			
			printf("Write Audio Packet. size:%d\tpts:%lld\n",pkt.size,pkt.pts);
			fwrite(pkt.data,1,pkt.size,fp_audio);
		}
		av_free_packet(&pkt);
	}


	av_bitstream_filter_close(h264bsfc);  


	fclose(fp_video);
	fclose(fp_audio);

	avformat_close_input(&ifmt_ctx);

	if (ret < 0 && ret != AVERROR_EOF) 
	{
		printf( "Error occurred.\n");
		return -1;
	}
	return 0;
}


6.執行結果

生成titanic.h264和titanic.aac兩個檔案,用ffplay.exe可以驗證播放。


編譯環境:   Win7_64bit+VS2008

DEMO下載地址:https://download.csdn.net/download/hiwubihe/10643142