最簡單的基於FFMPEG+SDL的音視訊播放器

最簡單的基於FFMPEG+SDL的音視訊播放器

一、概述

        在《最簡單的基於FFMPEG+SDL的音訊播放器》記錄一中,我們實現了音訊的播放。更早前,我們在《最簡單的基於FFMPEG+SDL的視訊播放器》記錄一中,實現了視訊的播放。在實現視訊播放的時候,我們設定了一個延遲40ms,否則視訊就會以解碼的速度去播放,很快速。在音訊播放器中,我們有兩個地方控制了播放音訊的速度。第一個是取樣率(http://wiki.libsdl.org/SDL_AudioSpec),取樣率決定每秒送多少樣本資料幀到音訊裝置,取樣率和播放速度成正比。第二個是呼叫SDL_PauseAudio(0)播放音訊後,有一個迴圈等待緩衝區資料播放完畢,才能進行下一幀資料幀的播放。

                //Play    
                SDL_PauseAudio(0);  
                while (audio_len > 0)//Wait until finish    
                    SDL_Delay(1);//ms  
這兩點就保證了音訊的正常播放,而不用視訊播放人為的去設定一個延遲時間保證播放速度。

        這一次,我們將同時完成視訊播放和音訊播放。但我們將採用不同的方式去實現音訊播放,核心內容跟”音訊播放器“是一樣的,只是我們採用了佇列來儲存解複用後的包,這樣將解複用和解碼兩個過程分開,更加清晰,也有利於將來擴充套件更多的功能。

二、主要內容

1、首先、我們先描述一下我們將要用於儲存包的佇列,該結構體如下:

/*包佇列*/
/* Packet queue: a mutex-protected FIFO of demuxed AVPackets, shared between
 * the demux loop (producer) and the SDL audio callback thread (consumer). */
typedef struct PacketQueue {
	AVPacketList *first_pkt, *last_pkt;
	int nb_packets;// number of packets currently queued
	int size;// total payload size of all queued packets, in bytes
	SDL_mutex *mutex;// protects the queue against concurrent access
	SDL_cond *cond;// signaled whenever a packet is enqueued
} PacketQueue;
AVPacketList:AVPacketList是FFMPEG內建的結構體,是包的一個連結串列,結構體如下:
/* FFmpeg's built-in singly linked list node holding one AVPacket. */
typedef struct AVPacketList {
     AVPacket pkt;
     struct AVPacketList *next;
  } AVPacketList;
nb_packets:佇列中包數量

size:佇列中所有包的大小,以位元組為單位

mutex和cond:互斥量和條件變數,因為音訊播放是在另外一個執行緒中(想想那個callback),所以對於佇列這個公共資源的讀寫需要互斥。如果對於互斥量和條件變數不熟悉可以參考生產者-消費者問題。

2、關於該佇列的一些操作如下:

2.1、初始化佇列

/* Initialize a packet queue: zero every field, then create the mutex and
 * condition variable used for producer/consumer synchronization. */
void packet_queue_init(PacketQueue *q) {
	memset(q, 0, sizeof(*q));
	q->mutex = SDL_CreateMutex();
	q->cond  = SDL_CreateCond();
}
2.2、入隊
/*
 * Enqueue one packet.
 * Duplicates the packet's payload so the queue owns its own copy, appends a
 * new list node at the tail, updates the counters, and wakes any consumer
 * blocked in packet_queue_get().
 * Returns 0 on success, -1 on failure.
 */
int packet_queue_put(PacketQueue *q, AVPacket *pkt) {

	AVPacketList *node;

	/* Give the packet its own independently owned buffer. */
	if (av_dup_packet(pkt) < 0) {
		return -1;
	}
	node = (AVPacketList*)av_malloc(sizeof(AVPacketList));
	if (!node) {
		return -1;
	}
	node->pkt = *pkt;
	node->next = NULL;

	SDL_LockMutex(q->mutex);/* lock before touching the list */
	if (q->last_pkt == NULL) {
		q->first_pkt = node;/* queue was empty */
	} else {
		q->last_pkt->next = node;
	}
	q->last_pkt = node;
	q->nb_packets++;
	q->size += node->pkt.size;
	SDL_CondSignal(q->cond);/* wake a blocked consumer */
	SDL_UnlockMutex(q->mutex);
	return 0;
}

2.3、出隊
/*
 * Dequeue one packet.
 * q:      the queue
 * pkt:    receives the dequeued packet (caller takes ownership)
 * block:  when non-zero, wait until a packet is available
 * return: -1 quitting, 0 queue empty (non-blocking mode), 1 success
 */
int packet_queue_get(PacketQueue *q, AVPacket *pkt, int block)
{
	AVPacketList *node;
	int ret;

	SDL_LockMutex(q->mutex);/* lock before touching the list */
	for (;;) {

		/* Global quit flag: bail out so the process can terminate. */
		if (quit) {
			ret = -1;
			break;
		}

		node = q->first_pkt;
		if (node != NULL) {
			/* Unlink the head node and hand its packet to the caller. */
			q->first_pkt = node->next;
			if (q->first_pkt == NULL)
				q->last_pkt = NULL;
			q->nb_packets--;
			q->size -= node->pkt.size;
			*pkt = node->pkt;
			av_free(node);
			ret = 1;
			break;
		}
		if (!block) {
			/* Non-blocking mode: report "empty" immediately. */
			ret = 0;
			break;
		}
		/* Blocking mode: atomically release the mutex and sleep until a
		 * producer signals; the mutex is re-acquired before returning. */
		SDL_CondWait(q->cond, q->mutex);
	}
	SDL_UnlockMutex(q->mutex);
	return ret;
}

程式中的quit是一個全域性變數,保證視訊播放完成後可以正常退出,否則程序將無法結束。程式碼如下:
//當SDL退出時,設定quit=1
SDL_PollEvent(&event);
  switch(event.type) {
  case SDL_QUIT:
    quit = 1;
     ....

剩下的事情就和原來一樣了,讀檔案,讀取流資訊,解複用,解碼......,特別的地方就是如何將解複用的包入隊,和從佇列取包,然後解碼。

main函式原始碼如下:

<pre name="code" class="cpp">int main(int argc, char *argv[]) {

	struct SwsContext * sws_ctx = NULL;
	AVFormatContext *pFormatCtx = NULL;
	int             i, videoStream, audioStream;
	AVPacket        packet;
	int             frameFinished;
	AVCodecContext  *pCodecCtxOrig = NULL;
	AVCodecContext  *pCodecCtx = NULL;
	AVCodec         *pCodec = NULL;
	AVFrame         *pFrame = NULL;
	AVCodecContext  *aCodecCtxOrig = NULL;
	AVCodecContext  *aCodecCtx = NULL;
	AVCodec         *aCodec = NULL;
	SDL_Overlay     *bmp;
	SDL_Surface     *screen;
	SDL_Rect        rect;
	SDL_Event       event;
	SDL_AudioSpec   wanted_spec, spec;

	// Register all formats and codecs
	av_register_all();
	if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER)) {
		fprintf(stderr, "Could not initialize SDL - %s\n", SDL_GetError());
		exit(1);
	}
	//檔案路徑
	char* filepath = "2.mp4";
	// Open video file
	if (avformat_open_input(&pFormatCtx, filepath, NULL, NULL) != 0)
		return -1; // Couldn't open file
	// Retrieve stream information
	if (avformat_find_stream_info(pFormatCtx, NULL) < 0)
		return -1; // Couldn't find stream information

	// Find the first video stream and audio stream
	videoStream = -1;
	audioStream = -1;
	for (i = 0; i < pFormatCtx->nb_streams; i++) {
		if (pFormatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO &&
			videoStream < 0) {
			videoStream = i;
		}
		if (pFormatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO &&
			audioStream < 0) {
			audioStream = i;
		}
	}
	if (videoStream == -1)
		return -1; // Didn't find a video stream
	if (audioStream == -1)
		return -1;

	//find the codeccontext
	aCodecCtxOrig = pFormatCtx->streams[audioStream]->codec;
	//get the decoder
	aCodec = avcodec_find_decoder(aCodecCtxOrig->codec_id);
	if (!aCodec) {
		fprintf(stderr, "Unsupported codec!\n");
		return -1;
	}

	// Copy context
	aCodecCtx = avcodec_alloc_context3(aCodec);
	if (avcodec_copy_context(aCodecCtx, aCodecCtxOrig) != 0) {
		fprintf(stderr, "Couldn't copy codec context");
		return -1; // Error copying codec context
	}

	// Set audio settings from codec info
	wanted_spec.freq =   44100;
	wanted_spec.format = AUDIO_S16SYS;
	wanted_spec.channels = aCodecCtx->channels;
	wanted_spec.silence = 0;
	wanted_spec.samples = SDL_AUDIO_BUFFER_SIZE;
	wanted_spec.callback = audio_callback;
	wanted_spec.userdata = aCodecCtx;

	//openaudio device
	if (SDL_OpenAudio(&wanted_spec, &spec) < 0) {
		fprintf(stderr, "SDL_OpenAudio: %s\n", SDL_GetError());
		return -1;
	}
	//open the audio decoder
	avcodec_open2(aCodecCtx, aCodec, NULL);

	packet_queue_init(&audioq);
	//play audio
	SDL_PauseAudio(0);
	//------------------------------------------------------------
	//video part
	// Get a pointer to the codec context for the video stream
	pCodecCtxOrig = pFormatCtx->streams[videoStream]->codec;
	// Find the decoder for the video stream
	pCodec = avcodec_find_decoder(pCodecCtxOrig->codec_id);
	if (pCodec == NULL) {
		fprintf(stderr, "Unsupported codec!\n");
		return -1; // Codec not found
	}

	// Copy context
	pCodecCtx = avcodec_alloc_context3(pCodec);
	if (avcodec_copy_context(pCodecCtx, pCodecCtxOrig) != 0) {
		fprintf(stderr, "Couldn't copy codec context");
		return -1; // Error copying codec context
	}

	// Open codec
	if (avcodec_open2(pCodecCtx, pCodec, NULL) < 0)
		return -1; // Could not open codec

	// Allocate video frame
	pFrame = av_frame_alloc();

	// Make a screen to put our video
	screen = SDL_SetVideoMode(pCodecCtx->width, pCodecCtx->height, 0, 0);
	if (!screen) {
		fprintf(stderr, "SDL: could not set video mode - exiting\n");
		exit(1);
	}

	// Allocate a place to put our YUV image on that screen
	bmp = SDL_CreateYUVOverlay(pCodecCtx->width,
		pCodecCtx->height,
		SDL_YV12_OVERLAY,//YVU模式
		screen);

	// initialize SWS context for software scaling
	sws_ctx = sws_getContext(pCodecCtx->width,
		pCodecCtx->height,
		pCodecCtx->pix_fmt,
		pCodecCtx->width,
		pCodecCtx->height,
		PIX_FMT_YUV420P,
		SWS_BILINEAR,
		NULL,
		NULL,
		NULL
		);

	// Read frames and save first five frames to disk
	while (av_read_frame(pFormatCtx, &packet) >= 0) {
		// Is this a packet from the video stream?
		if (packet.stream_index == videoStream) {
			// Decode video frame
			avcodec_decode_video2(pCodecCtx, pFrame, &frameFinished, &packet);

			// Did we get a video frame?
			if (frameFinished) {
				SDL_LockYUVOverlay(bmp);

				AVPicture pict;
				pict.data[0] = bmp->pixels[0];
				pict.data[1] = bmp->pixels[2];
				pict.data[2] = bmp->pixels[1];

				pict.linesize[0] = bmp->pitches[0];
				pict.linesize[1] = bmp->pitches[2];
				pict.linesize[2] = bmp->pitches[1];

				// Convert the image into YUV format that SDL uses	
				sws_scale(sws_ctx, (uint8_t const * const *)pFrame->data,
					pFrame->linesize, 0, pCodecCtx->height,
					pict.data, pict.linesize);

				SDL_UnlockYUVOverlay(bmp);

				rect.x = 0;
				rect.y = 0;
				rect.w = pCodecCtx->width;
				rect.h = pCodecCtx->height;
				SDL_DisplayYUVOverlay(bmp, &rect);
				av_free_packet(&packet);
				SDL_Delay(20);//延遲一下,防止播放太快
			}
		}
		else if (packet.stream_index == audioStream) {
			packet_queue_put(&audioq, &packet);//音訊包入隊
		}
		else {
			// Free the packet that was allocated by av_read_frame
			av_free_packet(&packet);
		}
		//事件處理,SDL視訊播放完成後,會觸發SDL_QUIT事件
		SDL_PollEvent(&event);
		switch (event.type) {
		case SDL_QUIT:
			quit = 1;
			SDL_Quit();
			exit(0);
			break;
		default:
			break;
		}

	}
	//close the video and audio context
	avcodec_close(pCodecCtxOrig);
	avcodec_close(pCodecCtx);
	avcodec_close(aCodecCtxOrig);
	avcodec_close(aCodecCtx);
	// Close the video file
	avformat_close_input(&pFormatCtx);
	return 0;
}


視訊部分的程式碼如果有不懂的,可以回去看看:最簡單的視訊播放器記錄一最簡單的視訊播放器記錄二

音訊回撥函式如下:

/*
 * SDL audio callback.
 * userdata: the audio AVCodecContext
 * stream:   out — device buffer to fill with decoded PCM
 * len:      number of bytes SDL wants
 */
void audio_callback(void *userdata, Uint8 *stream, int len) {

	AVCodecContext *aCodecCtx = (AVCodecContext *)userdata;
	int chunk, audio_size;

	/* Decoded-but-not-yet-played PCM carried over between callbacks. */
	static uint8_t audio_buf[(MAX_AUDIO_FRAME_SIZE * 3) / 2];
	static unsigned int audio_buf_size = 0;
	static unsigned int audio_buf_index = 0;

	while (len > 0) {
		if (audio_buf_index >= audio_buf_size) {
			/* Buffer drained: decode the next audio frame. */
			audio_size = audio_decode_frame(aCodecCtx, audio_buf, sizeof(audio_buf));
			if (audio_size < 0) {
				/* If error, output silence */
				audio_buf_size = 1024;
				memset(audio_buf, 0, audio_buf_size);
			}
			else {
				audio_buf_size = audio_size;
			}
			audio_buf_index = 0;
		}
		/* Copy as much as SDL asked for, but no more than we have. */
		chunk = audio_buf_size - audio_buf_index;
		if (chunk > len)
			chunk = len;
		memcpy(stream, (uint8_t *)audio_buf + audio_buf_index, chunk);
		/* Advance past the bytes just delivered. */
		len -= chunk;
		stream += chunk;
		audio_buf_index += chunk;
	}
}

音訊解碼函式如下:

/*
 * Decode one audio frame into audio_buf.
 * aCodecCtx: audio codec context
 * audio_buf: out — buffer receiving converted interleaved S16 PCM
 * buf_size:  capacity of audio_buf in bytes
 * return:    number of PCM bytes written, or -1 on error/quit
 *
 * Newer FFmpeg decoders emit planar float (AV_SAMPLE_FMT_FLTP), which SDL
 * cannot play directly, so libswresample converts the samples to
 * interleaved S16 before they are copied out.
 */
int audio_decode_frame(AVCodecContext *aCodecCtx, uint8_t *audio_buf, int buf_size) {

	static AVPacket pkt;
	static uint8_t *audio_pkt_data = NULL;
	static int audio_pkt_size = 0;
	static AVFrame frame;
	// FIX: create the resampler and its output buffer once and reuse them.
	// The original allocated both on every call (leaking the buffer each
	// time) and released the context with av_free() instead of swr_free()
	// while still inside the decode loop, so the `continue` path could use
	// a freed context.
	static struct SwrContext *au_convert_ctx = NULL;
	static uint8_t *out_buffer_audio = NULL;
	int len1;// bytes of the packet consumed by the decoder
	int data_size = 0;

	// Output parameters: stereo, interleaved signed 16-bit, 44100 Hz
	uint64_t out_channel_layout = AV_CH_LAYOUT_STEREO;
	int out_channels = av_get_channel_layout_nb_channels(out_channel_layout);
	AVSampleFormat out_sample_fmt = AV_SAMPLE_FMT_S16;
	int out_sample_rate = 44100;

	if (!au_convert_ctx) {
		// Some codecs lose channel_layout; rebuild it from the channel count
		uint64_t in_channel_layout = av_get_default_channel_layout(aCodecCtx->channels);
		// *2 keeps the output buffer comfortably larger than any input frame
		out_buffer_audio = (uint8_t *)av_malloc(MAX_AUDIO_FRAME_SIZE * 2);
		au_convert_ctx = swr_alloc_set_opts(NULL, out_channel_layout, out_sample_fmt,
			out_sample_rate, in_channel_layout, aCodecCtx->sample_fmt,
			aCodecCtx->sample_rate, 0, NULL);
		if (!out_buffer_audio || !au_convert_ctx || swr_init(au_convert_ctx) < 0)
			return -1;
	}

	for (;;) {
		while (audio_pkt_size > 0) {// keep decoding until the packet is spent
			int got_frame = 0;
			len1 = avcodec_decode_audio4(aCodecCtx, &frame, &got_frame, &pkt);
			if (len1 < 0) { /* if error, skip the rest of this packet */
				audio_pkt_size = 0;
				break;
			}
			audio_pkt_data += len1;// start of the unconsumed data
			audio_pkt_size -= len1;// bytes remaining in the packet
			data_size = 0;
			if (got_frame) {
				// FIX: size the converted output with the OUTPUT channel
				// count and the frame's own sample count (the original used
				// the input channel count, under-reporting for mono input,
				// and a separately-tracked out_nb_samples).
				data_size = av_samples_get_buffer_size(NULL, out_channels,
					frame.nb_samples, out_sample_fmt, 1);
				// Convert to interleaved S16 so SDL can play it
				swr_convert(au_convert_ctx, &out_buffer_audio, MAX_AUDIO_FRAME_SIZE,
					(const uint8_t **)frame.data, frame.nb_samples);
				assert(data_size <= buf_size);
				memcpy(audio_buf, out_buffer_audio, data_size);
			}
			if (data_size <= 0) { /* No data yet, get more frames */
				continue;
			}
			/* We have data, return it and come back for more later */
			return data_size;
		}
		if (pkt.data)
			av_free_packet(&pkt);
		if (quit) {// player is shutting down
			return -1;
		}
		// Blocking-get the next audio packet from the queue
		if (packet_queue_get(&audioq, &pkt, 1) < 0) {
			return -1;
		}
		audio_pkt_data = pkt.data;// points at the packet's payload
		audio_pkt_size = pkt.size;// bytes left to decode
	}
}

執行結果:

        執行後,可以看到視訊和音訊都可以播放了。但是視訊和音訊並不同步,因為視訊我們採用的是延遲20ms,所以看起來有點偏快。音訊是正常的,因為有采樣率控制著播放的速度。如果視訊延遲時間再慢一點,就會影響到音訊的播放,聽起來就會有卡頓。        

        所以,如果還要繼續優化,那麼視訊播放應該在另外的執行緒中進行。而且還要考慮視訊和音訊的同步,比如將視訊同步到音訊。

       最後再將整個原始碼貼上來:

extern"C"{
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>
#include "include/sdl/SDL.h"
#include "include/sdl/SDL_thread.h"
#include "include/libavutil/time.h"
#include "include/libavutil/avstring.h"
#include "libswresample/swresample.h"
}

#pragma comment(lib, "lib/avformat.lib")
#pragma comment(lib, "lib/avcodec.lib")
#pragma comment(lib, "lib/avutil.lib")
#pragma comment(lib, "lib/swscale.lib")
#pragma comment(lib, "lib/swresample.lib")
#pragma comment(lib, "lib/SDL.lib")
#pragma comment(lib, "lib/SDLmain.lib")

#include <stdio.h>
#include<stdlib.h>
#include<string.h>
#include <assert.h>

#define SDL_AUDIO_BUFFER_SIZE 1024
#define MAX_AUDIO_FRAME_SIZE  192000
/* Packet queue: a mutex-protected FIFO of demuxed AVPackets, shared between
 * the demux loop (producer) and the SDL audio callback thread (consumer). */
typedef struct PacketQueue {
	AVPacketList *first_pkt, *last_pkt;
	int nb_packets;// number of packets currently queued
	int size;// total payload size of all queued packets, in bytes
	SDL_mutex *mutex;// protects the queue against concurrent access
	SDL_cond *cond;// signaled whenever a packet is enqueued
} PacketQueue;


PacketQueue audioq;// global queue of demuxed audio packets
int quit = 0;// set to 1 when the player should shut down

/* Initialize a packet queue: zero every field, then create the mutex and
 * condition variable used for producer/consumer synchronization. */
void packet_queue_init(PacketQueue *q) {
	memset(q, 0, sizeof(*q));
	q->mutex = SDL_CreateMutex();
	q->cond  = SDL_CreateCond();
}
/* Enqueue one packet: duplicate its payload so the queue owns a copy,
 * append a node at the tail, update the counters, and wake any consumer
 * blocked in packet_queue_get(). Returns 0 on success, -1 on failure. */
int packet_queue_put(PacketQueue *q, AVPacket *pkt) {

	AVPacketList *node;

	/* Give the packet its own independently owned buffer. */
	if (av_dup_packet(pkt) < 0) {
		return -1;
	}
	node = (AVPacketList*)av_malloc(sizeof(AVPacketList));
	if (!node) {
		return -1;
	}
	node->pkt = *pkt;
	node->next = NULL;

	SDL_LockMutex(q->mutex);/* lock before touching the list */
	if (q->last_pkt == NULL) {
		q->first_pkt = node;/* queue was empty */
	} else {
		q->last_pkt->next = node;
	}
	q->last_pkt = node;
	q->nb_packets++;
	q->size += node->pkt.size;
	SDL_CondSignal(q->cond);/* wake a blocked consumer */
	SDL_UnlockMutex(q->mutex);
	return 0;
}
/* Dequeue one packet into *pkt (caller takes ownership). When block is
 * non-zero, wait until a packet arrives. Returns -1 when quitting, 0 when
 * empty in non-blocking mode, 1 on success. */
int packet_queue_get(PacketQueue *q, AVPacket *pkt, int block)
{
	AVPacketList *node;
	int ret;

	SDL_LockMutex(q->mutex);/* lock before touching the list */

	for (;;) {

		/* Global quit flag: bail out so the process can terminate. */
		if (quit) {
			ret = -1;
			break;
		}

		node = q->first_pkt;
		if (node != NULL) {
			/* Unlink the head node and hand its packet to the caller. */
			q->first_pkt = node->next;
			if (q->first_pkt == NULL)
				q->last_pkt = NULL;
			q->nb_packets--;
			q->size -= node->pkt.size;
			*pkt = node->pkt;
			av_free(node);
			ret = 1;
			break;
		}
		if (!block) {
			/* Non-blocking mode: report "empty" immediately. */
			ret = 0;
			break;
		}
		/* Blocking mode: atomically release the mutex and sleep until a
		 * producer signals; the mutex is re-acquired before returning. */
		SDL_CondWait(q->cond, q->mutex);
	}
	SDL_UnlockMutex(q->mutex);
	return ret;
}




/*
 * Decode one audio frame into audio_buf.
 * aCodecCtx: audio codec context
 * audio_buf: out — buffer receiving converted interleaved S16 PCM
 * buf_size:  capacity of audio_buf in bytes
 * return:    number of PCM bytes written, or -1 on error/quit
 *
 * Newer FFmpeg decoders emit planar float (AV_SAMPLE_FMT_FLTP), which SDL
 * cannot play directly, so libswresample converts the samples to
 * interleaved S16 before they are copied out.
 */
int audio_decode_frame(AVCodecContext *aCodecCtx, uint8_t *audio_buf, int buf_size) {

	static AVPacket pkt;
	static uint8_t *audio_pkt_data = NULL;
	static int audio_pkt_size = 0;
	static AVFrame frame;
	// FIX: create the resampler and its output buffer once and reuse them.
	// The original allocated both on every call (leaking the buffer each
	// time) and released the context with av_free() instead of swr_free()
	// while still inside the decode loop, so the `continue` path could use
	// a freed context.
	static struct SwrContext *au_convert_ctx = NULL;
	static uint8_t *out_buffer_audio = NULL;
	int len1;// bytes of the packet consumed by the decoder
	int data_size = 0;

	// Output parameters: stereo, interleaved signed 16-bit, 44100 Hz
	uint64_t out_channel_layout = AV_CH_LAYOUT_STEREO;
	int out_channels = av_get_channel_layout_nb_channels(out_channel_layout);
	AVSampleFormat out_sample_fmt = AV_SAMPLE_FMT_S16;
	int out_sample_rate = 44100;

	if (!au_convert_ctx) {
		// Some codecs lose channel_layout; rebuild it from the channel count
		uint64_t in_channel_layout = av_get_default_channel_layout(aCodecCtx->channels);
		// *2 keeps the output buffer comfortably larger than any input frame
		out_buffer_audio = (uint8_t *)av_malloc(MAX_AUDIO_FRAME_SIZE * 2);
		au_convert_ctx = swr_alloc_set_opts(NULL, out_channel_layout, out_sample_fmt,
			out_sample_rate, in_channel_layout, aCodecCtx->sample_fmt,
			aCodecCtx->sample_rate, 0, NULL);
		if (!out_buffer_audio || !au_convert_ctx || swr_init(au_convert_ctx) < 0)
			return -1;
	}

	for (;;) {
		while (audio_pkt_size > 0) {// keep decoding until the packet is spent
			int got_frame = 0;
			len1 = avcodec_decode_audio4(aCodecCtx, &frame, &got_frame, &pkt);
			if (len1 < 0) {
				/* if error, skip the rest of this packet */
				audio_pkt_size = 0;
				break;
			}
			audio_pkt_data += len1;// start of the unconsumed data
			audio_pkt_size -= len1;// bytes remaining in the packet
			data_size = 0;
			if (got_frame) {
				// FIX: size the converted output with the OUTPUT channel
				// count and the frame's own sample count (the original used
				// the input channel count, under-reporting for mono input,
				// and a separately-tracked out_nb_samples).
				data_size = av_samples_get_buffer_size(NULL,
					out_channels,
					frame.nb_samples,
					out_sample_fmt,
					1);
				// Convert to interleaved S16 so SDL can play it
				swr_convert(au_convert_ctx, &out_buffer_audio, MAX_AUDIO_FRAME_SIZE, (const uint8_t **)frame.data, frame.nb_samples);
				assert(data_size <= buf_size);
				memcpy(audio_buf, out_buffer_audio, data_size);
			}
			if (data_size <= 0) {
				/* No data yet, get more frames */
				continue;
			}
			/* We have data, return it and come back for more later */
			return data_size;
		}
		if (pkt.data)
			av_free_packet(&pkt);

		if (quit) {// player is shutting down
			return -1;
		}

		// Blocking-get the next audio packet from the queue
		if (packet_queue_get(&audioq, &pkt, 1) < 0) {
			return -1;
		}
		audio_pkt_data = pkt.data;// points at the packet's payload
		audio_pkt_size = pkt.size;// bytes left to decode
	}
}

/*
 * SDL audio callback.
 * userdata: the audio AVCodecContext
 * stream:   out — device buffer to fill with decoded PCM
 * len:      number of bytes SDL wants
 */
void audio_callback(void *userdata, Uint8 *stream, int len) {

	AVCodecContext *aCodecCtx = (AVCodecContext *)userdata;
	int len1, audio_size;

	/* Decoded-but-not-yet-played PCM carried over between callbacks. */
	static uint8_t audio_buf[(MAX_AUDIO_FRAME_SIZE * 3) / 2];
	static unsigned int audio_buf_size = 0;
	static unsigned int audio_buf_index = 0;

	while (len > 0) {
		if (audio_buf_index >= audio_buf_size) {
			/* We have already sent all our data; get more */
			audio_size = audio_decode_frame(aCodecCtx, audio_buf, sizeof(audio_buf));
			if (audio_size < 0) {
				/* If error, output silence */
				audio_buf_size = 1024; // arbitrary silence chunk
				memset(audio_buf, 0, audio_buf_size);
			}
			else {
				audio_buf_size = audio_size;
			}
			audio_buf_index = 0;
		}
		/* Copy as much as SDL asked for, but no more than we have. */
		len1 = audio_buf_size - audio_buf_index;
		if (len1 > len)
			len1 = len;
		memcpy(stream, (uint8_t *)audio_buf + audio_buf_index, len1);
		len -= len1;
		stream += len1;
		audio_buf_index += len1;
		// FIX: removed SDL_Delay(1) here — sleeping inside the SDL audio
		// callback stalls the audio thread while the device is waiting for
		// its buffer, risking underruns; the device's own sample clock
		// already paces playback.
	}
}

/*
 * Entry point: open the media file, set up audio (SDL audio device, decoder,
 * packet queue) and video (decoder, SDL overlay, swscale), then run the demux
 * loop: video packets are decoded and displayed inline, audio packets are
 * queued for the SDL audio callback thread.
 */
int main(int argc, char *argv[]) {

	struct SwsContext * sws_ctx = NULL;
	AVFormatContext *pFormatCtx = NULL;
	int             i, videoStream, audioStream;
	AVPacket        packet;
	int             frameFinished;
	AVCodecContext  *pCodecCtxOrig = NULL;
	AVCodecContext  *pCodecCtx = NULL;
	AVCodec         *pCodec = NULL;
	AVFrame         *pFrame = NULL;
	AVCodecContext  *aCodecCtxOrig = NULL;
	AVCodecContext  *aCodecCtx = NULL;
	AVCodec         *aCodec = NULL;
	SDL_Overlay     *bmp;
	SDL_Surface     *screen;
	SDL_Rect        rect;
	SDL_Event       event;
	SDL_AudioSpec   wanted_spec, spec;

	// Register all formats and codecs
	av_register_all();
	if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_TIMER)) {
		fprintf(stderr, "Could not initialize SDL - %s\n", SDL_GetError());
		exit(1);
	}
	// Media file path
	char* filepath = "2.mp4";
	// Open video file
	if (avformat_open_input(&pFormatCtx, filepath, NULL, NULL) != 0)
		return -1; // Couldn't open file
	// Retrieve stream information
	if (avformat_find_stream_info(pFormatCtx, NULL) < 0)
		return -1; // Couldn't find stream information

	// Find the first video stream and audio stream
	videoStream = -1;
	audioStream = -1;
	for (i = 0; i < pFormatCtx->nb_streams; i++) {
		if (pFormatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_VIDEO &&
			videoStream < 0) {
			videoStream = i;
		}
		if (pFormatCtx->streams[i]->codec->codec_type == AVMEDIA_TYPE_AUDIO &&
			audioStream < 0) {
			audioStream = i;
		}
	}
	if (videoStream == -1)
		return -1; // Didn't find a video stream
	if (audioStream == -1)
		return -1; // Didn't find an audio stream

	// Audio: locate the stream's codec context and its decoder
	aCodecCtxOrig = pFormatCtx->streams[audioStream]->codec;
	aCodec = avcodec_find_decoder(aCodecCtxOrig->codec_id);
	if (!aCodec) {
		fprintf(stderr, "Unsupported codec!\n");
		return -1;
	}

	// Copy context (decoding through the demuxer's own context is unsafe)
	aCodecCtx = avcodec_alloc_context3(aCodec);
	if (avcodec_copy_context(aCodecCtx, aCodecCtxOrig) != 0) {
		fprintf(stderr, "Couldn't copy codec context");
		return -1; // Error copying codec context
	}

	// Set audio settings from codec info
	wanted_spec.freq =   44100;
	wanted_spec.format = AUDIO_S16SYS;
	wanted_spec.channels = aCodecCtx->channels;
	wanted_spec.silence = 0;
	wanted_spec.samples = SDL_AUDIO_BUFFER_SIZE;
	wanted_spec.callback = audio_callback;
	wanted_spec.userdata = aCodecCtx;

	// Open the audio device
	if (SDL_OpenAudio(&wanted_spec, &spec) < 0) {
		fprintf(stderr, "SDL_OpenAudio: %s\n", SDL_GetError());
		return -1;
	}
	// Open the audio decoder (FIX: the original ignored the return value)
	if (avcodec_open2(aCodecCtx, aCodec, NULL) < 0) {
		fprintf(stderr, "Couldn't open audio codec\n");
		return -1;
	}

	packet_queue_init(&audioq);
	// Start playback: SDL now pulls PCM via audio_callback on its own thread
	SDL_PauseAudio(0);
	//------------------------------------------------------------
	//video part
	// Get a pointer to the codec context for the video stream
	pCodecCtxOrig = pFormatCtx->streams[videoStream]->codec;
	// Find the decoder for the video stream
	pCodec = avcodec_find_decoder(pCodecCtxOrig->codec_id);
	if (pCodec == NULL) {
		fprintf(stderr, "Unsupported codec!\n");
		return -1; // Codec not found
	}

	// Copy context
	pCodecCtx = avcodec_alloc_context3(pCodec);
	if (avcodec_copy_context(pCodecCtx, pCodecCtxOrig) != 0) {
		fprintf(stderr, "Couldn't copy codec context");
		return -1; // Error copying codec context
	}

	// Open codec
	if (avcodec_open2(pCodecCtx, pCodec, NULL) < 0)
		return -1; // Could not open codec

	// Allocate video frame
	pFrame = av_frame_alloc();

	// Make a screen to put our video
	screen = SDL_SetVideoMode(pCodecCtx->width, pCodecCtx->height, 0, 0);
	if (!screen) {
		fprintf(stderr, "SDL: could not set video mode - exiting\n");
		exit(1);
	}

	// Allocate a place to put our YUV image on that screen
	bmp = SDL_CreateYUVOverlay(pCodecCtx->width,
		pCodecCtx->height,
		SDL_YV12_OVERLAY,// Y/V/U plane order
		screen);

	// initialize SWS context for software scaling
	sws_ctx = sws_getContext(pCodecCtx->width,
		pCodecCtx->height,
		pCodecCtx->pix_fmt,
		pCodecCtx->width,
		pCodecCtx->height,
		PIX_FMT_YUV420P,
		SWS_BILINEAR,
		NULL,
		NULL,
		NULL
		);

	// Demux loop: dispatch each packet to the video decoder or audio queue
	while (av_read_frame(pFormatCtx, &packet) >= 0) {
		// Is this a packet from the video stream?
		if (packet.stream_index == videoStream) {
			// Decode video frame
			avcodec_decode_video2(pCodecCtx, pFrame, &frameFinished, &packet);

			// Did we get a complete video frame?
			if (frameFinished) {
				SDL_LockYUVOverlay(bmp);

				AVPicture pict;
				// YV12 stores planes as Y,V,U — swap U/V when wiring pointers
				pict.data[0] = bmp->pixels[0];
				pict.data[1] = bmp->pixels[2];
				pict.data[2] = bmp->pixels[1];

				pict.linesize[0] = bmp->pitches[0];
				pict.linesize[1] = bmp->pitches[2];
				pict.linesize[2] = bmp->pitches[1];

				// Convert the image into YUV format that SDL uses
				sws_scale(sws_ctx, (uint8_t const * const *)pFrame->data,
					pFrame->linesize, 0, pCodecCtx->height,
					pict.data, pict.linesize);

				SDL_UnlockYUVOverlay(bmp);

				rect.x = 0;
				rect.y = 0;
				rect.w = pCodecCtx->width;
				rect.h = pCodecCtx->height;
				SDL_DisplayYUVOverlay(bmp, &rect);
				SDL_Delay(20);// crude pacing; proper A/V sync comes later
			}
			// FIX: free the video packet whether or not a frame came out.
			// The original freed it only when frameFinished, leaking every
			// packet that did not complete a frame.
			av_free_packet(&packet);
		}
		else if (packet.stream_index == audioStream) {
			packet_queue_put(&audioq, &packet);// queue now owns the packet
		}
		else {
			// Free the packet that was allocated by av_read_frame
			av_free_packet(&packet);
		}
		// Event handling: SDL raises SDL_QUIT when the window is closed
		SDL_PollEvent(&event);
		switch (event.type) {
		case SDL_QUIT:
			quit = 1;
			SDL_Quit();
			exit(0);
			break;
		default:
			break;
		}

	}
	//close the video and audio context
	avcodec_close(pCodecCtxOrig);
	avcodec_close(pCodecCtx);
	avcodec_close(aCodecCtxOrig);
	avcodec_close(aCodecCtx);
	// Close the video file
	avformat_close_input(&pFormatCtx);
	return 0;
}

參考連結:
http://dranger.com/ffmpeg/tutorial03.html