ffmpeg作为视频编解码的基石,其重要性不言而喻,本篇介绍ffmpeg在VideoBistoury中的封装结构。
通过ffmpeg_frame 统一管理解码后的音频帧、视频帧
// Unified container for one decoded frame. Exactly one payload kind is
// populated per instance: check hasVideo / hasAudio before reading the
// corresponding fields. The data buffer is owned by this frame.
class FFrame {
public:
    BYTE *data = nullptr;     // RGB24 pixels (video) or packed S16 PCM (audio)
    bool hasVideo = false;    // true when width/height and data are valid video
    bool hasAudio = false;    // true when nChannels/sampleRate and data are valid audio
    int length = 0;           // byte size of data
    /* video */
    int width = 0;
    int height = 0;
    // BUG FIX: the original line was an incomplete `double` declaration with
    // the member name lost; captureFrame() assigns `fframe->pts`, so restore it.
    double pts = 0;           // presentation time in seconds
    /* audio */
    int nChannels = 0;
    int sampleRate = 0;
    uint64_t channel_layout = 0;
};  // BUG FIX: class definition was missing its terminating semicolon
抓帧神器
注意使用 extern "C"
// FFmpeg is a C library: wrap the headers in extern "C" so C++ name
// mangling does not break linking.
// BUG FIX: the trailing Chinese annotations were bare text after the
// #include directives (not comments) and would not compile; they are now
// proper // comments.
extern "C" {
#include "libavcodec/avcodec.h"       // codecs (decode/encode)
#include "libavformat/avformat.h"     // container (de)muxing
#include "libavutil/avutil.h"         // common utilities
#include "libswscale/swscale.h"       // image scaling / pixel-format conversion
#include "libswresample/swresample.h" // audio resampling / sample-format conversion
}
#include "ffmpeg_frame.h"
private:
// Path of the media file to open (C string; lifetime managed elsewhere).
char *m_video_path;
// Demuxer context for the opened container.
AVFormatContext *av_fmt_ctx;
// Stream indices inside av_fmt_ctx; -1 until prepare() finds them.
int video_index = -1;
int audio_index = -1;
// Decoder contexts and decoders for the two streams.
AVCodecContext *video_codec_ctx;
AVCodecContext *audio_codec_ctx;
AVCodec *video_codec, *audio_codec;
// Reusable packet filled by av_read_frame().
AVPacket *packet;
// Decoded YUV frame, its RGB24 conversion target, and the decoded audio frame.
AVFrame *video_frame;
AVFrame *video_RGB_frame;
AVFrame *audio_frame;
// Conversion contexts: sws_ctx for YUV->RGB, swr_ctx for audio resampling.
SwsContext *sws_ctx;
SwrContext *swr_ctx;
// Backing buffer for video_RGB_frame and its size in bytes.
uint8_t *rgb_frame_buffer;
int rgb_picture_size;
// Running pts offset in seconds added to each frame's own pts.
double current_pts = 0;
/** audio output parameters (target format for swr_convert) **/
int out_audio_samplerate = 0;
int out_audio_channels = 0;
uint64_t out_audio_channel_layout;
AVSampleFormat out_audio_sample_fmt = AV_SAMPLE_FMT_S16;
状态机,参考 播放器状态机
prepare:采集视频基本信息,包括size、avformat、avstream、avcodec、timebase
// prepare(), step 1: open the container, locate the video/audio streams,
// record time base / frame rate, and open both decoders.
av_register_all();       // no-op since FFmpeg 4.0; kept for older builds
avcodec_register_all();  // ditto
av_fmt_ctx = avformat_alloc_context();
int ret;
ret = avformat_open_input(&av_fmt_ctx, m_video_path, NULL, NULL);
printf("open video result :%d\n", ret);  // BUG FIX: message said "opengl"
if (ret < 0) {
    printf("open video failed %d\n", ret);
    return;
}
// BUG FIX: probe the streams before reading codecpar — for many containers
// the stream parameters (size, sample rate, ...) are unset until this call.
if (avformat_find_stream_info(av_fmt_ctx, NULL) < 0) {
    printf("find stream info failed\n");
    return;
}
for (unsigned int i = 0; i < av_fmt_ctx->nb_streams; ++i) {  // nb_streams is unsigned
    if (av_fmt_ctx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
        video_index = i;
    }
    if (av_fmt_ctx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
        audio_index = i;
    }
}
if (video_index < 0 || audio_index < 0) {
    printf("can not find video index or audio index\n");
    return;
}
video_time_base = av_q2d(av_fmt_ctx->streams[video_index]->time_base);
video_frame_rate = av_q2d(av_fmt_ctx->streams[video_index]->r_frame_rate);
auto video_codec_par = av_fmt_ctx->streams[video_index]->codecpar;
auto audio_codec_par = av_fmt_ctx->streams[audio_index]->codecpar;
video_codec = avcodec_find_decoder(video_codec_par->codec_id);
audio_codec = avcodec_find_decoder(audio_codec_par->codec_id);
video_codec_ctx = avcodec_alloc_context3(video_codec);
audio_codec_ctx = avcodec_alloc_context3(audio_codec);
avcodec_parameters_to_context(video_codec_ctx, video_codec_par);
avcodec_parameters_to_context(audio_codec_ctx, audio_codec_par);
// BUG FIX: width/height were read *before* avcodec_parameters_to_context(),
// when the freshly allocated context still held 0x0.
width = video_codec_ctx->width;
height = video_codec_ctx->height;
if (avcodec_open2(video_codec_ctx, video_codec, NULL) < 0) {
    printf("open video codec failed\n");
}
if (avcodec_open2(audio_codec_ctx, audio_codec, NULL) < 0) {
    printf("open audio codec failed\n");
}
开辟解码前后的中间缓存(packet、YUV帧video_frame、RGB帧video_RGB_frame、audio_frame)
构造帧格式转换的context(视频帧sws_ctx、音频帧swr_ctx)
// prepare(), step 2: allocate the decode-side buffers (packet, YUV frame,
// RGB frame, audio frame) and build the conversion contexts
// (sws_ctx: YUV->RGB24, swr_ctx: audio resample/format).
packet = av_packet_alloc();  // BUG FIX: dropped the follow-up av_init_packet(packet);
                             // av_packet_alloc() already returns a fully
                             // initialized packet, and av_init_packet() is deprecated.
video_frame = av_frame_alloc();
video_RGB_frame = av_frame_alloc();
audio_frame = av_frame_alloc();
// Same-size conversion: decoder pixel format (getPixFormat()) -> RGB24.
sws_ctx = sws_getContext(video_codec_ctx->width, video_codec_ctx->height, getPixFormat(),
                         video_codec_ctx->width, video_codec_ctx->height, AV_PIX_FMT_RGB24, SWS_BICUBIC,
                         NULL, NULL, NULL);
// NOTE(review): avpicture_get_size()/avpicture_fill() are deprecated in favor
// of av_image_get_buffer_size()/av_image_fill_arrays(); kept here for
// consistency with the old decode API used elsewhere in this file.
rgb_picture_size = avpicture_get_size(AV_PIX_FMT_RGB24, video_codec_ctx->width, video_codec_ctx->height);
rgb_frame_buffer = (uint8_t *) (av_mallocz(rgb_picture_size));
avpicture_fill((AVPicture *) video_RGB_frame, rgb_frame_buffer, AV_PIX_FMT_RGB24,
               video_codec_ctx->width, video_codec_ctx->height);
/** audio: keep the input's channel layout and rate, convert to packed S16 **/
auto in_audio_channels = av_get_channel_layout_nb_channels(audio_codec_ctx->channel_layout);
auto in_audio_sample_fmt = audio_codec_ctx->sample_fmt;
auto in_audio_samplerate = audio_codec_ctx->sample_rate;
auto in_audio_channel_layout = audio_codec_ctx->channel_layout;
out_audio_channels = in_audio_channels;
out_audio_samplerate = in_audio_samplerate;
out_audio_sample_fmt = AV_SAMPLE_FMT_S16;
out_audio_channel_layout = in_audio_channel_layout;
out_audio_length = av_samples_get_buffer_size(NULL, out_audio_channels, NB_SAMPLES, out_audio_sample_fmt, 1);
swr_ctx = swr_alloc_set_opts(NULL,
                             out_audio_channel_layout, out_audio_sample_fmt, out_audio_samplerate,
                             in_audio_channel_layout, in_audio_sample_fmt, in_audio_samplerate,
                             0, NULL);  // log offset 0, no logging context
swr_init(swr_ctx);
av_dump_format(av_fmt_ctx, 0, m_video_path, 0);  // debug: print stream layout
视频帧解码为YUV、YUV转RGB,对于PC抓取的数据是倒的,这里需要颠倒处理;这一步可以拆开,在显示时进行YUV转RGB,毕竟YUV内存占用量比RGB小不少
音频帧解码、重采样格式转换;
pts设置;
时间基转化与设置
/**
 * Read one packet from the container, decode it, and wrap the result in a
 * newly allocated FFrame.
 *
 * @return NULL at end-of-stream or read error; otherwise the caller owns the
 *         returned FFrame and its data buffer. A non-NULL frame with
 *         hasVideo == hasAudio == false means the packet produced no complete
 *         output (other stream, or decoder needs more data).
 */
FFrame *FFmpegCapturer::captureFrame() {
    int ret;
    int got_picture = 0;  // BUG FIX: was uninitialized — read below even when the decode call fails
    int got_frame = 0;    // BUG FIX: ditto for the audio branch
    double pts = 0;
    ret = av_read_frame(av_fmt_ctx, packet);
    if (ret < 0) {
        printf("read no frame\n");
        return NULL;
    }
    auto fframe = new FFrame();
    // Robustness: give the payload fields a defined state for packets that
    // decode to nothing.
    fframe->data = NULL;
    fframe->length = 0;
    // ---- video packet ----
    if (packet->stream_index == video_index) {
        // NOTE(review): avcodec_decode_video2() is deprecated; the
        // avcodec_send_packet()/avcodec_receive_frame() API is the replacement.
        ret = avcodec_decode_video2(video_codec_ctx, video_frame, &got_picture, packet);
        if (ret < 0) {
            printf("decode video failed\n");
        }
        // pts selection: prefer the decoder-carried opaque value, else the packet dts.
        if (packet->dts == AV_NOPTS_VALUE
            && video_frame->opaque
            && *(uint64_t *) video_frame->opaque != AV_NOPTS_VALUE) {
            pts = *(uint64_t *) video_frame->opaque;
        } else if (packet->dts != AV_NOPTS_VALUE) {
            pts = packet->dts;
        } else {
            pts = 0;
        }
        pts *= av_q2d(av_fmt_ctx->streams[video_index]->time_base);  // ticks -> seconds
        if (got_picture) {
            // PC-captured frames arrive upside down: flip vertically by
            // pointing each plane at its last row and negating the stride.
            // (Chroma planes are half height — assumes 4:2:0 input.)
            video_frame->data[0] += video_frame->linesize[0] * (video_codec_ctx->height - 1);
            video_frame->linesize[0] *= -1;
            video_frame->data[1] += video_frame->linesize[1] * (video_codec_ctx->height / 2 - 1);
            video_frame->linesize[1] *= -1;
            video_frame->data[2] += video_frame->linesize[2] * (video_codec_ctx->height / 2 - 1);
            video_frame->linesize[2] *= -1;
            // YUV -> RGB24 into the pre-allocated video_RGB_frame buffer.
            sws_scale(sws_ctx, (const uint8_t *const *) (video_frame->data),
                      video_frame->linesize, 0, video_codec_ctx->height,
                      video_RGB_frame->data, video_RGB_frame->linesize);
            fframe->hasVideo = true;
            fframe->width = video_codec_ctx->width;
            fframe->height = video_codec_ctx->height;
            fframe->length = rgb_picture_size;
            fframe->data = (BYTE *) av_mallocz(rgb_picture_size);
            fframe->pts = current_pts + pts;
            memcpy(fframe->data, video_RGB_frame->data[0], rgb_picture_size);
        }
    }
    // ---- audio packet ----
    if (packet->stream_index == audio_index) {
        ret = avcodec_decode_audio4(audio_codec_ctx, audio_frame, &got_frame, packet);
        if (ret < 0) {
            printf("decode audio failed\n");
        }
        if (packet->dts == AV_NOPTS_VALUE
            && audio_frame->opaque
            && *(uint64_t *) audio_frame->opaque != AV_NOPTS_VALUE) {
            pts = *(uint64_t *) audio_frame->opaque;
        } else if (packet->dts != AV_NOPTS_VALUE) {
            pts = packet->dts;
        } else {
            pts = 0;
        }
        pts *= av_q2d(av_fmt_ctx->streams[audio_index]->time_base);  // ticks -> seconds
        if (got_frame) {
            uint8_t *out_buffer = (uint8_t *) av_mallocz(MAX_AUDIO_FRAME_SZIE * 2);
            // NOTE(review): swr_convert()'s 3rd argument is output capacity in
            // samples *per channel*; passing MAX_AUDIO_FRAME_SZIE assumes the
            // MAX_AUDIO_FRAME_SZIE * 2 byte buffer can hold that many converted
            // S16 samples across all channels — confirm against the macro.
            int len = swr_convert(swr_ctx, &out_buffer, MAX_AUDIO_FRAME_SZIE,
                                  (const uint8_t **) (audio_frame->data), audio_frame->nb_samples);
            fframe->hasAudio = true;
            fframe->data = out_buffer;  // FFrame takes ownership of the PCM buffer
            fframe->nChannels = out_audio_channels;
            fframe->sampleRate = out_audio_samplerate;
            fframe->length = av_samples_get_buffer_size(0, out_audio_channels, len, out_audio_sample_fmt, 1);
            fframe->pts = current_pts + pts;
            // BUG FIX: removed `memcpy(fframe->data, out_buffer, fframe->length)` —
            // fframe->data *is* out_buffer, so it copied the region onto itself.
        }
    }
    av_packet_unref(packet);  // release the packet's payload for reuse
    return fframe;
}