问题：

使用libav*将原始帧(D3D11Texture2D)编码为rtsp流

葛修永

2023-03-14

我已经设法使用libav*和DirectX纹理创建了一个rtsp流（该纹理是我通过GDI API的BitBlt方法获取的）。下面是我创建实时rtsp流的方法：

创建输出上下文和流（跳过此处的检查）

AVFORMAT_ALLOC_OUTPUT_CONTEXT2(&OFMT_CTX,NULL,“RTSP”,rtsp_url)；//RTSP
vid_codec=avcodec_find_encoder(ofmt_ctx->offormat->video_codec)；
vid_stream=avformat_new_stream(ofmt_ctx,vid_codec)；
vid_codec_ctx=avcodec_alloc_context3(vid_codec)；

设置编解码器参数

codec_ctx->codec_tag = 0;
codec_ctx->codec_id = ofmt_ctx->oformat->video_codec;
//codec_ctx->codec_type = AVMEDIA_TYPE_VIDEO;
codec_ctx->width = width;   codec_ctx->height = height;
codec_ctx->gop_size = 12;
 //codec_ctx->gop_size = 40;
 //codec_ctx->max_b_frames = 3;
codec_ctx->pix_fmt = target_pix_fmt; // AV_PIX_FMT_YUV420P
codec_ctx->framerate = { stream_fps, 1 };
codec_ctx->time_base = { 1, stream_fps};
if (fctx->oformat->flags & AVFMT_GLOBALHEADER)
 {
     codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
 }

初始化视频流

if (avcodec_parameters_from_context(stream->codecpar, codec_ctx) < 0)
{
 Debug::Error("Could not initialize stream codec parameters!");
 return false;
}

AVDictionary* codec_options = nullptr;
if (codec->id == AV_CODEC_ID_H264) {
 av_dict_set(&codec_options, "profile", "high", 0);
 av_dict_set(&codec_options, "preset", "fast", 0);
 av_dict_set(&codec_options, "tune", "zerolatency", 0);
}
// open video encoder
int ret = avcodec_open2(codec_ctx, codec, &codec_options);
if (ret<0) {
 Debug::Error("Could not open video encoder: ", avcodec_get_name(codec->id), " error ret: ", AVERROR(ret));
 return false;
}

stream->codecpar->extradata = codec_ctx->extradata;
stream->codecpar->extradata_size = codec_ctx->extradata_size;

开始流式传输

// Creates an AVFrame described by the encoder's pixel format and the given
// dimensions.
//
// codec_ctx  encoder context; only its pix_fmt is read.
// width      frame width in pixels (truncated to int).
// height     frame height in pixels (truncated to int).
//
// Returns the new frame, or nullptr if the frame could not be allocated.
// The returned frame carries no pixel storage yet: the caller attaches a
// reference-counted buffer with av_frame_get_buffer() before writing to it.
// (Pointing frame->data at a function-local buffer would leave the frame with
// dangling pointers as soon as this function returns.)
AVFrame* AllocateFrameBuffer(AVCodecContext* codec_ctx, double width, double height)
{
    AVFrame* frame = av_frame_alloc();
    if (!frame)
        return nullptr;

    // AVFrame stores integer dimensions; make the narrowing explicit.
    frame->width = static_cast<int>(width);
    frame->height = static_cast<int>(height);
    frame->format = static_cast<int>(codec_ctx->pix_fmt);
    return frame;
}

void RtspStream(AVFormatContext* ofmt_ctx, AVStream* vid_stream, AVCodecContext* vid_codec_ctx, char* rtsp_url)
{
 printf("Output stream info:\n");
 av_dump_format(ofmt_ctx, 0, rtsp_url, 1);

 const int width = WindowManager::Get().GetWindow(RtspStreaming::WindowId())->GetTextureWidth();
 const int height = WindowManager::Get().GetWindow(RtspStreaming::WindowId())->GetTextureHeight();

 //DirectX BGRA to h264 YUV420p
 SwsContext* conversion_ctx = sws_getContext(width, height, src_pix_fmt,
     vid_stream->codecpar->width, vid_stream->codecpar->height, target_pix_fmt, 
     SWS_BICUBIC | SWS_BITEXACT, nullptr, nullptr, nullptr);
if (!conversion_ctx)
{
     Debug::Error("Could not initialize sample scaler!");
     return;
}

 AVFrame* frame = AllocateFrameBuffer(vid_codec_ctx,vid_codec_ctx->width,vid_codec_ctx->height);
 if (!frame) {
     Debug::Error("Could not allocate video frame\n");
     return;
 }


 if (avformat_write_header(ofmt_ctx, NULL) < 0) {
     Debug::Error("Error occurred when writing header");
     return;
 }
 if (av_frame_get_buffer(frame, 0) < 0) {
     Debug::Error("Could not allocate the video frame data\n");
     return;
 }

 int frame_cnt = 0;
 //av start time in microseconds
 int64_t start_time_av = av_gettime();
 AVRational time_base = vid_stream->time_base;
 AVRational time_base_q = { 1, AV_TIME_BASE };

 // frame pixel data info
 int data_size = width * height * 4;
 uint8_t* data = new uint8_t[data_size];
//    AVPacket* pkt = av_packet_alloc();

 while (RtspStreaming::IsStreaming())
 {
     /* make sure the frame data is writable */
     if (av_frame_make_writable(frame) < 0)
     {
         Debug::Error("Can't make frame writable");
         break;
     }

     //get copy/ref of the texture
     //uint8_t* data = WindowManager::Get().GetWindow(RtspStreaming::WindowId())->GetBuffer();
     if (!WindowManager::Get().GetWindow(RtspStreaming::WindowId())->GetPixels(data, 0, 0, width, height))
     {
         Debug::Error("Failed to get frame buffer. ID: ", RtspStreaming::WindowId());
         std::this_thread::sleep_for (std::chrono::seconds(2));
         continue;
     }
     //printf("got pixels data\n");
     // convert BGRA to yuv420 pixel format
     int srcStrides[1] = { 4 * width };
     if (sws_scale(conversion_ctx, &data, srcStrides, 0, height, frame->data, frame->linesize) < 0)
     {
         Debug::Error("Unable to scale d3d11 texture to frame. ", frame_cnt);
         break;
     }
     //Debug::Log("frame pts: ", frame->pts, "  time_base:", av_rescale_q(1, vid_codec_ctx->time_base, vid_stream->time_base));
     frame->pts = frame_cnt++; 
     //frame_cnt++;
     //printf("scale conversion done\n");

     //encode to the video stream
     int ret = avcodec_send_frame(vid_codec_ctx, frame);
     if (ret < 0)
     {
         Debug::Error("Error sending frame to codec context! ",frame_cnt);
         break;
     }

     AVPacket* pkt = av_packet_alloc();
     //av_init_packet(pkt);
     ret = avcodec_receive_packet(vid_codec_ctx, pkt);
     if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
     {
         //av_packet_unref(pkt);
         av_packet_free(&pkt);
         continue;
     }
     else if (ret < 0)
     {
         Debug::Error("Error during receiving packet: ",AVERROR(ret));
         //av_packet_unref(pkt);
         av_packet_free(&pkt);
         break;
     }

     if (pkt->pts == AV_NOPTS_VALUE)
     {
         //Write PTS
         //Duration between 2 frames (us)
         int64_t calc_duration = (double)AV_TIME_BASE / av_q2d(vid_stream->r_frame_rate);
         //Parameters
         pkt->pts = (double)(frame_cnt * calc_duration) / (double)(av_q2d(time_base) * AV_TIME_BASE);
         pkt->dts = pkt->pts;
         pkt->duration = (double)calc_duration / (double)(av_q2d(time_base) * AV_TIME_BASE);
     }
     int64_t pts_time = av_rescale_q(pkt->dts, time_base, time_base_q);
     int64_t now_time = av_gettime() - start_time_av;

     if (pts_time > now_time)
         av_usleep(pts_time - now_time);

     //pkt.pts = av_rescale_q_rnd(pkt.pts, in_stream->time_base, out_stream->time_base, (AVRounding)(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
     //pkt.dts = av_rescale_q_rnd(pkt.dts, in_stream->time_base, out_stream->time_base, (AVRounding)(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX));
     //pkt.duration = av_rescale_q(pkt.duration, in_stream->time_base, out_stream->time_base);
     //pkt->pos = -1;

     //write frame and send
     if (av_interleaved_write_frame(ofmt_ctx, pkt)<0)
     {
         Debug::Error("Error muxing packet, frame number:",frame_cnt);
         break;
     }

     //Debug::Log("RTSP streaming...");
     //sstd::this_thread::sleep_for(std::chrono::milliseconds(1000/20));
     //av_packet_unref(pkt);
     av_packet_free(&pkt);
 }

 //av_free_packet(pkt);
 delete[] data;

 /* Write the trailer, if any. The trailer must be written before you
  * close the CodecContexts open when you wrote the header; otherwise
  * av_write_trailer() may try to use memory that was freed on
  * av_codec_close(). */
 av_write_trailer(ofmt_ctx);
 av_frame_unref(frame);
 av_frame_free(&frame);
 printf("streaming thread CLOSED!\n");
}

现在，这允许我连接到我的rtsp服务器并维护连接。然而，在rtsp客户端，我得到的是灰色或单个静态帧，如下所示：

如果您能提供以下问题的帮助，我将不胜感激：

首先，尽管继续连接到服务器并更新帧，但为什么流不工作？
视频编解码器。默认rtsp格式使用Mpeg4编解码器，是否可以使用H264？当我手动将其设置为AV_CODEC_ID_H264时，程序在avcodec_open2处失败，返回值为-22。
我是否需要为每个帧创建并分配新的“AVFrame”和“AVPacket”？或者我可以为此重用全局变量吗？
我是否需要显式定义一些用于实时流的代码？（就像在ffmpeg中一样，我们使用“-re”标志）。

null

更新

在测试时，我发现我可以使用ffplay播放流，而它被卡在VLC播放器上。下面是ffplay日志上的快照

慕容高卓

2023-03-14

基本构造和初始化似乎是好的。找到以下对您问题的答复

尽管继续连接到服务器并更新帧，但为什么流仍不工作？

如果您得到的是错误或中断的流，您可能需要检查数据包的显示时间戳和解码时间戳（PTS/DTS）。

 // Time base of the output video stream; packet PTS/DTS handed to the muxer
 // are interpreted in these units.
 AVRational time_base = vid_stream->time_base;
 // 1 / AV_TIME_BASE: the unit used when comparing packet times with av_gettime().
 AVRational time_base_q = { 1, AV_TIME_BASE };

视频编解码器。默认rtsp格式使用Mpeg4编解码器，是否可以使用H264？

我不明白为什么不...RTSP只是一种通过网络传输数据包的协议。因此您应该可以使用AV_CODEC_ID_H264对流进行编码。

我是否需要为每个帧创建并分配新的“AVFrame”和“AVPacket”？或者我可以为此重用全局变量吗？

在libav中，编码时一个数据包只承载一个视频帧，而一个数据包中可以包含多个音频帧。我本想给出参考出处，但目前找不到。无论如何，重点是您每次都需要创建新的数据包。

我是否需要显式定义一些用于实时流的代码？（就像在ffmpeg中一样，我们使用“-re”标志）。

您不需要为实时流添加任何其他内容。虽然您可能希望实现它以限制传递给编码器的帧更新的数量并节省一些性能。

使用libav*将原始帧(D3D11Texture2D)编码为rtsp流

共有1个答案

相关问答

相关文章

相关阅读

相关工具

相关文档