一、ffmpeg使用dxva2
二、解码
三、渲染
ffplay本身是支持设置解码器的,比如设置h264_qsv、hevc_cuvid等就可以使用硬解功能,实际测试确实是有效的,cpu使用率也是有所下降。但是这并不是最佳的方案,在Windows上更好的方案是使用dxva2解码然后使用d3d9渲染,这种方法不仅极大降低cpu使用率、gpu使用率也有所下降、同时解码速度也比较快。但是ffplay本身是不支持使用dxva2的,所以这个时候就要我们进行拓展了。
dxva2解码渲染包含2个步骤:解码和渲染。之所以是较优的方案,是因为解码和渲染都在显卡中处理,解码的数据不需要取出到内存,而是直接在显存中转换然后渲染。ffmpeg包含dxva2的示例代码,但没有显卡渲染功能,性能和直接设置硬解解码器没有区别。我们需要参考的是项目名称叫Win32Project1的ffmpeg_dxva2解码渲染的博文(暂时没找到当时那篇,就不贴其他类似链接了)
头文件如下:
- /*
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
- #ifndef FFMPEG_DXVA2_H
- #define FFMPEG_DXVA2_H
-
- /* HWND appears in the dxva2_init() prototype below, so windows.h must be
- * included here. (The original left this include commented out, which only
- * compiles when every includer happens to pull in windows.h first.) */
- #include <windows.h>
-
- /* The FFmpeg headers are C. Guard the C-linkage block so this header can
- * be included from both C and C++ translation units; an unconditional
- * extern "C" (as in the original) does not compile as plain C. */
- #ifdef __cplusplus
- extern "C"{
- #endif
- #include "libavcodec/avcodec.h"
- #include "libavutil/pixfmt.h"
- #include "libavutil/rational.h"
- #ifdef __cplusplus
- }
- #endif
-
- /* Hardware-acceleration backend identifiers (subset of ffmpeg.c's list). */
- enum HWAccelID {
- HWACCEL_NONE = 0,
- HWACCEL_AUTO,
- HWACCEL_VDPAU,
- HWACCEL_DXVA2,
- HWACCEL_VDA,
- HWACCEL_VIDEOTOOLBOX,
- HWACCEL_QSV,
- };
-
- /* NOTE: the forward typedefs of AVStream / AVCodecContext / AVCodec /
- * AVFrame / AVDictionary that appeared here were removed: avcodec.h
- * (included above) already typedefs these names, and redeclaring a
- * typedef is an error in C99. */
-
- /* Per-stream decoder state, carried over from ffmpeg.c's InputStream.
- * Only the hwaccel-related members near the bottom are used by the DXVA2
- * path; the rest is kept for compatibility with the Win32Project1 sources. */
- typedef struct InputStream {
- int file_index;
- AVStream *st;
- int discard; /* true if stream data should be discarded */
- int user_set_discard;
- int decoding_needed; /* non zero if the packets must be decoded in 'raw_fifo', see DECODING_FOR_* */
- #define DECODING_FOR_OST 1
- #define DECODING_FOR_FILTER 2
-
- AVCodecContext *dec_ctx;
- AVCodec *dec;
- AVFrame *decoded_frame;
- AVFrame *filter_frame; /* a ref of decoded_frame, to be sent to filters */
-
- int64_t start; /* time when read started */
- /* predicted dts of the next packet read for this stream or (when there are
- * several frames in a packet) of the next frame in current packet (in AV_TIME_BASE units) */
- int64_t next_dts;
- int64_t dts; ///< dts of the last packet read for this stream (in AV_TIME_BASE units)
-
- int64_t next_pts; ///< synthetic pts for the next decode frame (in AV_TIME_BASE units)
- int64_t pts; ///< current pts of the decoded frame (in AV_TIME_BASE units)
- int wrap_correction_done;
-
- int64_t filter_in_rescale_delta_last;
-
- int64_t min_pts; /* pts with the smallest value in a current stream */
- int64_t max_pts; /* pts with the higher value in a current stream */
- int64_t nb_samples; /* number of samples in the last decoded audio frame before looping */
-
- double ts_scale;
- int saw_first_ts;
- int showed_multi_packet_warning;
- AVDictionary *decoder_opts;
- AVRational framerate; /* framerate forced with -r */
- int top_field_first;
- int guess_layout_max;
-
- int autorotate;
- int resample_height;
- int resample_width;
- int resample_pix_fmt;
-
- int resample_sample_fmt;
- int resample_sample_rate;
- int resample_channels;
- uint64_t resample_channel_layout;
-
- int fix_sub_duration;
- struct { /* previous decoded subtitle and related variables */
- int got_output;
- int ret;
- AVSubtitle subtitle;
- } prev_sub;
-
- struct sub2video {
- int64_t last_pts;
- int64_t end_pts;
- AVFrame *frame;
- int w, h;
- } sub2video;
-
- int dr1;
-
- /* decoded data from this stream goes into all those filters
- * currently video and audio only */
- //InputFilter **filters;
- //int nb_filters;
-
- //int reinit_filters;
-
- /* hwaccel options */
- enum HWAccelID hwaccel_id;
- char *hwaccel_device;
-
- /* hwaccel context */
- enum HWAccelID active_hwaccel_id;
- void *hwaccel_ctx;
- void(*hwaccel_uninit)(AVCodecContext *s);
- int(*hwaccel_get_buffer)(AVCodecContext *s, AVFrame *frame, int flags);
- int(*hwaccel_retrieve_data)(AVCodecContext *s, AVFrame *frame);
- enum AVPixelFormat hwaccel_pix_fmt;
- enum AVPixelFormat hwaccel_retrieved_pix_fmt;
-
- /* stats */
- // combined size of all the packets read
- uint64_t data_size;
- /* number of packets successfully read for this stream */
- uint64_t nb_packets;
- // number of frames/samples retrieved from the decoder
- uint64_t frames_decoded;
- uint64_t samples_decoded;
- } InputStream;
-
- /* These two functions are implemented in a C++ TU in Win32Project1; give
- * them C linkage guards as well so C and C++ callers agree. */
- #ifdef __cplusplus
- extern "C"{
- #endif
-
- /* Initialise DXVA2 decoding for codec context s. hwnd must be the window
- * that frames will be rendered into. Returns 0 on success, non-zero on
- * failure (caller should then fall back to software decoding). */
- int dxva2_init(AVCodecContext *s, HWND hwnd);
-
- /* Present a decoded DXVA2 frame; frame->data[3] holds the D3D9 surface. */
- int dxva2_retrieve_data_call(AVCodecContext *s, AVFrame *frame);
-
- #ifdef __cplusplus
- }
- #endif
-
- #endif /* FFMPEG_DXVA2_H */
-
修改ffplay解码功能需要在stream_component_open中进行:
引用Win32Project1的ffmpeg_dxva2.h头文件
#include "ffmpeg_dxva2.h"
定义一个枚举说明硬件加速类型
- /// <summary>
- /// Hardware acceleration options for the player.
- /// </summary>
- typedef enum
- {
- // Hardware acceleration disabled: software decode only.
- AC_HARDWAREACCELERATETYPE_DISABLED,
- // Choose automatically (DXVA2 is used when the codec supports it).
- AC_HARDWAREACCELERATETYPE_AUTO,
- // Decode with DXVA (Windows only). When it starts successfully: the
- // started/display events report pixformat AC_PIXELFORMAT_DXVA2_VLD, and
- // the render event's data[3] is the D3D9 surface object.
- AC_HARDWAREACCELERATETYPE_DXVA
- }ACHardwareAccelerateType;
在VideoState中添加如下字段硬件加速类型,以及Win32Project1的InputStream对象
- ACHardwareAccelerateType hwaccel; // requested acceleration type, set via ac_play_setHardwareAccelerateType()
- InputStream* ist; // Win32Project1 DXVA2 decode state; NULL when DXVA2 init failed or is inactive
添加相应接口
- /* Set the hardware acceleration type to use for subsequent decoding. */
- void ac_play_setHardwareAccelerateType(ACPlay play, ACHardwareAccelerateType value) {
-     ((VideoState*)play)->hwaccel = value;
- }
2、初始化
在stream_component_open的avcodec_open2上一行,加入判断hwaccel初始化dxva逻辑。dxva2_init就是Win32Project1中的方法,此方法一定要有hwnd,这个hwnd必须是渲染窗口的。如果不想设置hwnd达到相同性能则需要另外做修改,本文就不深入讨论了。
- // Enable DXVA2 decoding when requested. Must run just before avcodec_open2().
- // On success the decoder will output AV_PIX_FMT_DXVA2_VLD frames whose
- // data[3] is a D3D9 surface; on failure we fall back to software decoding.
- if (is->hwaccel == AC_HARDWAREACCELERATETYPE_AUTO || is->hwaccel == AC_HARDWAREACCELERATETYPE_DXVA)
- {
- switch (codec->id)
- // codec ids with DXVA2 support
- {
- case AV_CODEC_ID_MPEG2VIDEO:
- case AV_CODEC_ID_H264:
- case AV_CODEC_ID_VC1:
- case AV_CODEC_ID_WMV3:
- case AV_CODEC_ID_HEVC:
- case AV_CODEC_ID_VP9:
- {
- avctx->thread_count = 1; // Multithreading is apparently not compatible with hardware decoding
- is->ist = av_mallocz(sizeof(InputStream));
- is->ist->hwaccel_id = HWACCEL_AUTO;
- is->ist->active_hwaccel_id = HWACCEL_AUTO;
- is->ist->hwaccel_device = "dxva2"; // NOTE(review): string literal stored in char* — safe only if dxva2.c never frees/modifies it; confirm
- is->ist->dec = codec;
- is->ist->dec_ctx = avctx;
- avctx->opaque = is->ist; // GetHwFormat() reads the InputStream back via opaque
- if (dxva2_init(avctx, is->hwnd) == 0) // hwnd must be the render window (see text above)
- {
- avctx->get_buffer2 = is->ist->hwaccel_get_buffer; // allocate DXVA2 surfaces instead of system memory
- avctx->get_format = GetHwFormat;
- avctx->thread_safe_callbacks = 1; // NOTE(review): field removed in newer FFmpeg — confirm against the FFmpeg version in use
- avctx->pix_fmt = AV_PIX_FMT_DXVA2_VLD;
- }
- else
- {
- // dxva2_init failed: release the state and fall back to software decoding
- av_free(is->ist);
- is->ist = NULL;
- }
- }
- break;
- }
- }
将解码的avframe的格式设置为AV_PIX_FMT_DXVA2_VLD,上述代码中的GetHwFormat具体如下:
- /* AVCodecContext.get_format callback: force negotiation of the DXVA2
-  * surface format and record the active hwaccel on the InputStream that
-  * was stored in the codec context's opaque pointer. */
- static enum AVPixelFormat GetHwFormat(AVCodecContext* s, const enum AVPixelFormat* pix_fmts)
- {
-     InputStream *stream = (InputStream *)s->opaque;
-     stream->active_hwaccel_id = HWACCEL_DXVA2;
-     stream->hwaccel_pix_fmt = AV_PIX_FMT_DXVA2_VLD;
-     return AV_PIX_FMT_DXVA2_VLD;
- }
-
在stream_close中加入如下反初始化代码,其中dxva2_uninit2是Win32Project1中的dxva2_uninit将其参数类型改为了InputStream*。
- // Tear down DXVA2 state (in stream_close). dxva2_uninit2 is Win32Project1's
- // dxva2_uninit with its parameter type changed to InputStream*.
- if (is->ist)
- {
- dxva2_uninit2(is->ist);
- av_free(is->ist);
- is->ist = NULL; // prevent dangling pointer / double free
- }
有了上述的解码设置之后,解码出来的数据将是d3d9的surface,这个对象在avframe.data[3]中,我们需要对它进行处理,将其显示到界面上。幸运的是Win32Project1包含了这部分功能,我们只需要调用方法就可以了。
在video_display的SDL_RenderClear上一行加入如下代码。
- Frame* vp;
- vp = frame_queue_peek_last(&is->pictq);
- // DXVA2 frames live in a D3D9 surface (frame->data[3]); hand them to the
- // D3D9 presenter and skip the normal SDL render path entirely.
- if (vp->format == AV_PIX_FMT_DXVA2_VLD)
- {
- dxva2_retrieve_data_call(is->viddec.avctx, vp->frame);
- return; // already presented via D3D9; do not fall through to SDL
- }
以上就是今天要讲的内容,通过上述方法实现的dxva2硬解渲染性能非常好,直观的感受就是渲染4k视频cpu使用率不超过1%,当然此时gpu使用率可能是50%左右;而只设置硬解解码器的效果则是10%的cpu使用率,gpu使用率也是50%左右。原因在Win32Project1_ffmpeg_dxva2的博文中有说明,这里就不重复了。总的来说,ffplay支持dxva2硬解渲染后就有更广泛和实际的应用了,比如直接解决了实时流多路渲染性能不足的问题等。
原文链接:ffplay使用dxva2实现硬解渲染 - 资料 - 我爱音视频网 - 构建全国最权威的音视频技术交流分享论坛
本文福利,免费领取C++音视频学习提升资料,内容包括(C/C++,Linux 服务器开发,面试题,FFmpeg ,webRTC ,rtmp ,hls ,rtsp ,ffplay ,srs)↓↓↓↓↓↓文章底部↓↓↓↓见下面