Reputation: 31
Input frames start on the GPU as ID3D11Texture2D
pointers.
I encode them to H264 using FFMPEG + NVENC. NVENC works perfectly if I download the textures to CPU memory as format AV_PIX_FMT_BGR0
, but I'd like to cut out the CPU texture download entirely, and pass the GPU memory pointer directly into the encoder in native format. I write frames like this:
int write_gpu_video_frame(ID3D11Texture2D* gpuTex, AVFormatContext* oc, OutputStream* ost) {
AVFrame *hw_frame = ost->hw_frame;
printf("gpuTex address = 0x%x\n", &gpuTex);
hw_frame->data[0] = (uint8_t *) gpuTex;
hw_frame->data[1] = (uint8_t *) (intptr_t) 0;
hw_frame->pts = ost->next_pts++;
return write_frame(oc, ost->enc, ost->st, hw_frame);
// write_frame is identical to sample code in ffmpeg repo
}
Running the code with this modification gives the following error:
gpuTex address = 0x4582f6d0
[h264_nvenc @ 00000191233e1bc0] Error registering an input resource: invalid call (9):
[h264_nvenc @ 00000191233e1bc0] Could not register an input HW frame
Error sending a frame to the encoder: Unknown error occurred
Here's some supplemental code used in setting up and configuring the hw context and encoder:
/* A few config flags */
#define ENABLE_NVENC TRUE
#define USE_D3D11 TRUE // Skip downloading textures to CPU memory and send it straight to NVENC
/* Init hardware frame context */
static int set_hwframe_ctx(AVCodecContext* ctx, AVBufferRef* hw_device_ctx) {
AVBufferRef* hw_frames_ref;
AVHWFramesContext* frames_ctx = NULL;
int err = 0;
if (!(hw_frames_ref = av_hwframe_ctx_alloc(hw_device_ctx))) {
fprintf(stderr, "Failed to create HW frame context.\n");
throw;
}
frames_ctx = (AVHWFramesContext*) (hw_frames_ref->data);
frames_ctx->format = AV_PIX_FMT_D3D11;
frames_ctx->sw_format = AV_PIX_FMT_NV12;
frames_ctx->width = STREAM_WIDTH;
frames_ctx->height = STREAM_HEIGHT;
//frames_ctx->initial_pool_size = 20;
if ((err = av_hwframe_ctx_init(hw_frames_ref)) < 0) {
fprintf(stderr, "Failed to initialize hw frame context. Error code: %s\n", av_err2str(err));
av_buffer_unref(&hw_frames_ref);
throw;
}
ctx->hw_frames_ctx = av_buffer_ref(hw_frames_ref);
if (!ctx->hw_frames_ctx)
err = AVERROR(ENOMEM);
av_buffer_unref(&hw_frames_ref);
return err;
}
/* Add an output stream. */
static void add_video_stream(
OutputStream* ost,
AVFormatContext* oc,
const AVCodec** codec,
enum AVCodecID codec_id,
int width,
int height
) {
AVCodecContext* c;
int i;
bool nvenc = false;
/* find the encoder */
if (ENABLE_NVENC) {
printf("Getting nvenc encoder\n");
*codec = avcodec_find_encoder_by_name("h264_nvenc");
nvenc = true;
}
if (!ENABLE_NVENC || *codec == NULL) {
printf("Getting standard encoder\n");
avcodec_find_encoder(codec_id);
nvenc = false;
}
if (!(*codec)) {
fprintf(stderr, "Could not find encoder for '%s'\n",
avcodec_get_name(codec_id));
exit(1);
}
ost->st = avformat_new_stream(oc, NULL);
if (!ost->st) {
fprintf(stderr, "Could not allocate stream\n");
exit(1);
}
ost->st->id = oc->nb_streams - 1;
c = avcodec_alloc_context3(*codec);
if (!c) {
fprintf(stderr, "Could not alloc an encoding context\n");
exit(1);
}
ost->enc = c;
printf("Using video codec %s\n", avcodec_get_name(codec_id));
c->codec_id = codec_id;
c->bit_rate = 4000000;
/* Resolution must be a multiple of two. */
c->width = STREAM_WIDTH;
c->height = STREAM_HEIGHT;
/* timebase: This is the fundamental unit of time (in seconds) in terms
* of which frame timestamps are represented. For fixed-fps content,
* timebase should be 1/framerate and timestamp increments should be
* identical to 1. */
ost->st->time_base = {1, STREAM_FRAME_RATE};
c->time_base = ost->st->time_base;
c->gop_size = 12; /* emit one intra frame every twelve frames at most */
if (nvenc && USE_D3D11) {
const std::string hw_device_name = "d3d11va";
AVHWDeviceType device_type = av_hwdevice_find_type_by_name(hw_device_name.c_str());
// set up hw device context
AVBufferRef *hw_device_ctx;
// const char* device = "0"; // Default GPU (may be integrated in the case of switchable graphics!)
const char* device = "1";
ret = av_hwdevice_ctx_create(&hw_device_ctx, device_type, device, nullptr, 0);
if (ret < 0) {
fprintf(stderr, "Could not create hwdevice context; %s", av_err2str(ret));
}
set_hwframe_ctx(c, hw_device_ctx);
c->pix_fmt = AV_PIX_FMT_D3D11;
} else if (nvenc && !USE_D3D11)
c->pix_fmt = AV_PIX_FMT_BGR0;
else
c->pix_fmt = STREAM_PIX_FMT;
if (c->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
/* just for testing, we also add B-frames */
c->max_b_frames = 2;
}
if (c->codec_id == AV_CODEC_ID_MPEG1VIDEO) {
/* Needed to avoid using macroblocks in which some coeffs overflow.
* This does not happen with normal video, it just happens here as
* the motion of the chroma plane does not match the luma plane. */
c->mb_decision = 2;
}
/* Some formats want stream headers to be separate. */
if (oc->oformat->flags & AVFMT_GLOBALHEADER)
c->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
Upvotes: 3
Views: 1214
Reputation: 1
I found if encode and decode cuda type data with same "hw_device_ctx" works
Upvotes: 0