Reputation: 11
I am using the ffmpeg libavformat library to write a video-only WebM file. I receive a VP8-encoded RTP stream on my server. I have successfully grouped the RTP byte stream (from the RTP payload) into individual frames and constructed an AVPacket. I am NOT re-encoding the payload to VP8 here, as it is already VP8-encoded.
I am writing the AVPacket to the file using av_interleaved_write_frame(). Although I get a WebM file as output, it does not play at all. When I inspected the file with MKVToolNix's 'mkvinfo' command, I found the following info:
+ EBML head
|+ EBML version: 1
|+ EBML read version: 1
|+ EBML maximum ID length: 4
|+ EBML maximum size length: 8
|+ Doc type: webm
|+ Doc type version: 2
|+ Doc type read version: 2
+ Segment, size 2142500
|+ Seek head (subentries will be skipped)
|+ EbmlVoid (size: 170)
|+ Segment information
| + Timestamp scale: 1000000
| + Multiplexing application: Lavf58.0.100
| + Writing application: Lavf58.0.100
| + Duration: 78918744.480s (21921:52:24.480)
|+ Segment tracks
| + A track
| + Track number: 1 (track ID for mkvmerge & mkvextract: 0)
| + Track UID: 1
| + Lacing flag: 0
| + Name: Video Track
| + Language: eng
| + Codec ID: V_VP8
| + Track type: video
| + Default duration: 1.000ms (1000.000 frames/fields per second for a video track)
| + Video track
| + Pixel width: 640
| + Pixel height: 480
|+ Tags
| + Tag
| + Targets
| + Simple
| + Name: ENCODER
| + String: Lavf58.0.100
| + Tag
| + Targets
| + TrackUID: 1
| + Simple
| + Name: DURATION
| + String: 21921:52:24.4800000
|+ Cluster
As we can see, the duration of the stream is disproportionately high (my actual stream duration should be around 8-10 seconds), and the frame rate in the track info is not what I am setting it to: I am setting the frame rate to 25 fps.
I am applying av_rescale_q(rtpTimeStamp, codec_timebase, stream_timebase) and setting the rescaled rtpTimeStamp as the pts and dts values. My guess is that my way of setting pts and dts is wrong. Please help me with how to set the pts and dts values on the AVPacket so as to get a working WebM file with proper metadata.
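To make the numbers concrete: with my codec time base of {1, 25} and the 1/1000 stream time base the WebM muxer uses (the 1000000 ns timestamp scale in the mkvinfo dump above), av_rescale_q is simply a multiplication by 40. A frame index would map cleanly to milliseconds, but a raw RTP timestamp blows up by the same factor; a quick sketch of the arithmetic (the second value is only illustrative):

/* {1,25} -> {1,1000} is just a multiplication by 40 */
int64_t ok  = av_rescale_q(250, (AVRational){1, 25}, (AVRational){1, 1000});
/* frame 250 at 25 fps -> 10000 ms, i.e. the 10-second mark */
int64_t bad = av_rescale_q(1972968612, (AVRational){1, 25}, (AVRational){1, 1000});
/* an absolute RTP-sized timestamp (illustrative value) -> 78918744480 ms,
   i.e. roughly the 78918744 s duration mkvinfo reports */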
EDIT:
The following is the code I call to initialize the library:
#define STREAM_FRAME_RATE 25
#define STREAM_PIX_FMT AV_PIX_FMT_YUV420P

typedef struct OutputStream {
    AVStream *st;
    AVCodecContext *enc;
    AVFrame *frame;
} OutputStream;

typedef struct WebMWriter {
    OutputStream *audioStream, *videoStream;
    AVFormatContext *ctx;
    AVOutputFormat *outfmt;
    AVCodec *audioCodec, *videoCodec;
} WebMWriter;

static OutputStream audioStream = { 0 }, videoStream = { 0 };

WebMWriter *init(char *filename)
{
    av_register_all();

    AVFormatContext *ctx = NULL;
    AVCodec *audioCodec = NULL, *videoCodec = NULL;
    const char *fmt_name = NULL;
    const char *file_name = filename;

    int alloc_status = avformat_alloc_output_context2(&ctx, NULL, fmt_name, file_name);
    if(!ctx)
        return NULL;

    AVOutputFormat *fmt = (*ctx).oformat;

    AVDictionary *video_opt = NULL;
    av_dict_set(&video_opt, "language", "eng", 0);
    av_dict_set(&video_opt, "title", "Video Track", 0);

    if(fmt->video_codec != AV_CODEC_ID_NONE)
    {
        addStream(&videoStream, ctx, &videoCodec, AV_CODEC_ID_VP8, video_opt);
    }

    if(videoStream.st)
        openVideo1(&videoStream, videoCodec, NULL);

    av_dump_format(ctx, 0, file_name, 1);

    int ret = -1;
    /* open the output file, if needed */
    if (!(fmt->flags & AVFMT_NOFILE)) {
        ret = avio_open(&ctx->pb, file_name, AVIO_FLAG_WRITE);
        if (ret < 0) {
            printf("Could not open '%s': %s\n", file_name, av_err2str(ret));
            return NULL;
        }
    }

    /* Write the stream header, if any. */
    AVDictionary *format_opt = NULL;
    ret = avformat_write_header(ctx, &format_opt);
    if (ret < 0) {
        fprintf(stderr, "Error occurred when opening output file: %s\n",
                av_err2str(ret));
        return NULL;
    }

    WebMWriter *webmWriter = malloc(sizeof(struct WebMWriter));
    webmWriter->ctx = ctx;
    webmWriter->outfmt = fmt;
    webmWriter->audioStream = &audioStream;
    webmWriter->videoStream = &videoStream;
    webmWriter->videoCodec = videoCodec;
    return webmWriter;
}
The following is the openVideo1() method:
void openVideo1(OutputStream *out_st, AVCodec *codec, AVDictionary *opt_arg)
{
    AVCodecContext *codec_ctx = out_st->enc;
    int ret = -1;
    AVDictionary *opt = NULL;

    if(opt_arg != NULL)
    {
        av_dict_copy(&opt, opt_arg, 0);
        ret = avcodec_open2(codec_ctx, codec, &opt);
    }
    else
    {
        ret = avcodec_open2(codec_ctx, codec, NULL);
    }

    /* copy the stream parameters to the muxer */
    ret = avcodec_parameters_from_context(out_st->st->codecpar, codec_ctx);
    if (ret < 0) {
        printf("Could not copy the stream parameters\n");
        exit(1);
    }
}
The following is the addStream() method:
void addStream(OutputStream *out_st, AVFormatContext *ctx, AVCodec **cdc, enum AVCodecID codecId, AVDictionary *opt_arg)
{
    (*cdc) = avcodec_find_encoder(codecId);
    if(!(*cdc)) {
        exit(1);
    }

    /* as we are passing a NULL AVCodec cdc, the AVCodecContext codec_ctx
       will not be allocated, so we have to do it explicitly */
    AVStream *st = avformat_new_stream(ctx, *cdc);
    if(!st) {
        exit(1);
    }
    out_st->st = st;
    st->id = ctx->nb_streams-1;

    AVDictionary *opt = NULL;
    av_dict_copy(&opt, opt_arg, 0);
    st->metadata = opt;

    AVCodecContext *codec_ctx = st->codec;
    if (!codec_ctx) {
        fprintf(stderr, "Could not alloc an encoding context\n");
        exit(1);
    }
    out_st->enc = codec_ctx;

    codec_ctx->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;

    switch ((*cdc)->type) {
    case AVMEDIA_TYPE_AUDIO:
        codec_ctx->codec_id = codecId;
        codec_ctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
        codec_ctx->bit_rate = 64000;
        codec_ctx->sample_rate = 48000;
        codec_ctx->channels = 2; //1;
        codec_ctx->channel_layout = AV_CH_LAYOUT_STEREO;
        codec_ctx->codec_type = AVMEDIA_TYPE_AUDIO;
        codec_ctx->time_base = (AVRational){1, STREAM_FRAME_RATE};
        break;

    case AVMEDIA_TYPE_VIDEO:
        codec_ctx->codec_id = codecId;
        codec_ctx->bit_rate = 90000;
        codec_ctx->width = 640;
        codec_ctx->height = 480;
        codec_ctx->time_base = (AVRational){1, STREAM_FRAME_RATE};
        codec_ctx->gop_size = 12;
        codec_ctx->pix_fmt = STREAM_PIX_FMT;
        codec_ctx->codec_type = AVMEDIA_TYPE_VIDEO;
        break;

    default:
        break;
    }

    /* Some formats want stream headers to be separate. */
    if (ctx->oformat->flags & AVFMT_GLOBALHEADER)
        codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
The following is the code I call to write a frame of data to the file:
int writeVideoStream(AVFormatContext *ctx, AVStream *st, uint8_t *data, int size, long frameTimeStamp, int isKeyFrame, AVCodecContext *codec_ctx)
{
    AVRational rat = st->time_base;
    AVPacket pkt = {0};
    av_init_packet(&pkt);

    void *opaque = NULL;
    int flags = AV_BUFFER_FLAG_READONLY;
    AVBufferRef *bufferRef = av_buffer_create(data, size, NULL, opaque, flags);

    pkt.buf = bufferRef;
    pkt.data = data;
    pkt.size = size;
    pkt.stream_index = st->index;

    pkt.pts = pkt.dts = frameTimeStamp;
    pkt.pts = av_rescale_q(pkt.pts, codec_ctx->time_base, st->time_base);
    pkt.dts = av_rescale_q(pkt.dts, codec_ctx->time_base, st->time_base);

    if(isKeyFrame == 1)
        pkt.flags |= AV_PKT_FLAG_KEY;

    int ret = av_interleaved_write_frame(ctx, &pkt);
    return ret;
}
NOTE: Here 'frameTimeStamp' is the RTP timestamp carried on that frame's RTP packets.
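In case it is relevant: my understanding is that this RTP timestamp is expressed on the RTP media clock (90 kHz is the usual rate for video payloads such as VP8), not on the codec time base. A minimal sketch of the conversion under that assumption, where firstRtpTimeStamp is a hypothetical variable remembering the timestamp of the first received frame so the output starts at zero:

/* Sketch only: assumes the usual 90 kHz RTP video clock.
   'firstRtpTimeStamp' is hypothetical and would be captured from the first frame. */
int64_t delta = frameTimeStamp - firstRtpTimeStamp;                   /* RTP ticks since the first frame */
pkt.pts = av_rescale_q(delta, (AVRational){1, 90000}, st->time_base); /* -> stream time base (ms for WebM) */
pkt.dts = pkt.pts;                                                    /* VP8 has no B-frames */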
EDIT 2.0:
My updated addStream() method with the codecpar changes:
void addStream(OutputStream *out_st, AVFormatContext *ctx, AVCodec **cdc, enum AVCodecID codecId, AVDictionary *opt_arg)
{
    (*cdc) = avcodec_find_encoder(codecId);
    if(!(*cdc)) {
        printf("@@@@@ couldnt find codec \n");
        exit(1);
    }

    AVStream *st = avformat_new_stream(ctx, *cdc);
    if(!st) {
        printf("@@@@@ couldnt init stream\n");
        exit(1);
    }
    out_st->st = st;
    st->id = ctx->nb_streams-1;

    AVCodecParameters *codecpars = st->codecpar;
    codecpars->codec_id = codecId;
    codecpars->codec_type = (*cdc)->type;

    AVDictionary *opt = NULL;
    av_dict_copy(&opt, opt_arg, 0);
    st->metadata = opt;
    //av_dict_free(&opt);

    AVCodecContext *codec_ctx = st->codec;
    if (!codec_ctx) {
        fprintf(stderr, "Could not alloc an encoding context\n");
        exit(1);
    }
    out_st->enc = codec_ctx;

    //since opus is experimental codec
    //codec_ctx->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;

    switch ((*cdc)->type) {
    case AVMEDIA_TYPE_AUDIO:
        codec_ctx->codec_id = codecId;
        codec_ctx->sample_fmt = AV_SAMPLE_FMT_FLTP; //AV_SAMPLE_FMT_U8 or AV_SAMPLE_FMT_S16;
        codec_ctx->bit_rate = 64000;
        codec_ctx->sample_rate = 48000;
        codec_ctx->channels = 2; //1;
        codec_ctx->channel_layout = AV_CH_LAYOUT_STEREO; //AV_CH_LAYOUT_MONO;
        codec_ctx->codec_type = AVMEDIA_TYPE_AUDIO;
        codec_ctx->time_base = (AVRational){1, STREAM_FRAME_RATE};

        codecpars->format = codec_ctx->sample_fmt;
        codecpars->channels = codec_ctx->channels;
        codecpars->sample_rate = codec_ctx->sample_rate;
        break;

    case AVMEDIA_TYPE_VIDEO:
        codec_ctx->codec_id = codecId;
        codec_ctx->bit_rate = 90000;
        codec_ctx->width = 640;
        codec_ctx->height = 480;
        codec_ctx->time_base = (AVRational){1, STREAM_FRAME_RATE};
        codec_ctx->gop_size = 12;
        codec_ctx->pix_fmt = STREAM_PIX_FMT;
        //codec_ctx->max_b_frames = 1;
        codec_ctx->codec_type = AVMEDIA_TYPE_VIDEO;
        codec_ctx->framerate = av_inv_q(codec_ctx->time_base);
        st->avg_frame_rate = codec_ctx->framerate; //(AVRational){25000, 1000};

        codecpars->format = codec_ctx->pix_fmt;
        codecpars->width = codec_ctx->width;
        codecpars->height = codec_ctx->height;
        codecpars->sample_aspect_ratio = (AVRational){codec_ctx->width, codec_ctx->height};
        break;

    default:
        break;
    }

    codecpars->bit_rate = codec_ctx->bit_rate;

    int ret = avcodec_parameters_to_context(codec_ctx, codecpars);
    if (ret < 0) {
        printf("Could not copy the stream parameters\n");
        exit(1);
    }

    /* Some formats want stream headers to be separate. */
    if (ctx->oformat->flags & AVFMT_GLOBALHEADER)
        codec_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
}
Upvotes: 1
Views: 6668
Reputation: 1988
I think you are right that calculating pts/dts is the problem. Use this formula to calculate the timestamps manually and see if it works; then you can do it with av_rescale_q.
Here is my tested formula (for raw (yuv) output):
int64_t frameTime;
int64_t frameDuration;
frameDuration = video_st->time_base.den / video_fps; // i.e. 25
frameTime = frame_count * frameDuration;
pkt->pts = frameTime / video_st->time_base.num;
pkt->duration = frameDuration;
pkt->dts = pkt->pts;
pkt->stream_index = video_st->index;
Use this before av_interleaved_write_frame().
Note: frame_count here is a counter that increases after every video frame written (with av_interleaved_write_frame).
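If it helps, the same computation expressed with av_rescale_q would look roughly like this (a sketch; video_fps and frame_count are the same variables as above):

// Equivalent sketch: treat frame_count as a timestamp in the {1, video_fps}
// time base and let av_rescale_q convert it into the stream time base.
AVRational fps_tb = (AVRational){1, video_fps};
pkt->pts = av_rescale_q(frame_count, fps_tb, video_st->time_base);
pkt->dts = pkt->pts;
pkt->duration = av_rescale_q(1, fps_tb, video_st->time_base);
pkt->stream_index = video_st->index;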
Upvotes: 1