Kris Xia
Kris Xia

Reputation: 31

ffmpeg record video plays too fast

I'm a college student and I am studying FFmpeg now.

I have wrote a software that can record desktops and audio('virtual-audio-capturer') with FFmpeg.And I am now writing Audio and Video Synchronization. I met some problems that video recording plays too fast.

When I look for audio and video synchronization help on the Internet,I find a formula for calculating PTS :

pts = n * ((1 / timbase)/ fps)

When I use this formula,I find a phenomenon.

1.The higher frame rate is,the faster the video playback speed.

2.The slower the frame rate, the faster the video playback.

Also I find while the framerate is 10,the video playback speed will be right.

Why has this situation happened?

I have thought this question for three days. I really hope someone can help me solve this problem.

I really appreciate the help.

#include "stdafx.h"

#ifdef  __cplusplus
extern "C"
{
#endif
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libavdevice/avdevice.h"
#include "libavutil/audio_fifo.h"

#include "libavfilter/buffersink.h"
#include "libavfilter/buffersrc.h"
#include "libavutil/imgutils.h"
#include "libavutil/mathematics.h"
#include "libavutil/samplefmt.h"
#include "libavutil/time.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "libavutil/file.h"
#include "libavutil/mem.h"
#include "libavutil/frame.h"
#include "libavfilter/avfilter.h"
#include "libswresample/swresample.h"

#pragma comment(lib, "avcodec.lib")
#pragma comment(lib, "avformat.lib")
#pragma comment(lib, "avutil.lib")
#pragma comment(lib, "avdevice.lib")
#pragma comment(lib, "avfilter.lib")

#pragma comment(lib, "avfilter.lib")
#pragma comment(lib, "postproc.lib")
#pragma comment(lib, "swresample.lib")
#pragma comment(lib, "swscale.lib")
#ifdef __cplusplus
};
#endif

AVFormatContext *pFormatCtx_Video = NULL, *pFormatCtx_Audio = NULL, *pFormatCtx_Out = NULL;

AVCodecContext *outVideoCodecCtx = NULL;
AVCodecContext *outAudioCodecCtx = NULL;

AVStream *pVideoStream = NULL, *pAudioStream = NULL;

AVCodec *outAVCodec;
AVCodec *outAudioCodec;

AVCodecContext  *pCodecCtx_Video;
AVCodec         *pCodec_Video;
AVFifoBuffer    *fifo_video = NULL;
AVAudioFifo     *fifo_audio = NULL;
int VideoIndex, AudioIndex;
int codec_id;

CRITICAL_SECTION AudioSection, VideoSection;



SwsContext *img_convert_ctx;
int frame_size = 0;

uint8_t *picture_buf = NULL, *frame_buf = NULL;

bool bCap = true;

DWORD WINAPI ScreenCapThreadProc( LPVOID lpParam );
DWORD WINAPI AudioCapThreadProc( LPVOID lpParam );

int OpenVideoCapture()
{
    AVInputFormat *ifmt=av_find_input_format("gdigrab");
    AVDictionary *options = NULL;
    av_dict_set(&options, "framerate", "60", NULL);
    if(avformat_open_input(&pFormatCtx_Video, "desktop", ifmt, &options)!=0)
    {
        printf("Couldn't open input stream.(无法打开视频输入流)\n");
        return -1;
    }
    if(avformat_find_stream_info(pFormatCtx_Video,NULL)<0)
    {
        printf("Couldn't find stream information.(无法获取视频流信息)\n");
        return -1;
    }
    if (pFormatCtx_Video->streams[0]->codec->codec_type != AVMEDIA_TYPE_VIDEO)
    {
        printf("Couldn't find video stream information.(无法获取视频流信息)\n");
        return -1;
    }
    pCodecCtx_Video = pFormatCtx_Video->streams[0]->codec;
    pCodec_Video = avcodec_find_decoder(pCodecCtx_Video->codec_id);
    if(pCodec_Video == NULL)
    {
        printf("Codec not found.(没有找到解码器)\n");
        return -1;
    }
    if(avcodec_open2(pCodecCtx_Video, pCodec_Video, NULL) < 0)
    {
        printf("Could not open codec.(无法打开解码器)\n");
        return -1;
    }

    av_dump_format(pFormatCtx_Video, 0, NULL, 0);

    img_convert_ctx = sws_getContext(pCodecCtx_Video->width, pCodecCtx_Video->height, pCodecCtx_Video->pix_fmt, 
        pCodecCtx_Video->width, pCodecCtx_Video->height, PIX_FMT_YUV420P, SWS_BICUBIC, NULL, NULL, NULL); 

    frame_size = avpicture_get_size(pCodecCtx_Video->pix_fmt, pCodecCtx_Video->width, pCodecCtx_Video->height);
    fifo_video = av_fifo_alloc(30 * avpicture_get_size(AV_PIX_FMT_YUV420P, pCodecCtx_Video->width, pCodecCtx_Video->height));

    return 0;
}

static char *dup_wchar_to_utf8(wchar_t *w)
{
    char *s = NULL;
    int l = WideCharToMultiByte(CP_UTF8, 0, w, -1, 0, 0, 0, 0);
    s = (char *) av_malloc(l);
    if (s)
        WideCharToMultiByte(CP_UTF8, 0, w, -1, s, l, 0, 0);
    return s;
}

int OpenAudioCapture()
{
    AVInputFormat *pAudioInputFmt = av_find_input_format("dshow");
    char * psDevName = dup_wchar_to_utf8(L"audio=virtual-audio-capturer");

    if (avformat_open_input(&pFormatCtx_Audio, psDevName, pAudioInputFmt,NULL) < 0)
    {
        printf("Couldn't open input stream.(无法打开音频输入流)\n");
        return -1;
    }

    if(avformat_find_stream_info(pFormatCtx_Audio,NULL)<0)  
        return -1; 

    if(pFormatCtx_Audio->streams[0]->codec->codec_type != AVMEDIA_TYPE_AUDIO)
    {
        printf("Couldn't find video stream information.(无法获取音频流信息)\n");
        return -1;
    }

    AVCodec *tmpCodec = avcodec_find_decoder(pFormatCtx_Audio->streams[0]->codec->codec_id);
    if(0 > avcodec_open2(pFormatCtx_Audio->streams[0]->codec, tmpCodec, NULL))
    {
        printf("can not find or open audio decoder!\n");
    }

    av_dump_format(pFormatCtx_Audio, 0, NULL, 0);

    return 0;
}

int OpenOutPut()
{
    AVStream *pVideoStream = NULL, *pAudioStream = NULL;
    const char *outFileName = "test.mp4";
    avformat_alloc_output_context2(&pFormatCtx_Out, NULL, NULL, outFileName);

    if (pFormatCtx_Video->streams[0]->codec->codec_type == AVMEDIA_TYPE_VIDEO)
    {
        VideoIndex = 0;
        pVideoStream = avformat_new_stream(pFormatCtx_Out, NULL);
        if (!pVideoStream)
        {
            printf("can not new stream for output!\n");
            return -1;
        }

        outVideoCodecCtx = avcodec_alloc_context3(outAVCodec);
        if ( !outVideoCodecCtx )
        {
            printf("Error : avcodec_alloc_context3()\n");
            return -1;
        }

        //set codec context param
        outVideoCodecCtx = pVideoStream->codec;
        outVideoCodecCtx->codec_id = AV_CODEC_ID_MPEG4;
        outVideoCodecCtx->width = pFormatCtx_Video->streams[0]->codec->width;
        outVideoCodecCtx->height = pFormatCtx_Video->streams[0]->codec->height;
        outVideoCodecCtx->time_base = pFormatCtx_Video->streams[0]->codec->time_base;
        outVideoCodecCtx->pix_fmt = AV_PIX_FMT_YUV420P;
        outVideoCodecCtx->codec_type = AVMEDIA_TYPE_VIDEO;

        if (codec_id == AV_CODEC_ID_H264)
        {
            av_opt_set(outVideoCodecCtx->priv_data, "preset", "slow", 0);
        }

        outAVCodec = avcodec_find_encoder(AV_CODEC_ID_MPEG4);
        if( !outAVCodec )
        {
            printf("\n\nError : avcodec_find_encoder()");
            return -1;
        }
        if (pFormatCtx_Out->oformat->flags & AVFMT_GLOBALHEADER)
            outVideoCodecCtx->flags |=CODEC_FLAG_GLOBAL_HEADER;

        if ((avcodec_open2(outVideoCodecCtx,outAVCodec, NULL)) < 0)
        {
            printf("can not open the encoder\n");
            return -1;
        }
    }

    if(pFormatCtx_Audio->streams[0]->codec->codec_type == AVMEDIA_TYPE_AUDIO)
    {
        AVCodecContext *pOutputCodecCtx;
        AudioIndex = 1;
        pAudioStream = avformat_new_stream(pFormatCtx_Out, NULL);

        pAudioStream->codec->codec = avcodec_find_encoder(pFormatCtx_Out->oformat->audio_codec);

        pOutputCodecCtx = pAudioStream->codec;

        pOutputCodecCtx->sample_rate = pFormatCtx_Audio->streams[0]->codec->sample_rate;
        pOutputCodecCtx->channel_layout = pFormatCtx_Out->streams[0]->codec->channel_layout;
        pOutputCodecCtx->channels = av_get_channel_layout_nb_channels(pAudioStream->codec->channel_layout);
        if(pOutputCodecCtx->channel_layout == 0)
        {
            pOutputCodecCtx->channel_layout = AV_CH_LAYOUT_STEREO;
            pOutputCodecCtx->channels = av_get_channel_layout_nb_channels(pOutputCodecCtx->channel_layout);

        }
        pOutputCodecCtx->sample_fmt = pAudioStream->codec->codec->sample_fmts[0];
        AVRational time_base={1, pAudioStream->codec->sample_rate};
        pAudioStream->time_base = time_base;
        //audioCodecCtx->time_base = time_base;

        pOutputCodecCtx->codec_tag = 0;  
        if (pFormatCtx_Out->oformat->flags & AVFMT_GLOBALHEADER)  
            pOutputCodecCtx->flags |= CODEC_FLAG_GLOBAL_HEADER;

        if (avcodec_open2(pOutputCodecCtx, pOutputCodecCtx->codec, 0) < 0)
        {
            printf("编码器打开失败,退出程序\n");
            return -1;
        }
    }

    if (!(pFormatCtx_Out->oformat->flags & AVFMT_NOFILE))
    {
        if(avio_open(&pFormatCtx_Out->pb, outFileName, AVIO_FLAG_WRITE) < 0)
        {
            printf("can not open output file handle!\n");
            return -1;
        }
    }

    if(avformat_write_header(pFormatCtx_Out, NULL) < 0)
    {
        printf("can not write the header of the output file!\n");
        return -1;
    }

    return 0;
}

int _tmain(int argc, _TCHAR* argv[])
{
    av_register_all();
    avdevice_register_all();
    if (OpenVideoCapture() < 0)
    {
        return -1;
    }
    if (OpenAudioCapture() < 0)
    {
        return -1;
    }
    if (OpenOutPut() < 0)
    {
        return -1;
    }
//  int fps;
    /*printf("输入帧率:");
    scanf_s("%d",&fps);
    if ( NULL == fps)
    {
        fps = 10;
    }*/

    InitializeCriticalSection(&VideoSection);
    InitializeCriticalSection(&AudioSection);

    AVFrame *picture = av_frame_alloc();
    int size = avpicture_get_size(pFormatCtx_Out->streams[VideoIndex]->codec->pix_fmt, 
        pFormatCtx_Out->streams[VideoIndex]->codec->width, pFormatCtx_Out->streams[VideoIndex]->codec->height);
    picture_buf = new uint8_t[size];

    avpicture_fill((AVPicture *)picture, picture_buf, 
        pFormatCtx_Out->streams[VideoIndex]->codec->pix_fmt, 
        pFormatCtx_Out->streams[VideoIndex]->codec->width, 
        pFormatCtx_Out->streams[VideoIndex]->codec->height);



    //star cap screen thread
    CreateThread( NULL, 0, ScreenCapThreadProc, 0, 0, NULL);
    //star cap audio thread
    CreateThread( NULL, 0, AudioCapThreadProc, 0, 0, NULL);
    int64_t cur_pts_v=0,cur_pts_a=0;
    int VideoFrameIndex = 0, AudioFrameIndex = 0;

    while(1)
    {
        if (_kbhit() != 0 && bCap)
        {
            bCap = false;
            Sleep(2000);
        }
        if (fifo_audio && fifo_video)
        {
            int sizeAudio = av_audio_fifo_size(fifo_audio);
            int sizeVideo = av_fifo_size(fifo_video);
            //缓存数据写完就结束循环
            if (av_audio_fifo_size(fifo_audio) <= pFormatCtx_Out->streams[AudioIndex]->codec->frame_size && 
                av_fifo_size(fifo_video) <= frame_size && !bCap)
            {
                break;
            }
        }

        if(av_compare_ts(cur_pts_v, pFormatCtx_Out->streams[VideoIndex]->time_base, 
                         cur_pts_a,pFormatCtx_Out->streams[AudioIndex]->time_base) <= 0)
        {
            if (av_fifo_size(fifo_video) < frame_size && !bCap)
            {
                cur_pts_v = 0x7fffffffffffffff;
            }
            if(av_fifo_size(fifo_video) >= size)
            {
                EnterCriticalSection(&VideoSection);
                av_fifo_generic_read(fifo_video, picture_buf, size, NULL); //将数据从avfifobuffer馈送到用户提供的回调。
                LeaveCriticalSection(&VideoSection);

                avpicture_fill((AVPicture *)picture, picture_buf,
                    pFormatCtx_Out->streams[VideoIndex]->codec->pix_fmt,
                    pFormatCtx_Out->streams[VideoIndex]->codec->width,
                    pFormatCtx_Out->streams[VideoIndex]->codec->height); //根据指定的图像参数和提供的图像数据缓冲区设置图片字段。

                //pts = n * ((1 / timbase)/ fps);
                //picture->pts = VideoFrameIndex * ((pFormatCtx_Video->streams[0]->time_base.den / pFormatCtx_Video->streams[0]->time_base.num) / 24);
                picture->pts = VideoFrameIndex * ((outVideoCodecCtx->time_base.den * 100000 / outVideoCodecCtx->time_base.num) / 180);

                int got_picture = 0;
                AVPacket pkt;
                av_init_packet(&pkt);

                pkt.data = NULL;
                pkt.size = 0;
                //从帧中获取输入的原始视频数据
                int ret = avcodec_encode_video2(pFormatCtx_Out->streams[VideoIndex]->codec, &pkt, picture, &got_picture);
                if(ret < 0)
                {
                    continue;
                }

                if (got_picture==1)
                {
                    pkt.stream_index = VideoIndex;
                    /*int count = 1;
                    pkt.pts = pkt.dts = count * ((pFormatCtx_Video->streams[0]->time_base.den / pFormatCtx_Video->streams[0]->time_base.num) / 15);
                    count++;*/

                    //x = pts * (timebase1.num / timebase1.den )* (timebase2.den / timebase2.num);

                    pkt.pts = av_rescale_q_rnd(pkt.pts, pFormatCtx_Video->streams[0]->time_base, 
                        pFormatCtx_Out->streams[VideoIndex]->time_base, (AVRounding)(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX));  
                    pkt.dts = av_rescale_q_rnd(pkt.dts,  pFormatCtx_Video->streams[0]->time_base, 
                        pFormatCtx_Out->streams[VideoIndex]->time_base, (AVRounding)(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX)); 


                    pkt.duration = ((pFormatCtx_Out->streams[0]->time_base.den / pFormatCtx_Out->streams[0]->time_base.num) / 60);
                    //pkt.duration = 1000/60;
                    //pkt.pts = pkt.dts = Count * (ofmt_ctx->streams[stream_index]->time_base.den) /ofmt_ctx->streams[stream_index]->time_base.num / 10;

                    //Count++;


                    cur_pts_v = pkt.pts;

                    ret = av_interleaved_write_frame(pFormatCtx_Out, &pkt);
                    //delete[] pkt.data;
                    av_free_packet(&pkt);
                }
                VideoFrameIndex++;
            }
        }
        else
        {
            if (NULL == fifo_audio)
            {
                continue;//还未初始化fifo
            }
            if (av_audio_fifo_size(fifo_audio) < pFormatCtx_Out->streams[AudioIndex]->codec->frame_size && !bCap)
            {
                cur_pts_a = 0x7fffffffffffffff;
            }
            if(av_audio_fifo_size(fifo_audio) >= 
                (pFormatCtx_Out->streams[AudioIndex]->codec->frame_size > 0 ? pFormatCtx_Out->streams[AudioIndex]->codec->frame_size : 1024))
            {
                AVFrame *frame;
                frame = av_frame_alloc();
                frame->nb_samples = pFormatCtx_Out->streams[AudioIndex]->codec->frame_size>0 ? pFormatCtx_Out->streams[AudioIndex]->codec->frame_size: 1024;
                frame->channel_layout = pFormatCtx_Out->streams[AudioIndex]->codec->channel_layout;
                frame->format = pFormatCtx_Out->streams[AudioIndex]->codec->sample_fmt;
                frame->sample_rate = pFormatCtx_Out->streams[AudioIndex]->codec->sample_rate;
                av_frame_get_buffer(frame, 0);

                EnterCriticalSection(&AudioSection);
                av_audio_fifo_read(fifo_audio, (void **)frame->data, 
                    (pFormatCtx_Out->streams[AudioIndex]->codec->frame_size > 0 ? pFormatCtx_Out->streams[AudioIndex]->codec->frame_size : 1024));
                LeaveCriticalSection(&AudioSection);

                AVPacket pkt_out;
                av_init_packet(&pkt_out);
                int got_picture = -1;
                pkt_out.data = NULL;
                pkt_out.size = 0;

                frame->pts = AudioFrameIndex * pFormatCtx_Out->streams[AudioIndex]->codec->frame_size;
                if (avcodec_encode_audio2(pFormatCtx_Out->streams[AudioIndex]->codec, &pkt_out, frame, &got_picture) < 0)
                {
                    printf("can not decoder a frame");
                }
                av_frame_free(&frame);
                if (got_picture) 
                {
                    pkt_out.stream_index = AudioIndex;
                    pkt_out.pts = AudioFrameIndex * pFormatCtx_Out->streams[AudioIndex]->codec->frame_size;
                    pkt_out.dts = AudioFrameIndex * pFormatCtx_Out->streams[AudioIndex]->codec->frame_size;
                    pkt_out.duration = pFormatCtx_Out->streams[AudioIndex]->codec->frame_size;

                    cur_pts_a = pkt_out.pts;

                    int ret = av_interleaved_write_frame(pFormatCtx_Out, &pkt_out);
                    av_free_packet(&pkt_out);
                }
                AudioFrameIndex++;
            }
        }
    }

    delete[] picture_buf;

    av_fifo_free(fifo_video);
    av_audio_fifo_free(fifo_audio);

    av_write_trailer(pFormatCtx_Out);

    avio_close(pFormatCtx_Out->pb);
    avformat_free_context(pFormatCtx_Out);

    if (pFormatCtx_Video != NULL)
    {
        avformat_close_input(&pFormatCtx_Video);
        pFormatCtx_Video = NULL;
    }
    if (pFormatCtx_Audio != NULL)
    {
        avformat_close_input(&pFormatCtx_Audio);
        pFormatCtx_Audio = NULL;
    }

    return 0;
}

DWORD WINAPI ScreenCapThreadProc( LPVOID lpParam )
{
    AVPacket packet;
    int got_picture;
    AVFrame *pFrame;
    pFrame=av_frame_alloc();

    AVFrame *picture = av_frame_alloc();
    int size = avpicture_get_size(pFormatCtx_Out->streams[VideoIndex]->codec->pix_fmt, 
        pFormatCtx_Out->streams[VideoIndex]->codec->width, 
        pFormatCtx_Out->streams[VideoIndex]->codec->height);

    avpicture_fill((AVPicture *)picture, picture_buf, 
        pFormatCtx_Out->streams[VideoIndex]->codec->pix_fmt, 
        pFormatCtx_Out->streams[VideoIndex]->codec->width, 
        pFormatCtx_Out->streams[VideoIndex]->codec->height);

    FILE *p = NULL;
    p = fopen("proc_test.yuv", "wb+");
    av_init_packet(&packet);
    int height = pFormatCtx_Out->streams[VideoIndex]->codec->height;
    int width = pFormatCtx_Out->streams[VideoIndex]->codec->width;
    int y_size=height*width;
    while(bCap)
    {
        packet.data = NULL;
        packet.size = 0;
        if (av_read_frame(pFormatCtx_Video, &packet) < 0)
        {
            continue;
        }
        if(packet.stream_index == 0)
        {
            if (avcodec_decode_video2(pCodecCtx_Video, pFrame, &got_picture, &packet) < 0)
            {
                printf("Decode Error.(解码错误)\n");
                continue;
            }
            if (got_picture)
            {
                sws_scale(img_convert_ctx, 
                    (const uint8_t* const*)pFrame->data,
                    pFrame->linesize, 
                    0, 
                    pFormatCtx_Out->streams[VideoIndex]->codec->height,
                    picture->data,
                    picture->linesize);

                if (av_fifo_space(fifo_video) >= size)
                {
                    EnterCriticalSection(&VideoSection);                    
                    av_fifo_generic_write(fifo_video, picture->data[0], y_size, NULL);
                    av_fifo_generic_write(fifo_video, picture->data[1], y_size/4, NULL);
                    av_fifo_generic_write(fifo_video, picture->data[2], y_size/4, NULL);
                    LeaveCriticalSection(&VideoSection);
                }
            }
        }
        av_free_packet(&packet);
    }
    av_frame_free(&pFrame);
    av_frame_free(&picture);
    return 0;
}

DWORD WINAPI AudioCapThreadProc( LPVOID lpParam )
{
    AVPacket pkt;
    AVFrame *frame;
    frame = av_frame_alloc();
    int gotframe;
    while(bCap)
    {
        pkt.data = NULL;
        pkt.size = 0;
        if(av_read_frame(pFormatCtx_Audio,&pkt) < 0)
        {
            continue;
        }

        if (avcodec_decode_audio4(pFormatCtx_Audio->streams[0]->codec, frame, &gotframe, &pkt) < 0)
        {
            av_frame_free(&frame);
            printf("can not decoder a frame");
            break;
        }
        av_free_packet(&pkt);

        if (!gotframe)
        {
            printf("没有获取到数据,继续下一次");
            continue;
        }

        if (NULL == fifo_audio)
        {
            fifo_audio = av_audio_fifo_alloc(pFormatCtx_Audio->streams[0]->codec->sample_fmt, 
                pFormatCtx_Audio->streams[0]->codec->channels, 30 * frame->nb_samples);
        }

        int buf_space = av_audio_fifo_space(fifo_audio);
        if (av_audio_fifo_space(fifo_audio) >= frame->nb_samples)
        {
            EnterCriticalSection(&AudioSection);
            av_audio_fifo_write(fifo_audio, (void **)frame->data, frame->nb_samples);
            LeaveCriticalSection(&AudioSection);
        }
    }
    av_frame_free(&frame);
    return 0;
}

Maybe there is another way to calculate PTS and DTS

I hope whatever the frame rate is,video playback speed is right.Not too fast or too slow.

Upvotes: 1

Views: 3464

Answers (2)

Philip Ashmore
Philip Ashmore

Reputation: 21

With my camera, it reports 30 fps, but actually delivers 15 fps. It's identified as "HD WEB CAMERA". With this discrepency, you will get video that plays at 2x speed. I checked with ffmpeg, which catches this, and it replays the video at normal speed. I looked into how ffmpeg does this, and it appears that it creates a filter graph that somehow fixes the problem.

Upvotes: 0

Kris Xia
Kris Xia

Reputation: 31

Finally, I found out the reason for the problem.

The frame rate of video generated by video recording is determined by the recorded video stream.My computer's maximum frame rate for gdigrab is 10 frames,so if I set it more than 10 frames, the playback speed will be fast.And if i set it less than 10 frames, the playback speed will be slow.But i run code on my computer where I play games.Whatever i choose 10 frames or 15 frames,the palyback speed will be correct.

Still,i don't know the reason why my gdigrab's framerate is only 0-10 frames.There are a number of factors that affect the frame rate of video recording,such as CPU Graphics, card,display and Memory.

Here is the final code capture screen and audio with FFMPEG

#include "stdafx.h"

#ifdef  __cplusplus
extern "C"
{
#endif
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
#include "libswscale/swscale.h"
#include "libavdevice/avdevice.h"
#include "libavutil/audio_fifo.h"

#include "libavfilter/buffersink.h"
#include "libavfilter/buffersrc.h"
#include "libavutil/imgutils.h"
#include "libavutil/mathematics.h"
#include "libavutil/samplefmt.h"
#include "libavutil/time.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "libavutil/file.h"
#include "libavutil/mem.h"
#include "libavutil/frame.h"
#include "libavfilter/avfilter.h"
#include "libswresample/swresample.h"

#pragma comment(lib, "avcodec.lib")
#pragma comment(lib, "avformat.lib")
#pragma comment(lib, "avutil.lib")
#pragma comment(lib, "avdevice.lib")
#pragma comment(lib, "avfilter.lib")

#pragma comment(lib, "avfilter.lib")
#pragma comment(lib, "postproc.lib")
#pragma comment(lib, "swresample.lib")
#pragma comment(lib, "swscale.lib")
#ifdef __cplusplus
};
#endif

AVFormatContext *pFormatCtx_Video = NULL, *pFormatCtx_Audio = NULL, *pFormatCtx_Out = NULL;

AVCodecContext *outVideoCodecCtx = NULL;
AVCodecContext *outAudioCodecCtx = NULL;

AVStream *pVideoStream = NULL, *pAudioStream = NULL;

AVCodec *outAVCodec;
AVCodec *outAudioCodec;

AVCodecContext  *pCodecCtx_Video;
AVCodec         *pCodec_Video;
AVFifoBuffer    *fifo_video = NULL;
AVAudioFifo     *fifo_audio = NULL;
int VideoIndex, AudioIndex;
int codec_id;

CRITICAL_SECTION AudioSection, VideoSection;

SwsContext *img_convert_ctx;
int frame_size = 0;

uint8_t *picture_buf = NULL;

bool bCap = true;

DWORD WINAPI ScreenCapThreadProc( LPVOID lpParam );
DWORD WINAPI AudioCapThreadProc( LPVOID lpParam );

static char *dup_wchar_to_utf8(wchar_t *w)
{
    char *s = NULL;
    int l = WideCharToMultiByte(CP_UTF8, 0, w, -1, 0, 0, 0, 0);
    s = (char *) av_malloc(l);
    if (s)
        WideCharToMultiByte(CP_UTF8, 0, w, -1, s, l, 0, 0);
    return s;
}

int OpenVideoCapture()
{
    int fps = 10;
    char opt;
    printf("选择视频播放帧率:\n");
    printf("A   5帧\n");
    printf("B   10帧\n");
    printf("C   15帧\n");
    printf("D   20帧\n");
    printf("E   25帧\n");
    printf("F   30帧\n");
    opt = getchar();
    AVDictionary *options = NULL;
    switch (opt)
    {
    case 'A':
        fps = 5;
        av_dict_set(&options, "framerate", "5", 0);
        break;
    case 'a':
        fps = 5;
        av_dict_set(&options, "framerate", "5", 0);
        break;
    case 'B':
        fps = 10;
        av_dict_set(&options, "framerate", "10", 0);
        break;
    case 'b':
        fps = 10;
        av_dict_set(&options, "framerate", "10", 0);
        break;
    case 'C':
        fps = 15;
        av_dict_set(&options, "framerate", "15", 0);
        break;
    case 'c':
        fps = 15;
        av_dict_set(&options, "framerate", "15", 0);
        break;
    case 'D':
        fps = 20;
        av_dict_set(&options, "framerate", "20", 0);
        break;
    case 'd':
        fps = 20;
        av_dict_set(&options, "framerate", "20", 0);
        break;
    case 'E':
        fps = 25;
        av_dict_set(&options, "framerate", "25", 0);
        break;
    case 'e':
        fps = 25;
        av_dict_set(&options, "framerate", "25", 0);
        break;
    case 'F':
        fps = 30;
        av_dict_set(&options, "framerate", "30", 0);
        break;
    case 'f':
        fps = 30;
        av_dict_set(&options, "framerate", "30", 0);
        break;
    default:
        printf("选项输入错误\n");
        return -1;
    }
    //AVDictionary *options = NULL;
    //av_dict_set(&options, "framerate", "15", 0);
    AVInputFormat *ifmt=av_find_input_format("gdigrab");
    if(avformat_open_input(&pFormatCtx_Video, "desktop", ifmt, &options) != 0)
    {
        printf("Couldn't open input stream.(无法打开视频输入流)\n");
        return -1;
    }
    if(avformat_find_stream_info(pFormatCtx_Video,NULL)<0)
    {
        printf("Couldn't find stream information.(无法获取视频流信息)\n");
        return -1;
    }
    if (pFormatCtx_Video->streams[0]->codec->codec_type != AVMEDIA_TYPE_VIDEO)
    {
        printf("Couldn't find video stream information.(无法获取视频流信息)\n");
        return -1;
    }
    pCodecCtx_Video = pFormatCtx_Video->streams[0]->codec;
    pCodec_Video = avcodec_find_decoder(pCodecCtx_Video->codec_id);
    if(pCodec_Video == NULL)
    {
        printf("Codec not found.(没有找到解码器)\n");
        return -1;
    }
    if(avcodec_open2(pCodecCtx_Video, pCodec_Video, NULL) < 0)
    {
        printf("Could not open codec.(无法打开解码器)\n");
        return -1;
    }

    av_dump_format(pFormatCtx_Video, 0, NULL, 0);

    img_convert_ctx = sws_getContext(pCodecCtx_Video->width, 
        pCodecCtx_Video->height, 
        pCodecCtx_Video->pix_fmt, 
        pCodecCtx_Video->width,
        pCodecCtx_Video->height, 
        PIX_FMT_YUV420P, 
        SWS_BICUBIC, NULL, NULL, NULL); 

    frame_size = avpicture_get_size(pCodecCtx_Video->pix_fmt, pCodecCtx_Video->width, pCodecCtx_Video->height);
    fifo_video = av_fifo_alloc(60 * avpicture_get_size(AV_PIX_FMT_YUV420P, pCodecCtx_Video->width, pCodecCtx_Video->height));

    av_dict_free(&options);

    return 0;
}

int OpenAudioCapture()
{
    AVInputFormat *pAudioInputFmt = av_find_input_format("dshow");
    AVDictionary *opt = NULL;
    char * psDevName = dup_wchar_to_utf8(L"audio=virtual-audio-capturer");
    if (avformat_open_input(&pFormatCtx_Audio, psDevName, pAudioInputFmt, &opt) < 0)
    {
        printf("Couldn't open input stream.(无法打开音频输入流)\n");
        return -1;
    }

    if(avformat_find_stream_info(pFormatCtx_Audio,NULL)<0)  
        return -1; 

    if(pFormatCtx_Audio->streams[0]->codec->codec_type != AVMEDIA_TYPE_AUDIO)
    {
        printf("Couldn't find video stream information.(无法获取音频流信息)\n");
        return -1;
    }

    AVCodec *tmpCodec = avcodec_find_decoder(pFormatCtx_Audio->streams[0]->codec->codec_id);
    if(0 > avcodec_open2(pFormatCtx_Audio->streams[0]->codec, tmpCodec, NULL))
    {
        printf("can not find or open audio decoder!\n");
    }

    av_dump_format(pFormatCtx_Audio, 0, NULL, 0);

    return 0;
}

int OpenOutPut()
{
    AVStream *pVideoStream = NULL, *pAudioStream = NULL;
    const char *outFileName = "test.mp4";
    avformat_alloc_output_context2(&pFormatCtx_Out, NULL, NULL, outFileName);
    if (pFormatCtx_Video->streams[0]->codec->codec_type == AVMEDIA_TYPE_VIDEO)
    {
        VideoIndex = 0;
        pVideoStream = avformat_new_stream(pFormatCtx_Out, NULL);
        if (!pVideoStream)
        {
            printf("can not new stream for output!\n");
            return -1;
        }

        outVideoCodecCtx = avcodec_alloc_context3(outAVCodec);
        if ( !outVideoCodecCtx )
        {
            printf("Error : avcodec_alloc_context3()\n");
            return -1;
        }

        outVideoCodecCtx = pVideoStream->codec;
        outVideoCodecCtx->codec_id = AV_CODEC_ID_MPEG4;
        outVideoCodecCtx->width = pFormatCtx_Video->streams[0]->codec->width;
        outVideoCodecCtx->height = pFormatCtx_Video->streams[0]->codec->height;
        outVideoCodecCtx->time_base = pFormatCtx_Video->streams[0]->codec->time_base;;
        outVideoCodecCtx->pix_fmt = AV_PIX_FMT_YUV420P;
        outVideoCodecCtx->codec_type = AVMEDIA_TYPE_VIDEO;

        if (codec_id == AV_CODEC_ID_H264)
        {
            av_opt_set(outVideoCodecCtx->priv_data, "preset", "slow", 0);
        }

        outAVCodec = avcodec_find_encoder(AV_CODEC_ID_MPEG4);
        if( !outAVCodec )
        {
            printf("\n\nError : avcodec_find_encoder()");
            return -1;
        }
        if (pFormatCtx_Out->oformat->flags & AVFMT_GLOBALHEADER)
            outVideoCodecCtx->flags |=CODEC_FLAG_GLOBAL_HEADER;

        if ((avcodec_open2(outVideoCodecCtx,outAVCodec, NULL)) < 0)
        {
            printf("can not open the encoder\n");
            return -1;
        }
    }
    if(pFormatCtx_Audio->streams[0]->codec->codec_type == AVMEDIA_TYPE_AUDIO)
    {
        AVCodecContext *pOutputCodecCtx;
        AudioIndex = 1;
        pAudioStream = avformat_new_stream(pFormatCtx_Out, NULL);

        pAudioStream->codec->codec = avcodec_find_encoder(pFormatCtx_Out->oformat->audio_codec);

        pOutputCodecCtx = pAudioStream->codec;

        pOutputCodecCtx->sample_rate = pFormatCtx_Audio->streams[0]->codec->sample_rate;
        pOutputCodecCtx->channel_layout = pFormatCtx_Out->streams[0]->codec->channel_layout;
        pOutputCodecCtx->channels = av_get_channel_layout_nb_channels(pAudioStream->codec->channel_layout);
        if(pOutputCodecCtx->channel_layout == 0)
        {
            pOutputCodecCtx->channel_layout = AV_CH_LAYOUT_STEREO;
            pOutputCodecCtx->channels = av_get_channel_layout_nb_channels(pOutputCodecCtx->channel_layout);

        }
        pOutputCodecCtx->sample_fmt = pAudioStream->codec->codec->sample_fmts[0];
        AVRational time_base={1, pAudioStream->codec->sample_rate};
        pAudioStream->time_base = time_base;

        pOutputCodecCtx->codec_tag = 0;  
        if (pFormatCtx_Out->oformat->flags & AVFMT_GLOBALHEADER)  
            pOutputCodecCtx->flags |= CODEC_FLAG_GLOBAL_HEADER;

        if (avcodec_open2(pOutputCodecCtx, pOutputCodecCtx->codec, 0) < 0)
        {
            printf("编码器打开失败,退出程序\n");
            return -1;
        }
    }

    if (!(pFormatCtx_Out->oformat->flags & AVFMT_NOFILE))
    {
        if(avio_open(&pFormatCtx_Out->pb, outFileName, AVIO_FLAG_WRITE) < 0)
        {
            printf("can not open output file handle!\n");
            return -1;
        }
    }
    if(avformat_write_header(pFormatCtx_Out, NULL) < 0)
    {
        printf("can not write the header of the output file!\n");
        return -1;
    }

    return 0;
}

int _tmain(int argc, _TCHAR* argv[])
{
    av_register_all();
    avdevice_register_all();
    if (OpenVideoCapture() < 0)
    {
        return -1;
    }
    if (OpenAudioCapture() < 0)
    {
        return -1;
    }
    if (OpenOutPut() < 0)
    {
        return -1;
    }

    InitializeCriticalSection(&VideoSection);
    InitializeCriticalSection(&AudioSection);

    AVFrame *picture = av_frame_alloc();
    int size = avpicture_get_size(pFormatCtx_Out->streams[VideoIndex]->codec->pix_fmt, 
        pFormatCtx_Out->streams[VideoIndex]->codec->width, pFormatCtx_Out->streams[VideoIndex]->codec->height);
    picture_buf = new uint8_t[size];

    avpicture_fill((AVPicture *)picture, picture_buf, 
        pFormatCtx_Out->streams[VideoIndex]->codec->pix_fmt, 
        pFormatCtx_Out->streams[VideoIndex]->codec->width, 
        pFormatCtx_Out->streams[VideoIndex]->codec->height);

    //star cap screen thread
    CreateThread( NULL, 0, ScreenCapThreadProc, 0, 0, NULL);
    //star cap audio thread
    CreateThread( NULL, 0, AudioCapThreadProc, 0, 0, NULL);
    int64_t cur_pts_v=0,cur_pts_a=0;
    int64_t VideoFrameIndex = 0, AudioFrameIndex = 0;
    int64_t count = 1;
    int64_t video_pre_pts = 0;

    while(1)
    {
        if (_kbhit() != 0 && bCap)
        {
            bCap = false;
            Sleep(2000);
        }
        if (fifo_audio && fifo_video)
        {
            int sizeAudio = av_audio_fifo_size(fifo_audio);
            int sizeVideo = av_fifo_size(fifo_video);
            //缓存数据写完就结束循环
            if (av_audio_fifo_size(fifo_audio) <= pFormatCtx_Out->streams[AudioIndex]->codec->frame_size && 
                av_fifo_size(fifo_video) <= frame_size && !bCap)
            {
                break;
            }
        }

        if(av_compare_ts(cur_pts_v, pFormatCtx_Out->streams[VideoIndex]->time_base, cur_pts_a,pFormatCtx_Out->streams[AudioIndex]->time_base) <= 0)
        {
            if (av_fifo_size(fifo_video) < frame_size && !bCap)
            {
                cur_pts_v = 0x7fffffffffffffff;
            }
            if(av_fifo_size(fifo_video) >= size)
            {
                //将数据从avfifobuffer馈送到用户提供的回调。
                EnterCriticalSection(&VideoSection);
                av_fifo_generic_read(fifo_video, picture_buf, size, NULL); 
                LeaveCriticalSection(&VideoSection);

                //根据指定的图像参数和提供的图像数据缓冲区设置图片字段。
                avpicture_fill((AVPicture *)picture, picture_buf,
                    pFormatCtx_Out->streams[VideoIndex]->codec->pix_fmt,
                    pFormatCtx_Out->streams[VideoIndex]->codec->width,
                    pFormatCtx_Out->streams[VideoIndex]->codec->height); 

                //pts = n * ((1 / timbase)/ fps);
                //picture->pts = VideoFrameIndex * ((pFormatCtx_Video->streams[0]->time_base.den / pFormatCtx_Video->streams[0]->time_base.num) / 15);
                picture->pts = av_rescale_q(VideoFrameIndex,outVideoCodecCtx->time_base,pFormatCtx_Video->streams[0]->time_base);
                printf("picture->pts: %d\n",picture->pts);

                int got_picture = 0;
                AVPacket pkt;
                av_init_packet(&pkt);

                pkt.data = NULL;
                pkt.size = 0;
                //从帧中获取输入的原始视频数据
                int ret = avcodec_encode_video2(pFormatCtx_Out->streams[VideoIndex]->codec, &pkt, picture, &got_picture);
                if(ret < 0)
                {
                    continue;
                }

                if (got_picture==1)
                {
                    pkt.stream_index = VideoIndex;

                    //pFormatCtx_Video //pFormatCtx_Out
                    pkt.pts = av_rescale_q_rnd(pkt.pts, pFormatCtx_Video->streams[0]->time_base, pFormatCtx_Out->streams[VideoIndex]->time_base, (AVRounding)(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX)); 
                    printf("pkt.pts = %d\n",pkt.pts);
                    pkt.dts = av_rescale_q_rnd(pkt.dts, pFormatCtx_Video->streams[0]->time_base, pFormatCtx_Out->streams[VideoIndex]->time_base, (AVRounding)(AV_ROUND_NEAR_INF|AV_ROUND_PASS_MINMAX)); 
                    printf("pkt.dts = %d\n",pkt.dts);

                    pkt.duration = ((pFormatCtx_Out->streams[0]->time_base.den / pFormatCtx_Out->streams[0]->time_base.num) / outVideoCodecCtx->time_base.den);
                    //pkt.duration = 1;
                    //pkt.duration = av_rescale_q(pkt.duration,outVideoCodecCtx->time_base,pFormatCtx_Video->streams[0]->time_base);

                    printf("pkt.duration = %d\n",pkt.duration);
                    pkt.pos = -1;

                    cur_pts_v = pkt.pts;

                    ret = av_interleaved_write_frame(pFormatCtx_Out, &pkt);
                    if (ret<0)
                    {
                        printf("Error muxing packet\n");
                        break;
                    }
                    av_free_packet(&pkt);
                }
                VideoFrameIndex++;
            }
        }
        else
        {
            if (NULL == fifo_audio)
            {
                continue;//还未初始化fifo
            }
            if (av_audio_fifo_size(fifo_audio) < pFormatCtx_Out->streams[AudioIndex]->codec->frame_size && !bCap)
            {
                cur_pts_a = 0x7fffffffffffffff;
            }
            if(av_audio_fifo_size(fifo_audio) >= 
                (pFormatCtx_Out->streams[AudioIndex]->codec->frame_size > 0 ? pFormatCtx_Out->streams[AudioIndex]->codec->frame_size : 1024))
            {
                AVFrame *frame;
                frame = av_frame_alloc();
                frame->nb_samples = pFormatCtx_Out->streams[AudioIndex]->codec->frame_size>0 ? pFormatCtx_Out->streams[AudioIndex]->codec->frame_size: 1024;
                frame->channel_layout = pFormatCtx_Out->streams[AudioIndex]->codec->channel_layout;
                frame->format = pFormatCtx_Out->streams[AudioIndex]->codec->sample_fmt;
                frame->sample_rate = pFormatCtx_Out->streams[AudioIndex]->codec->sample_rate;
                av_frame_get_buffer(frame, 0);

                EnterCriticalSection(&AudioSection);
                av_audio_fifo_read(fifo_audio, (void **)frame->data, 
                    (pFormatCtx_Out->streams[AudioIndex]->codec->frame_size > 0 ? pFormatCtx_Out->streams[AudioIndex]->codec->frame_size : 1024));
                LeaveCriticalSection(&AudioSection);

                AVPacket pkt_out;
                av_init_packet(&pkt_out);
                int got_picture = -1;
                pkt_out.data = NULL;
                pkt_out.size = 0;

                frame->pts = AudioFrameIndex * pFormatCtx_Out->streams[AudioIndex]->codec->frame_size;
                if (avcodec_encode_audio2(pFormatCtx_Out->streams[AudioIndex]->codec, &pkt_out, frame, &got_picture) < 0)
                {
                    printf("can not decoder a frame");
                }
                av_frame_free(&frame);
                if (got_picture)
                {
                    pkt_out.stream_index = AudioIndex;
                    pkt_out.pts = AudioFrameIndex * pFormatCtx_Out->streams[AudioIndex]->codec->frame_size;
                    pkt_out.dts = AudioFrameIndex * pFormatCtx_Out->streams[AudioIndex]->codec->frame_size;
                    pkt_out.duration = pFormatCtx_Out->streams[AudioIndex]->codec->frame_size;

                    cur_pts_a = pkt_out.pts;

                    int ret = av_interleaved_write_frame(pFormatCtx_Out, &pkt_out);
                    if (ret<0)
                    {
                        printf("Error muxing packet\n");
                        break;
                    }
                    av_free_packet(&pkt_out);
                }
                AudioFrameIndex++;
            }
        }

    }

    delete[] picture_buf;

    av_fifo_free(fifo_video);
    av_audio_fifo_free(fifo_audio);

    av_write_trailer(pFormatCtx_Out);

    avio_close(pFormatCtx_Out->pb);
    avformat_free_context(pFormatCtx_Out);

    if (pFormatCtx_Video != NULL)
    {
        avformat_close_input(&pFormatCtx_Video);
        pFormatCtx_Video = NULL;
    }
    if (pFormatCtx_Audio != NULL)
    {
        avformat_close_input(&pFormatCtx_Audio);
        pFormatCtx_Audio = NULL;
    }

    return 0;
}

DWORD WINAPI ScreenCapThreadProc( LPVOID lpParam )
{
    AVPacket packet;
    int got_picture;
    AVFrame *pFrame;
    pFrame=av_frame_alloc();

    AVFrame *picture = av_frame_alloc();
    int size = avpicture_get_size(pFormatCtx_Out->streams[VideoIndex]->codec->pix_fmt, 
        pFormatCtx_Out->streams[VideoIndex]->codec->width, 
        pFormatCtx_Out->streams[VideoIndex]->codec->height);

    avpicture_fill((AVPicture *)picture, picture_buf, 
        pFormatCtx_Out->streams[VideoIndex]->codec->pix_fmt, 
        pFormatCtx_Out->streams[VideoIndex]->codec->width, 
        pFormatCtx_Out->streams[VideoIndex]->codec->height);

    av_init_packet(&packet);
    int height = pFormatCtx_Out->streams[VideoIndex]->codec->height;
    int width = pFormatCtx_Out->streams[VideoIndex]->codec->width;
    int y_size=height*width;
    while(bCap)
    {
        packet.data = NULL;
        packet.size = 0;
        if (av_read_frame(pFormatCtx_Video, &packet) < 0)
        {
            continue;
        }
        if(packet.stream_index == 0)
        {
            if (avcodec_decode_video2(pCodecCtx_Video, pFrame, &got_picture, &packet) < 0)
            {
                printf("Decode Error.(解码错误)\n");
                continue;
            }
            if (got_picture)
            {
                sws_scale(img_convert_ctx, 
                    (const uint8_t* const*)pFrame->data,
                    pFrame->linesize, 
                    0, 
                    pFormatCtx_Out->streams[VideoIndex]->codec->height,
                    picture->data,
                    picture->linesize);

                if (av_fifo_space(fifo_video) >= size)
                {
                    EnterCriticalSection(&VideoSection);                    
                    av_fifo_generic_write(fifo_video, picture->data[0], y_size, NULL);
                    av_fifo_generic_write(fifo_video, picture->data[1], y_size/4, NULL);
                    av_fifo_generic_write(fifo_video, picture->data[2], y_size/4, NULL);
                    LeaveCriticalSection(&VideoSection);
                }
            }
        }
        av_free_packet(&packet);
    }
    av_frame_free(&pFrame);
    av_frame_free(&picture);
    return 0;
}

DWORD WINAPI AudioCapThreadProc( LPVOID lpParam )
{
    AVPacket pkt;
    AVFrame *frame;
    frame = av_frame_alloc();
    int gotframe;
    while(bCap)
    {
        pkt.data = NULL;
        pkt.size = 0;
        if(av_read_frame(pFormatCtx_Audio,&pkt) < 0)
        {
            continue;
        }

        if (avcodec_decode_audio4(pFormatCtx_Audio->streams[0]->codec, frame, &gotframe, &pkt) < 0)
        {
            av_frame_free(&frame);
            printf("can not decoder a frame");
            break;
        }
        av_free_packet(&pkt);

        if (!gotframe)
        {
            printf("没有获取到数据,继续下一次");
            continue;
        }

        if (NULL == fifo_audio)
        {
            fifo_audio = av_audio_fifo_alloc(pFormatCtx_Audio->streams[0]->codec->sample_fmt, 
                pFormatCtx_Audio->streams[0]->codec->channels, 30 * frame->nb_samples);
        }

        int buf_space = av_audio_fifo_space(fifo_audio);
        if (av_audio_fifo_space(fifo_audio) >= frame->nb_samples)
        {
            EnterCriticalSection(&AudioSection);
            av_audio_fifo_write(fifo_audio, (void **)frame->data, frame->nb_samples);
            LeaveCriticalSection(&AudioSection);
        }
    }
    av_frame_free(&frame);
    return 0;
}

Upvotes: 2

Related Questions