Keith
Keith

Reputation: 91

How do I force an IDR-frame using the x264 C API?

I am attempting to use an external bool signal to force the next encoded frame to be an IDR-frame using the x264 C API. I am using the "baseline" profile with the "ultrafast" preset and the "zerolatency" tune. I tried setting the input picture type prior to encoding, as in the code snippet below, but this has not worked. My class's Open() and Encode() methods are shown here. Any help will be appreciated.

int X264Encoder::Open(void)
{
  if (_x264VideoEncoder != NULL)
    Close();

  // Set up default parameters.
  if (x264_param_default_preset(&_param, _encoderSpeedPreset.c_str(), "zerolatency") < 0)  // 0=success, -1=failed
    { _errStr = "X264Encoder::Open: Default parameter preset failed with " + _encoderSpeedPreset; return(0); }

  // Set non-default params.
  _param.i_bitdepth       = 8;
  _param.i_csp            = _colourSpaceMapping[_colourSpace]; // Input colour space
  if(!_param.i_csp)
    { _errStr = "X264Encoder::Open: Incompatible colour space " + to_string(_colourSpace); return(0); }
  _param.i_width          = _width;
  _param.i_height         = _height;
  _param.i_fps_num        = _videoRateNumerator;
  _param.i_fps_den        = _videoRateDenominator;

  _param.rc.i_bitrate     = _avgBitsPerSecond / 1000; // bitrate units are in kbits/s

  _param.i_threads        = 1;
  _param.b_vfr_input      = 0;  // VFR input.  If 1, use timebase and timestamps for ratecontrol purposes. If 0, use fps only.
  _param.b_repeat_headers = 1;  // Put SPS/PPS before each keyframe
  _param.b_annexb         = 1;  // If set, place start codes (4 bytes) before NAL units, otherwise place size (4 bytes) before NAL units.

  // Apply profile restrictions.
  if (x264_param_apply_profile(&_param, _profile.c_str()) < 0)  // 0=success, -1=failed
    { _errStr = "X264Encoder::Open: Unable to set profile " + _profile; return(0); }

  // Initialise the encoder input pic buffer.
  if (x264_picture_alloc(&_picIn, _param.i_csp, _param.i_width, _param.i_height) < 0)
    { _errStr = "X264Encoder::Open: Unable to alloc input picture buffer"; return(0); }
  _inPicIsAllocated = true;

  // Instantiate the encoder.
  _x264VideoEncoder = x264_encoder_open(&_param);
  if (_x264VideoEncoder == NULL)
  {
    _errStr = "X264Encoder::Open: Unable to instantiate the encoder"; 
    // Clean up before exit.
    x264_picture_clean(&_picIn);
    _inPicIsAllocated = false;
    return(0);
  }//end if !_x264VideoEncoder...

  // Frame counting for pts timestamps.
  _frameNum     = 0;
  _lastPicType  = 0; // IDR-frame

  d.clear();

  return(1);
}//end Open.

// Encode a single frame.
//   pSrc          - input image: the Y plane (_width*_height bytes) followed by the two chroma
//                   planes (a quarter of that each), copied into x264 planes 0, 1 and 2.
//   pCmp          - output buffer for the encoded stream. Its capacity is not known here, so
//                   the caller must make it large enough for the encoded frame.
//   codeParameter - unused.
// A pending IDR request (_idrFrameRequired) forces this frame to be coded as an IDR-frame.
// On success _encodedFrameSize holds the byte count written to pCmp and _lastPicType the
// resulting picture type. Return 1=success, 0=failure (with _errStr set).
int X264Encoder::Encode(void* pSrc, void* pCmp, void* codeParameter)
{
  _encodedFrameSize = 0;

  // Validation house work.
  if(!Ready())
    { _errStr = "X264Encoder::Encode: Not ready"; return(0); }

  if(!pSrc || !pCmp)
    { _errStr = "X264Encoder::Encode: Invalid function parameter list"; return(0); }

  // Load input image.
  if(_param.i_csp != X264_CSP_I420) // Can only process I420 input colour space.
    { _errStr = "X264Encoder::Encode: I420 colour space required"; return(0); }
  const uint32_t lumSize = _width * _height;
  const uint32_t chrSize = lumSize / 4;
  // Transfer the input source image into the x264 picture img structure.
  // NOTE(review): whole-plane copies assume the planes allocated in Open() have no row
  // padding (stride == row width) - confirm against the x264 version in use.
  uint8_t* pImg = static_cast<uint8_t*>(pSrc);
  memcpy_s(_picIn.img.plane[0], lumSize, pImg, lumSize);
  pImg += lumSize;
  memcpy_s(_picIn.img.plane[1], chrSize, pImg, chrSize);
  pImg += chrSize;
  memcpy_s(_picIn.img.plane[2], chrSize, pImg, chrSize);

  // Encode single frame
  _picIn.i_pts = _frameNum;
  // Consume the IDR request now (so a request raised while encoding is not wiped out
  // afterwards) but remember it so that it can be re-armed if this encode fails.
  const bool forceIdr = _idrFrameRequired;
  _idrFrameRequired = false;
  _picIn.i_type = forceIdr ? X264_TYPE_IDR : X264_TYPE_AUTO;

  _encodedFrameSize = x264_encoder_encode(_x264VideoEncoder, &_nal, &_nalCnt, &_picIn, &_picOut);
  if (_encodedFrameSize <= 0)
  {
    // Do not leave a zero/negative return code behind as the reported compressed length.
    _encodedFrameSize = 0;
    // The requested IDR-frame was not produced so keep the request pending.
    if (forceIdr)
      _idrFrameRequired = true;
    _errStr = "X264Encoder::Encode: Encode process failed";
    return(0);
  }//end if _encodedFrameSize...

  // Write the encoded stream to the output. The returned size covers all NAL units of the
  // frame and x264 stores their payloads sequentially, so one copy from the first suffices.
  uint8_t* pOut = static_cast<uint8_t*>(pCmp);
  memcpy_s(pOut, _encodedFrameSize, _nal->p_payload, _encodedFrameSize);

  _lastPicType = (_picOut.i_type == X264_TYPE_IDR) ? 0 : 1; // 0=IDR, 1=Non-IDR

  d.push_back({ _encodedFrameSize, _lastPicType });

  _frameNum++;
  return(1);
}//end Encode...

Upvotes: 0

Views: 611

Answers (1)

Keith
Keith

Reputation: 91

The complete x264 wrapper class may be useful to others. It implements a common interface, ICodecv3, so that it can be used in the same way as other codecs. This is the class definition and implementation:

    // Class to encapsulate the X264 encoder.
    
    #pragma once
    
    #include <inttypes.h>
    #include <stdint.h>
    #include <x264.h>
    #include <ICodecv3.h>
    
    #include <string>
    #include <vector>
    #include <chrono>
    #include <unordered_map>
    
    // Wrapper around the x264 encoder exposing it through the ICodecv3 codec interface.
    // Typical use: configure by name with SetParameter(), call Open(), then call Encode() once
    // per frame and read the size with GetCompressedByteLengthL(). Restart() requests that the
    // next encoded frame is an IDR-frame. Decoding is not implemented.
    class X264Encoder : public ICodecv3
    {
    public:
      X264Encoder(void);
      ~X264Encoder(void);

      // Not copyable: the object owns the x264 encoder handle and input picture, and
      // _parameters stores pointers to this object's own members. A member-wise copy would
      // close/free the same resources twice and leave the map pointing into another object.
      X264Encoder(const X264Encoder&) = delete;
      X264Encoder& operator=(const X264Encoder&) = delete;

      //--------------- ICodecv3 Interface ----------------------------------------------------
      // Get/set a named parameter as a string. Return 1=success, 0=unknown name or bad value.
      int             GetParameter(const std::string type, std::string& value);
      int             SetParameter(const std::string type, const std::string& value);
      void        GetParameterName(int ordinal, std::string& name) {}  // Not implemented.
      std::string GetErrorStrEx(void) { std::string errStr = _errStr; _errStr.clear(); return(errStr); } // Clear after use
      int         Ready(void) { if (_x264VideoEncoder != NULL) return(1); return(0); }  // 1 = encoder is open.
      long        GetCompressedBitLengthL(void) { return(_encodedFrameSize*8); }
      long        GetCompressedByteLengthL(void) { return(_encodedFrameSize); }
      void*       GetReference(int refNum) { return(NULL); }  // Not implemented.
      void        Restart(void) { _idrFrameRequired = true; }  // Next Encode() codes an IDR-frame.
      int         Open(void);
      int         Close(void);
      int         Encode(void* pSrc, void* pCmp, void* codeParameter);
      int         Decode(void* pCmp, int bitLength, void* pDst) { return(0); } // Decoding is not implemented.

      //--------------- ICodecv2 Interface ----------------------------------------------------
      // Version 2 has been superseded by version 3 and therefore none are implemented. For
      // backward compatibility only.
      int         GetParameter(const char* type, int* length, void* value)      { return(0); }
      void        GetParameterName(int ordinal, const char** name, int* length) {}
      int         SetParameter(const char* type, const char* value)             { return(0); }
      char*       GetErrorStr(void)                                             { return(NULL); }
      int         GetCompressedBitLength(void)                                  { return(0); }
      int         GetCompressedByteLength(void)                                 { return(0); }
      int         Code(void* pSrc, void* pCmp, int codeParameter)               { return(0); }

      //--------------- Implementation specific Interface --------------------------------------

    private:
      x264_t*         _x264VideoEncoder;  // Encoder handle; NULL while closed.
      x264_param_t    _param;             // Parameter set handed to x264_encoder_open().
      x264_picture_t  _picIn;             // Input picture; planes are allocated in Open().
      bool            _inPicIsAllocated;  // True while _picIn owns allocated planes.
      x264_picture_t  _picOut;            // Output picture info filled in by the encoder.
      int             _frameNum;  // Used for internal pts timestamp incremented after each encode.
      long            _encodedFrameSize;  // Byte size of the most recently encoded frame.
      bool            _idrFrameRequired;  // Signal an IDR-frame to the Encode() method
      x264_nal_t*     _nal;               // NAL units returned by the last encode call.
      int             _nalCnt;            // Number of entries in _nal.

      std::string     _errStr;            // Last error description; cleared by GetErrorStrEx().

      uint32_t        _colourSpace;       // External "incolour" id; see _colourSpaceMapping.
      uint32_t        _width;
      uint32_t        _height;
      uint32_t        _videoRateNumerator;
      uint32_t        _videoRateDenominator;
      uint32_t        _avgBitsPerSecond;
      uint32_t        _lastPicType; // 0=IDR-frame, 1=non-IDR frame

      // Implementation specific params.
      std::string     _encoderSpeedPreset;
      std::string     _profile;

      // Name -> member lookup used by GetParameter()/SetParameter(). The two string entries
      // point at std::string members; every other entry points at a uint32_t member.
      std::unordered_map<std::string, void*>  _parameters = // Hold all the settings for the encoder.
      {
        { "incolour", &_colourSpace },
        { "width", &_width },
        { "height", &_height },
        { "frame rate numerator", &_videoRateNumerator },
        { "frame rate denominator", &_videoRateDenominator },
        { "avg bits per second", &_avgBitsPerSecond },
        { "encoder speed preset", &_encoderSpeedPreset },
        { "profile", &_profile },
        { "last pic coding type", &_lastPicType } // Read only.
      };

      std::unordered_map<uint32_t, int> _colourSpaceMapping = // Translate external "incolour" to x264 csp
      {
        { 17, X264_CSP_I420 },
        { 18, X264_CSP_NV12 },
        {  0, X264_CSP_BGR  },
        {  1, X264_CSP_BGRA }
      };

    };//end X264Encoder.


// Class to encapsulate the X264 encoder.

#include <X264Encoder.h>
#include <memory.h>

#include <stdexcept>

using namespace std;

// Construct a closed encoder with default settings: 25/1 frame rate, "fast" speed preset,
// "baseline" profile and an IDR-frame requested for the first encode. Width, height and
// bitrate default to 0 and are expected to be set with SetParameter() before Open().
// Members are initialised in their declaration order.
X264Encoder::X264Encoder(void) :
  _x264VideoEncoder(NULL),
  _inPicIsAllocated(false),
  _frameNum(0),
  _encodedFrameSize(0),
  _idrFrameRequired(true),
  _nal(NULL),
  _nalCnt(0),
  _errStr(""),
  // Previously left uninitialised although Open() and GetParameter() read it. 17 is the id
  // that _colourSpaceMapping translates to I420, the only colour space Encode() accepts.
  _colourSpace(17),
  _width(0),
  _height(0),
  _videoRateNumerator(25),
  _videoRateDenominator(1),
  _avgBitsPerSecond(0),
  _lastPicType(0),
  _encoderSpeedPreset("fast"),
  _profile("baseline")
{
  // Initialise the _param member with something. Open() rebuilds it from the preset in use
  // at that time, so the result of this call is not checked.
  x264_param_default_preset(&_param, _encoderSpeedPreset.c_str(), NULL);
}//end constructor.

// Destruction goes through Close() so that the encoder instance and the input picture
// buffer are released if they are still held.
X264Encoder::~X264Encoder()
{
  Close();
}//end destructor.

int X264Encoder::GetParameter(const string type, string& value)
{
  // _parameters map is never empty so no check required.
  void* vp = _parameters[type];
  if (vp != NULL)
  {
    if ((type == "encoder speed preset") || (type == "profile"))
      value = *((string*)vp);
//    else if (type == "timestamp units")  // int32_t type
//      value = to_string(*((int32_t*)vp));
//    else if (type == "timestamp offset")  // int64_t type
//      value = to_string(*((int64_t*)vp));
    else
      value = to_string(*((uint32_t*)vp));  // For parameter values that are all uint32_t
  }//end if vp...
  else
    return(0);

  return(1);
}//end GetParameter.

int X264Encoder::SetParameter(const string type, const string& value)
{
  if (value.empty()) return(0);

  // _parameters is never empty so no check required.
  void* vp = _parameters[type];
  if (vp != NULL)
  {
    if ((type == "encoder speed preset") || (type == "profile"))
      *((string*)vp) = value;
    //    else if (type == "timestamp units")  // int32_t type
    //      *((int32_t*)vp) = stoi(value);
    //    else if (type == "timestamp offset")  // int64_t type
    //      *((int64_t*)vp) = stoi(value);
    else if (type == "last pic coding type")  // Read only - do not set.
      return(1);
    else
      *((uint32_t*)vp) = stoi(value);
  }//end if vp...
  else
    return(0);

  return(1);
}//end SetParameter.

// Open the X264 encoder.
// Any non-default parameters must be set prior to calling this method. Create a default set and 
// modify them with the new parameters. then the encoder is instantiated.
// Return 1=success, 0=failure.
int X264Encoder::Open(void)
{
  if (_x264VideoEncoder != NULL)
    Close();

  // Set up default parameters.
  if (x264_param_default_preset(&_param, _encoderSpeedPreset.c_str(), "zerolatency") < 0)  // 0=success, -1=failed
    { _errStr = "X264Encoder::Open: Default parameter preset failed with " + _encoderSpeedPreset; return(0); }

  // Set non-default params.
  _param.i_bitdepth       = 8;
  _param.i_csp            = _colourSpaceMapping[_colourSpace]; // Input colour space
  if(!_param.i_csp)
    { _errStr = "X264Encoder::Open: Incompatible colour space " + to_string(_colourSpace); return(0); }
  _param.i_width          = _width;
  _param.i_height         = _height;
  _param.i_fps_num        = _videoRateNumerator;
  _param.i_fps_den        = _videoRateDenominator;

  _param.rc.i_bitrate     = _avgBitsPerSecond / 1000; // bitrate units are in kbits/s

  _param.i_threads        = 1;
  _param.b_vfr_input      = 0;  // VFR input.  If 1, use timebase and timestamps for ratecontrol purposes. If 0, use fps only.
  _param.b_repeat_headers = 1;  // Put SPS/PPS before each keyframe
  _param.b_annexb         = 1;  // If set, place start codes (4 bytes) before NAL units, otherwise place size (4 bytes) before NAL units.

  // Apply profile restrictions.
  if (x264_param_apply_profile(&_param, _profile.c_str()) < 0)  // 0=success, -1=failed
    { _errStr = "X264Encoder::Open: Unable to set profile " + _profile; return(0); }

  // Initialise the encoder input pic buffer.
  if (x264_picture_alloc(&_picIn, _param.i_csp, _param.i_width, _param.i_height) < 0)
    { _errStr = "X264Encoder::Open: Unable to alloc input picture buffer"; return(0); }
  _inPicIsAllocated = true;

  // Instantiate the encoder.
  _x264VideoEncoder = x264_encoder_open(&_param);
  if (_x264VideoEncoder == NULL)
  {
    _errStr = "X264Encoder::Open: Unable to instantiate the encoder"; 
    // Clean up before exit.
    x264_picture_clean(&_picIn);
    _inPicIsAllocated = false;
    return(0);
  }//end if !_x264VideoEncoder...

  // Frame counting for pts timestamps.
  _frameNum     = 0;
  _lastPicType  = 0; // IDR-frame

  return(1);
}//end Open.

// Close the encoder.
// Shuts down the x264 instance (if one is open) and frees the input picture buffer (if it is
// allocated). Calling it on an already closed encoder does nothing. Always returns 1.
int X264Encoder::Close(void)
{
  // Tear down the encoder instance and mark the object as not ready.
  if (_x264VideoEncoder)
  {
    x264_encoder_close(_x264VideoEncoder);
    _x264VideoEncoder = NULL;
  }//end if _x264VideoEncoder...

  // Release the input picture planes only when Open() actually allocated them.
  if (_inPicIsAllocated)
  {
    x264_picture_clean(&_picIn);
    _inPicIsAllocated = false;
  }//end if _inPicIsAllocated...

  return(1);
}//end Close.

// Encode a single frame.
//   pSrc          - input image: the Y plane (_width*_height bytes) followed by the two chroma
//                   planes (a quarter of that each), copied into x264 planes 0, 1 and 2.
//   pCmp          - output buffer for the encoded stream. Its capacity is not known here, so
//                   the caller must make it large enough for the encoded frame.
//   codeParameter - unused.
// A pending IDR request (_idrFrameRequired) forces this frame to be coded as an IDR-frame.
// On success _encodedFrameSize holds the byte count written to pCmp and _lastPicType the
// resulting picture type. Return 1=success, 0=failure (with _errStr set).
int X264Encoder::Encode(void* pSrc, void* pCmp, void* codeParameter)
{
  _encodedFrameSize = 0;

  // Validation house work.
  if(!Ready())
    { _errStr = "X264Encoder::Encode: Not ready"; return(0); }

  if(!pSrc || !pCmp)
    { _errStr = "X264Encoder::Encode: Invalid function parameter list"; return(0); }

  // Load input image.
  if(_param.i_csp != X264_CSP_I420) // Can only process I420 input colour space.
    { _errStr = "X264Encoder::Encode: I420 colour space required"; return(0); }
  const uint32_t lumSize = _width * _height;
  const uint32_t chrSize = lumSize / 4;
  // Transfer the input source image into the x264 picture img structure.
  // NOTE(review): whole-plane copies assume the planes allocated in Open() have no row
  // padding (stride == row width) - confirm against the x264 version in use.
  uint8_t* pImg = static_cast<uint8_t*>(pSrc);
  memcpy_s(_picIn.img.plane[0], lumSize, pImg, lumSize);
  pImg += lumSize;
  memcpy_s(_picIn.img.plane[1], chrSize, pImg, chrSize);
  pImg += chrSize;
  memcpy_s(_picIn.img.plane[2], chrSize, pImg, chrSize);

  // Encode single frame
  _picIn.i_pts = _frameNum;
  // Consume the IDR request now (so a request raised while encoding is not wiped out
  // afterwards) but remember it so that it can be re-armed if this encode fails.
  const bool forceIdr = _idrFrameRequired;
  _idrFrameRequired = false;
  _picIn.i_type = forceIdr ? X264_TYPE_IDR : X264_TYPE_AUTO;

  _encodedFrameSize = x264_encoder_encode(_x264VideoEncoder, &_nal, &_nalCnt, &_picIn, &_picOut);
  if (_encodedFrameSize <= 0)
  {
    // Do not leave a zero/negative return code behind as the reported compressed length.
    _encodedFrameSize = 0;
    // The requested IDR-frame was not produced so keep the request pending.
    if (forceIdr)
      _idrFrameRequired = true;
    _errStr = "X264Encoder::Encode: Encode process failed";
    return(0);
  }//end if _encodedFrameSize...

  // Write the encoded stream to the output. The returned size covers all NAL units of the
  // frame and x264 stores their payloads sequentially, so one copy from the first suffices.
  uint8_t* pOut = static_cast<uint8_t*>(pCmp);
  memcpy_s(pOut, _encodedFrameSize, _nal->p_payload, _encodedFrameSize);

  _lastPicType = (_picOut.i_type == X264_TYPE_IDR) ? 0 : 1; // 0=IDR, 1=Non-IDR

  _frameNum++;
  return(1);
}//end Encode...

I hope this is of use to someone...

Upvotes: 1

Related Questions