Wrap audio data of the pcm_alaw type into an MKA audio file using the ffmpeg API

Question

Imagine that in my project, I receive RTP packets with the payload type-8, for later saving this load as the Nth part of the audio track. I extract this load from the RTP packet and save it to a temporary buffer:

...

while ((rtp = receiveRtpPackets()).withoutErrors()) {
   payloadData.push(rtp.getPayloadData());
}

audioGenerator.setPayloadData(payloadData);
audioGenerator.recordToFile();

...

After filling a temporary buffer of a certain size with this payload, I process this buffer, namely, extract the entire payload and encode it using ffmpeg for further saving to an audio file in Matroska format. But I have a problem. Since the payload of the RTP packet is type 8, I have to save the raw audio data of the pcm_alaw format to mka audio format. But when saving raw data pcm_alaw to an audio file, I get these messages from the library:

...

[libopus @ 0x18eff60] Queue input is backward in time
[libopus @ 0x18eff60] Queue input is backward in time
[libopus @ 0x18eff60] Queue input is backward in time
[libopus @ 0x18eff60] Queue input is backward in time

...

When you open an audio file in vlc, nothing is played (the audio track timestamp is missing).

The task of my project is to simply take pcm_alaw data and pack it in a container, in mka format. The best way to determine the codec is to use the av_guess_codec() function, which in turn automatically selects the desired codec ID. But how do I pack the raw data into the container correctly, I do not know.

It is important to note that I can get as raw data any format of this data (audio formats only) defined by the RTP packet type (All types of RTP packet payload). All I know is that in any case, I have to pack the audio data in an mka container.

I also attach the code (borrowed from this resource) that I use:

audiogenerater.h

extern "C"
{
#include "libavformat/avformat.h"
#include "libavcodec/avcodec.h"
#include "libswresample/swresample.h"
}

class AudioGenerater
{
public:

    AudioGenerater();
   ~AudioGenerater() = default;

    void generateAudioFileWithOptions(
            QString        fileName,
            QByteArray     pcmData,
            int            channel,
            int            bitRate,
            int            sampleRate,
            AVSampleFormat format);
            
private:

    // init Format
    bool initFormat(QString audioFileName);

private:

    AVCodec         *m_AudioCodec        = nullptr;
    AVCodecContext  *m_AudioCodecContext = nullptr;
    AVFormatContext *m_FormatContext     = nullptr;
    AVOutputFormat  *m_OutputFormat      = nullptr;
};

audiogenerater.cpp

AudioGenerater::AudioGenerater()
{
    av_register_all();
    avcodec_register_all();
}

AudioGenerater::~AudioGenerater()
{
    // ... 
}

bool AudioGenerater::initFormat(QString audioFileName)
{
    // Create an output Format context
    int result = avformat_alloc_output_context2(&m_FormatContext, nullptr, nullptr, audioFileName.toLocal8Bit().data());
    if (result < 0) {
        return false;
    }

    m_OutputFormat = m_FormatContext->oformat;

    // Create an audio stream
    AVStream* audioStream = avformat_new_stream(m_FormatContext, m_AudioCodec);
    if (audioStream == nullptr) {
        avformat_free_context(m_FormatContext);
        return false;
    }

    // Set the parameters in the stream
    audioStream->id = m_FormatContext->nb_streams - 1;
    audioStream->time_base = { 1, 8000 };
    result = avcodec_parameters_from_context(audioStream->codecpar, m_AudioCodecContext);
    if (result < 0) {
        avformat_free_context(m_FormatContext);
        return false;
    }

    // Print FormatContext information
    av_dump_format(m_FormatContext, 0, audioFileName.toLocal8Bit().data(), 1);

    // Open file IO
    if (!(m_OutputFormat->flags & AVFMT_NOFILE)) {
        result = avio_open(&m_FormatContext->pb, audioFileName.toLocal8Bit().data(), AVIO_FLAG_WRITE);
        if (result < 0) {
            avformat_free_context(m_FormatContext);
            return false;
        }
    }

    return true;
}

void AudioGenerater::generateAudioFileWithOptions(
    QString _fileName,
    QByteArray _pcmData,
    int _channel,
    int _bitRate,
    int _sampleRate,
    AVSampleFormat _format)
{
    AVFormatContext* oc;
    if (avformat_alloc_output_context2(
            &oc, nullptr, nullptr, _fileName.toStdString().c_str())
        < 0) {
        qDebug() << "Error in line: " << __LINE__;
        return;
    }
    if (!oc) {
        printf("Could not deduce output format from file extension: using mka.
");
        avformat_alloc_output_context2(
            &oc, nullptr, "mka", _fileName.toStdString().c_str());
    }
    if (!oc) {
        qDebug() << "Error in line: " << __LINE__;
        return;
    }
    AVOutputFormat* fmt = oc->oformat;
    if (fmt->audio_codec == AV_CODEC_ID_NONE) {
        qDebug() << "Error in line: " << __LINE__;
        return;
    }

    AVCodecID codecID = av_guess_codec(
        fmt, nullptr, _fileName.toStdString().c_str(), nullptr, AVMEDIA_TYPE_AUDIO);
    // Find Codec
    m_AudioCodec = avcodec_find_encoder(codecID);
    if (m_AudioCodec == nullptr) {
        qDebug() << "Error in line: " << __LINE__;
        return;
    }
    // Create an encoder context
    m_AudioCodecContext = avcodec_alloc_context3(m_AudioCodec);
    if (m_AudioCodecContext == nullptr) {
        qDebug() << "Error in line: " << __LINE__;
        return;
    }

    // Setting parameters
    m_AudioCodecContext->bit_rate = _bitRate;
    m_AudioCodecContext->sample_rate = _sampleRate;
    m_AudioCodecContext->sample_fmt = _format;
    m_AudioCodecContext->channels = _channel;

    m_AudioCodecContext->channel_layout = av_get_default_channel_layout(_channel);
    m_AudioCodecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;

    // Turn on the encoder
    int result = avcodec_open2(m_AudioCodecContext, m_AudioCodec, nullptr);
    if (result < 0) {
        avcodec_free_context(&m_AudioCodecContext);
        if (m_FormatContext != nullptr)
            avformat_free_context(m_FormatContext);
        return;
    }

    // Create a package
    if (!initFormat(_fileName)) {
        avcodec_free_context(&m_AudioCodecContext);
        if (m_FormatContext != nullptr)
            avformat_free_context(m_FormatContext);
        return;
    }

    // write to the file header
    result = avformat_write_header(m_FormatContext, nullptr);
    if (result < 0) {
        avcodec_free_context(&m_AudioCodecContext);
        if (m_FormatContext != nullptr)
            avformat_free_context(m_FormatContext);
        return;
    }

    // Create Frame
    AVFrame* frame = av_frame_alloc();
    if (frame == nullptr) {
        avcodec_free_context(&m_AudioCodecContext);
        if (m_FormatContext != nullptr)
            avformat_free_context(m_FormatContext);
        return;
    }

    int nb_samples = 0;
    if (m_AudioCodecContext->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE) {
        nb_samples = 10000;
    }
    else {
        nb_samples = m_AudioCodecContext->frame_size;
    }

    // Set the parameters of the Frame
    frame->nb_samples = nb_samples;
    frame->format = m_AudioCodecContext->sample_fmt;
    frame->channel_layout = m_AudioCodecContext->channel_layout;

    // Apply for data memory
    result = av_frame_get_buffer(frame, 0);
    if (result < 0) {
        av_frame_free(&frame);
        {
            avcodec_free_context(&m_AudioCodecContext);
            if (m_FormatContext != nullptr)
                avformat_free_context(m_FormatContext);
            return;
        }
    }

    // Set the Frame to be writable
    result = av_frame_make_writable(frame);
    if (result < 0) {
        av_frame_free(&frame);
        {
            avcodec_free_context(&m_AudioCodecContext);
            if (m_FormatContext != nullptr)
                avformat_free_context(m_FormatContext);
            return;
        }
    }

    int perFrameDataSize = frame->linesize[0];
    int count = _pcmData.size() / perFrameDataSize;
    bool needAddOne = false;
    if (_pcmData.size() % perFrameDataSize != 0) {
        count++;
        needAddOne = true;
    }

    int frameCount = 0;
    for (int i = 0; i < count; ++i) {
        // Create a Packet
        AVPacket* pkt = av_packet_alloc();
        if (pkt == nullptr) {
            avcodec_free_context(&m_AudioCodecContext);
            if (m_FormatContext != nullptr)
                avformat_free_context(m_FormatContext);
            return;
        }
        av_init_packet(pkt);

        if (i == count - 1)
            perFrameDataSize = _pcmData.size() % perFrameDataSize;

        // Synthesize WAV files
        memset(frame->data[0], 0, perFrameDataSize);
        memcpy(frame->data[0], &(_pcmData.data()[perFrameDataSize * i]), perFrameDataSize);

        frame->pts = frameCount++;
        // send Frame
        result = avcodec_send_frame(m_AudioCodecContext, frame);
        if (result < 0)
            continue;

        // Receive the encoded Packet
        result = avcodec_receive_packet(m_AudioCodecContext, pkt);
        if (result < 0) {
            av_packet_free(&pkt);
            continue;
        }

        // write to file
        av_packet_rescale_ts(pkt, m_AudioCodecContext->time_base, m_FormatContext->streams[0]->time_base);
        pkt->stream_index = 0;
        result = av_interleaved_write_frame(m_FormatContext, pkt);
        if (result < 0)
            continue;

        av_packet_free(&pkt);
    }

    // write to the end of the file
    av_write_trailer(m_FormatContext);
    // Close file IO
    avio_closep(&m_FormatContext->pb);
    // Release Frame memory
    av_frame_free(&frame);

    avcodec_free_context(&m_AudioCodecContext);
    if (m_FormatContext != nullptr)
        avformat_free_context(m_FormatContext);
}

main.cpp

int main(int argc, char **argv)
{
    av_log_set_level(AV_LOG_TRACE);

    QFile file("rawDataOfPcmAlawType.bin");
    if (!file.open(QIODevice::ReadOnly)) {
        return EXIT_FAILURE;
    }
    QByteArray rawData(file.readAll());

    AudioGenerater generator;
    generator.generateAudioFileWithOptions(
               "test.mka",
               rawData,
               1, 
               64000, 
               8000,
               AV_SAMPLE_FMT_S16);

    return 0;
}

It is IMPORTANT you help me find the most appropriate way to record pcm_alaw or a different data format in an MKA audio file.

I ask everyone who knows anything to help (there is too little time left to implement this project)

ItIgnoramus · Accepted Answer

These useful links will help you:

A good overview of the data processing sequence in libav: ffmpeg-libav-tutorial
Examples from the ffmpeg developers themselves: avio_reading, resampling_audio, transcode_aac

Wrap audio data of the pcm_alaw type into an MKA audio file using the ffmpeg API

Answers (1)

Related Questions