Reputation: 181

Decode Audio from Memory - C++

I have two functions:

an internet-socket function which gets mp3 data and writes it to a file,
a function which decodes mp3-files.

However, I would rather have the data, which is currently written to disk, decoded in memory by the decode function.

My decode function looks like this, and it is all initialized via

    avformat_open_input(AVCodecContext, filename, NULL, NULL)

How can I read in the AVCodecContext without a filename, and instead using only the in-memory buffer?

Upvotes: 4

Answers (2)

GMasucci

Reputation: 2882

I thought I would post some code to illustrate how to achieve this. I have tried to comment it but am pressed for time; however, it should all be relatively straightforward stuff. Return values are based on interpolation of the associated message into a hex version of 1337 speak converted to decimal values, and I have tried to keep it as light as possible in tone :)

#include <iostream>
#include <string>

extern "C"
{
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libavutil/avutil.h>
};

// Explanatory messages printed by printAudioFrameInfo() after the per-frame summary.

// Printed when the sample format is planar and the channel count exceeds AV_NUM_DATA_POINTERS.
// The first literal ends with a space so that the sentence that follows does not run into it.
std::string tooManyChannels =   "The audio stream (and its frames) has/have too many channels to properly fit in\n to frame->data. Therefore, to access the audio data, you need to use\nframe->extended_data to access the audio data. "
                                "It is a  planar store, so\neach channel is in a different element.\n"
                                " E.G.: frame->extended_data[0] has the data for channel 1\n"
                                "       frame->extended_data[1] has the data for channel 2\n"
                                "And so on.\n";

// Printed in every other case (packed data, or planar data that fits in frame->data).
std::string nonPlanar = "Either the audio data is not planar, or there is not enough room in\n"
                        "frame->data to store all the channel data.  Either use\n"
                        "frame->data\n or \nframe->extended_data to access the audio data\n"
                        "both should just point to the same data in this instance.\n";

// Always printed: how channels are laid out for planar frames.
std::string information1 =  "If the frame is planar, each channel is in a separate element:\n"
                            "frame->data[0]/frame->extended_data[0] contains data for channel 1\n"
                            "frame->data[1]/frame->extended_data[1] contains data for channel 2\n";

// Always printed: how samples are laid out for packed (non-planar) frames.
std::string information2 =  "If the frame is in packed format( and therefore not planar),\n"
                            "then all the data is contained within:\n"
                            "frame->data[0]/frame->extended_data[0] \n"
                            "Similar to the manner in which some image formats have RGB(A) pixel data packed together,\n"
                            "rather than containing separate R G B (and A) data.\n";

// Prints a human-readable summary of one decoded audio frame to std::cout,
// followed by notes on where the sample data can be found.
//
// codecContext: supplies the channel count and the sample format.
// frame:        the decoded frame; only its sample count is read here.
void printAudioFrameInfo(const AVCodecContext* codecContext, const AVFrame* frame)
{
    /*
     This url: http://ffmpeg.org/doxygen/trunk/samplefmt_8h.html#af9a51ca15301871723577c730b5865c5
     contains information on the type you will need to utilise to access the audio data.
    */
    // format the tabs etc. in this string to suit your font, they line up for mine but may not for yours:)
    std::cout << "Audio frame info:\n"
              << "\tSample count:\t\t" << frame->nb_samples << '\n'
              << "\tChannel count:\t\t" << codecContext->channels << '\n'
              << "\tFormat:\t\t\t" << av_get_sample_fmt_name(codecContext->sample_fmt) << '\n'
              << "\tBytes per sample:\t" << av_get_bytes_per_sample(codecContext->sample_fmt) << '\n'
              << "\tPlanar storage format?:\t" << av_sample_fmt_is_planar(codecContext->sample_fmt) << '\n';


    std::cout << "frame->linesize[0] tells you the size (in bytes) of each plane\n";

    // Planar data with more channels than AV_NUM_DATA_POINTERS gets the
    // "use extended_data" message; every other combination gets the generic one.
    if (codecContext->channels > AV_NUM_DATA_POINTERS && av_sample_fmt_is_planar(codecContext->sample_fmt))
    {
        std::cout << tooManyChannels;
    }
    else
    {
        std::cout << nonPlanar;
    }
    std::cout << information1 << information2;
}

// Opens the hard-coded input file, finds its best audio stream, decodes every
// audio packet and prints information about each decoded frame.
//
// Returns 0 on success, or one of the joke error codes below on failure.
// NOTE(review): some platforms report only the low bits of the exit status, so
// values above 255 may not reach the caller intact - confirm for your target.
int main()
{
    // You can change the filename for any other filename/supported format
    std::string filename = "../my file.ogg";
    // Initialize FFmpeg
    av_register_all();

    AVFrame* frame = avcodec_alloc_frame();
    if (!frame)
    {
        std::cout << "Error allocating the frame.  Let's try again shall we?\n";
        return 666;  // fail at start: 666 = number of the beast
    }

    // you can change the file name to whatever you need:)
    AVFormatContext* formatContext = NULL;
    // avformat_open_input() takes a C string, so hand it filename.c_str().
    if (avformat_open_input(&formatContext, filename.c_str(), NULL, NULL) != 0)
    {
        av_free(frame);
        std::cout << "Error opening file " << filename<< "\n";
        return 800; // cant open file.  800 = Boo!
    }

    if (avformat_find_stream_info(formatContext, NULL) < 0)
    {
        av_free(frame);
        avformat_close_input(&formatContext);
        std::cout << "Error finding the stream information.\nCheck your paths/connections and the details you supplied!\n";
        return 57005; // stream info error.  0xDEAD in hex is 57005 in decimal
    }

    // Find the audio stream
    AVCodec* cdc = nullptr;
    int streamIndex = av_find_best_stream(formatContext, AVMEDIA_TYPE_AUDIO, -1, -1, &cdc, 0);
    if (streamIndex < 0)
    {
        av_free(frame);
        avformat_close_input(&formatContext);
        std::cout << "Could not find any audio stream in the file.  Come on! I need data!\n";
        return 165; // no(0) (a)udio s(5)tream:  0A5 in hex = 165 in decimal
    }

    AVStream* audioStream = formatContext->streams[streamIndex];
    AVCodecContext* codecContext = audioStream->codec;
    codecContext->codec = cdc;

    if (avcodec_open2(codecContext, codecContext->codec, NULL) != 0)
    {
        av_free(frame);
        avformat_close_input(&formatContext);
        std::cout << "Couldn't open the context with the decoder.  I can decode but I need to have something to decode.\nAs I couldn't find anything I have surmised the decoded output is 0!\n (Well can't have you thinking I am doing nothing can we?\n";
        return 1057; // cant find/open context 1057 = lost
    }

    std::cout << "This stream has " << codecContext->channels << " channels with a sample rate of " << codecContext->sample_rate << "Hz\n";
    std::cout << "The data presented in format: " << av_get_sample_fmt_name(codecContext->sample_fmt) << std::endl;

    AVPacket readingPacket;
    av_init_packet(&readingPacket);

    // Read the packets in a loop
    while (av_read_frame(formatContext, &readingPacket) == 0)
    {
        if (readingPacket.stream_index == audioStream->index)
        {
            // Work on a copy so readingPacket still describes the whole buffer
            // when it is released below.
            AVPacket decodingPacket = readingPacket;

            // Audio packets can have multiple audio frames in a single packet
            while (decodingPacket.size > 0)
            {
                // Try to decode the packet into a frame(s)
                // Some frames rely on multiple packets, so we have to make sure the frame is finished
                // before utilising it
                int gotFrame = 0;
                int result = avcodec_decode_audio4(codecContext, frame, &gotFrame, &decodingPacket);

                if (result < 0)
                {
                    // Decoding error: give up on the rest of this packet.
                    break;
                }

                if (gotFrame)
                {
                    // et voila! a decoded audio frame!
                    printAudioFrameInfo(codecContext, frame);
                }
                else if (result == 0)
                {
                    // Nothing consumed and nothing produced: stop, or we would spin forever.
                    break;
                }

                // Skip the bytes the decoder consumed, even when no frame came out yet,
                // so the remainder of the packet is still decoded.
                decodingPacket.size -= result;
                decodingPacket.data += result;
            }
        }

        // You MUST call av_free_packet() after each call to av_read_frame()
        // or you will leak so much memory on a large file you will need a memory-plumber!
        av_free_packet(&readingPacket);
    }

    // Some codecs will cause frames to be buffered in the decoding process. 
    // If the CODEC_CAP_DELAY flag is set, there can be buffered frames that need to be flushed
    // therefore flush them now....
    if (codecContext->codec->capabilities & CODEC_CAP_DELAY)
    {
        av_init_packet(&readingPacket);
        // av_init_packet() leaves data/size alone; an empty packet is what asks
        // the decoder to hand back its buffered frames.
        readingPacket.data = nullptr;
        readingPacket.size = 0;
        // Decode all the remaining frames in the buffer
        int gotFrame = 0;
        while (avcodec_decode_audio4(codecContext, frame, &gotFrame, &readingPacket) >= 0 && gotFrame)
        {
            // Again: a fully decoded audio frame!
            printAudioFrameInfo(codecContext, frame);
        }
    }

    // Clean up! (unless you have a quantum memory machine with infinite RAM....)
    av_free(frame);
    avcodec_close(codecContext);
    avformat_close_input(&formatContext);

    return 0;  // success!!!!!!!!
}

Hope this helps. Let me know if you need more info, and I will try and help out:)

There is also some very good tutorial information available at dranger.com which you may find useful.

Upvotes: 3

Oswald

Reputation: 31647

Preallocate the format context and set its pb field as suggested in the note of avformat_open_input() documentation.

Upvotes: 1

Decode Audio from Memory - C++

Answers (2)

Related Questions