Reputation: 5746

How to record continuous raw audio data into a circular buffer with C++ on Windows 10?

Since Windows Multimedia turned out to be utterly incapable of recording continuous audio, I got the hint to use Windows Core Audio. There is sort of a manual here, but I can't figure out how to write the loads of overhead code to get the recording working. Can anyone provide a complete, minimal implementation of continuous audio recording to a circular buffer?

So far I am stuck at the code below not getting past the line pEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &pDevice); because pEnumerator remains nullptr.

#define VC_EXTRALEAN
#define _USE_MATH_DEFINES
#include <Windows.h>
#include <Audioclient.h>
#include <Mmdeviceapi.h>

#define REFTIMES_PER_SEC  10000000
#define REFTIMES_PER_MILLISEC  10000

// First attempt at WASAPI loopback capture: opens the default render endpoint
// in shared mode, starts the stream and polls it for packets in an endless loop.
// The captured data is not stored anywhere yet (the CopyData call is commented out).
// NOTE(review): none of the HRESULT values returned by the COM/WASAPI calls are checked.
int main() {
    // Requested buffer duration: REFTIMES_PER_SEC REFERENCE_TIME units, i.e. one second.
    REFERENCE_TIME hnsRequestedDuration = REFTIMES_PER_SEC;
    UINT32 bufferFrameCount;
    UINT32 numFramesAvailable;

    IMMDeviceEnumerator* pEnumerator = NULL;
    IMMDevice* pDevice = NULL;
    IAudioClient* pAudioClient = NULL;
    IAudioCaptureClient* pCaptureClient = NULL;
    WAVEFORMATEX* pwfx = NULL;
    UINT32 packetLength = 0;
    BYTE* pData;
    DWORD flags;

    // NOTE(review): this listing never calls CoInitializeEx before using COM.
    // pEnumerator is reported to still be NULL after CoCreateInstance, so the
    // GetDefaultAudioEndpoint call right below dereferences a null pointer.
    CoCreateInstance(__uuidof(MMDeviceEnumerator), NULL, CLSCTX_ALL, __uuidof(IMMDeviceEnumerator), (void**)&pEnumerator);
    // eRender together with AUDCLNT_STREAMFLAGS_LOOPBACK (below) selects loopback
    // capture of the default output device rather than a microphone.
    pEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &pDevice);
    pDevice->Activate(__uuidof(IAudioClient), CLSCTX_ALL, NULL, (void**)&pAudioClient);
    // The stream is initialized with whatever format GetMixFormat reports.
    pAudioClient->GetMixFormat(&pwfx);
    pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_LOOPBACK, hnsRequestedDuration, 0, pwfx, NULL);
    pAudioClient->GetBufferSize(&bufferFrameCount); // Get the size of the allocated buffer.
    pAudioClient->GetService(__uuidof(IAudioCaptureClient), (void**)&pCaptureClient);

    // Calculate the actual duration of the allocated buffer.
    REFERENCE_TIME hnsActualDuration = (double)REFTIMES_PER_SEC* bufferFrameCount / pwfx->nSamplesPerSec;

    pAudioClient->Start();  // Start recording.

    // Each loop fills about half of the shared buffer.
    // NOTE(review): the loop has no exit condition, so the Stop() call and the
    // return statement after it are never reached.
    while(true) {
        // Sleep for half the buffer duration.
        Sleep(hnsActualDuration/REFTIMES_PER_MILLISEC/2);
        pCaptureClient->GetNextPacketSize(&packetLength);
        // Drain every packet that is currently queued; each GetBuffer is paired
        // with a ReleaseBuffer before the next packet size is queried.
        while(packetLength != 0) {
            // Get the available data in the shared buffer.
            pCaptureClient->GetBuffer(&pData, &numFramesAvailable, &flags, NULL, NULL);
            if(flags&AUDCLNT_BUFFERFLAGS_SILENT) {
                pData = NULL;  // Tell CopyData to write silence.
            }

            // Copy the available capture data to the audio sink.
            //hr = pMySink->CopyData(pData, numFramesAvailable, &bDone);

            pCaptureClient->ReleaseBuffer(numFramesAvailable);
            pCaptureClient->GetNextPacketSize(&packetLength);
        }
    }
    pAudioClient->Stop();
    return 0;
}

EDIT (24.07.2021):

Here is an update of my code for troubleshooting:

#define VC_EXTRALEAN
#define _USE_MATH_DEFINES
#include <Windows.h>
#include <Audioclient.h>
#include <Mmdeviceapi.h>

#include <chrono>
/// Minimal stopwatch: measures the wall time elapsed since construction or the
/// most recent start() call.
///
/// Uses std::chrono::steady_clock because it is monotonic; a clock that can be
/// adjusted may report negative or jumping intervals. All names are fully
/// qualified so the class does not depend on a `using namespace std` directive.
class Clock {
private:
    using clock_type = std::chrono::steady_clock;
    clock_type::time_point t; // reference point set by start()
public:
    /// Starts timing immediately.
    Clock() { start(); }
    /// Resets the reference point to "now".
    void start() { t = clock_type::now(); }
    /// Returns the seconds elapsed since the last start(); does not reset the clock.
    double stop() const { return std::chrono::duration<double>(clock_type::now()-t).count(); }
};

// Sizes are spelled `unsigned int` because the `uint` alias used elsewhere in
// this listing is not declared in it.
const unsigned int base        =   4096;
const unsigned int sample_rate =  48000; // must be supported by microphone
const unsigned int sample_size = 1*base; // must be a power of 2
const unsigned int bandwidth   =   5000; // must be <= sample_rate/2

// History buffer holding the sample_size most recent mono samples, newest at
// index 0. Value-initialized to silence; it lives for the whole program run.
float* wave = new float[sample_size]();

/// Pushes `offset` new stereo frames into the history buffer `wave`.
///
/// Existing samples are shifted towards higher indices (the oldest ones drop off
/// the end), then the new frames are stored as mono (average of left and right)
/// with the newest frame at wave[0].
///
/// @param wave   history buffer with room for sample_size floats; ignored if null
/// @param buffer interleaved stereo frames (L,R,L,R,...), at least 2*offset floats;
///               a null pointer means "silence" and inserts zeros
/// @param offset number of frames in `buffer`; values <= 0 are a no-op. If it
///               exceeds sample_size, only the newest sample_size frames are kept.
void fill(float* const wave, const float* const buffer, int offset) {
    if(wave == nullptr || offset <= 0) return; // nothing to insert

    const unsigned int frames = static_cast<unsigned int>(offset);
    // Number of slots that receive new data; never more than the buffer holds.
    const unsigned int fresh = frames < sample_size ? frames : sample_size;

    // Shift the surviving history back by `fresh` slots. Walking from the last
    // valid index (sample_size-1) downwards keeps overlapping moves correct and
    // never touches wave[sample_size].
    for(unsigned int i = sample_size; i-- > fresh; ) {
        wave[i] = wave[i-fresh];
    }
    // Store the new frames in reverse order so that the newest lands at wave[0].
    for(unsigned int i = 0; i < fresh; i++) {
        const unsigned int p = frames-1-i;
        wave[i] = buffer != nullptr ? 0.5f*(buffer[2*p]+buffer[2*p+1]) : 0.0f; // left and right channels
    }
}

int main() {
    for(uint i=0; i<sample_size; i++) wave[i] = 0.0f;
    
    Clock clock;

    #define REFTIMES_PER_SEC  10000000
    #define REFTIMES_PER_MILLISEC  10000

    REFERENCE_TIME hnsRequestedDuration = REFTIMES_PER_SEC;
    UINT32 bufferFrameCount;
    UINT32 numFramesAvailable;

    IMMDeviceEnumerator* pEnumerator = NULL;
    IMMDevice* pDevice = NULL;
    IAudioClient* pAudioClient = NULL;
    IAudioCaptureClient* pCaptureClient = NULL;
    WAVEFORMATEX* pwfx = NULL;
    UINT32 packetLength = 0;
    BYTE* pData;
    DWORD flags;

    CoInitializeEx(NULL, COINIT_MULTITHREADED);
    CoCreateInstance(__uuidof(MMDeviceEnumerator), NULL, CLSCTX_ALL, __uuidof(IMMDeviceEnumerator), (void**)&pEnumerator);
    pEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &pDevice);
    pDevice->Activate(__uuidof(IAudioClient), CLSCTX_ALL, NULL, (void**)&pAudioClient);
    pAudioClient->GetMixFormat(&pwfx);
    
    println(pwfx->wFormatTag);// 65534
    println(WAVE_FORMAT_PCM);// 1
    println(pwfx->nChannels);// 2
    println((uint)pwfx->nSamplesPerSec);// 48000
    println(pwfx->wBitsPerSample);// 32
    println(pwfx->nBlockAlign);// 8
    println(pwfx->wBitsPerSample*pwfx->nChannels/8);// 8
    println((uint)pwfx->nAvgBytesPerSec);// 384000
    println((uint)(pwfx->nBlockAlign*pwfx->nSamplesPerSec*pwfx->nChannels));// 768000
    println(pwfx->cbSize);// 22

    pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_LOOPBACK, hnsRequestedDuration, 0, pwfx, NULL);
    pAudioClient->GetBufferSize(&bufferFrameCount); // Get the size of the allocated buffer.
    pAudioClient->GetService(__uuidof(IAudioCaptureClient), (void**)&pCaptureClient);

    // Calculate the actual duration of the allocated buffer.
    //REFERENCE_TIME hnsActualDuration = (double)REFTIMES_PER_SEC* bufferFrameCount / pwfx->nSamplesPerSec;

    pAudioClient->Start();  // Start recording.
    
    while(running) {

        pCaptureClient->GetNextPacketSize(&packetLength); // packetLength and numFramesAvailable are either 0 or 480
        pCaptureClient->GetBuffer(&pData, &numFramesAvailable, &flags, NULL, NULL);

        const int offset = (uint)numFramesAvailable;
        if(offset>0) {
            fill(wave, (float*)pData, offset); // here I add pData to the circular buffer "wave"
        }

        while(packetLength != 0) {
            pCaptureClient->GetBuffer(&pData, &numFramesAvailable, &flags, NULL, NULL); // Get the available data in the shared buffer.
            if(flags&AUDCLNT_BUFFERFLAGS_SILENT) {
                pData = NULL;  // Tell CopyData to write silence.
            }
            pCaptureClient->ReleaseBuffer(numFramesAvailable);
            pCaptureClient->GetNextPacketSize(&packetLength);
        }

        sleep(1.0/120.0-clock.stop());
        clock.start();
    }
    pAudioClient->Stop();
}

Upvotes: 2

Answers (2)

Moeren

Reputation: 179

You could use this audio library instead. It's way easier to get up and running than trying to interface with the platform-specific SDKs:

http://www.music.mcgill.ca/~gary/rtaudio/recording.html

Also, while removing the sleep might not help in your example, you should never call sleep, lock a mutex, or allocate memory during audio processing. The delay introduced by these operations is unpredictable and can be long compared to the short buffer times, so it will always create problems for you.

Upvotes: 0

catnip

Reputation: 25388

You're not calling CoInitializeEx, so all COM calls will fail.

You should also be testing all calls to see if they return an error.

To address the questions posed in the comments:

I believe that if you want to operate the endpoint in shared mode then you have to use the parameters returned by GetMixFormat. This means that:

you are limited to the one sample rate (unless you write code to perform a conversion, which is a non-trivial task)
if you want the samples as floats, you will have to convert them yourself

To write code that runs on all machines, you must cater for whatever the mix format throws at you. This might be:

16 bit integers
24 bit integers (3 bytes per sample, i.e. nBlockAlign = 3 × nChannels)
24 bit integers in 32 bit containers (4 bytes per sample, i.e. nBlockAlign = 4 × nChannels)
32 bit integers
32 bit floating point (rare)
64 bit floating point (unheard of, in my experience)

The samples will be in the native byte order of the machine your code is running on, and are interleaved.

So, case out on the various parameters in pwfx and write the relevant code for each sample format you want to support.

Assuming you want your floats to be normalised to -1 .. +1, and 2-channel input data, you might do this for 16 bit integers, for example:

// Illustrative fragment: converts interleaved 16-bit integer samples from the
// capture buffer (pData) to floats, one output sample per input sample.
const int16_t *inbuf = (const int16_t *) pData;
// Destination buffer; its definition is elided in this fragment.
float *outbuf = ...;

// numFramesAvailable * 2 samples in total: two samples per frame (2-channel input).
for (int i = 0; i < numFramesAvailable * 2; ++i)
{
    int16_t sample = *inbuf++;
    // Multiply by the reciprocal of 32767 instead of dividing.
    // NOTE(review): with this factor -32768 maps to slightly below -1.0 — confirm
    // whether 1.0 / 32768 is preferred.
    *outbuf++ = (float) (sample * (1.0 / 32767));
}

Note that I avoid a (slow) floating point division by multiplying by the reciprocal (the compiler will pre-calculate 1.0 / 32767).

I'll leave the rest to you.

Upvotes: 2

How to record continuous raw audio data into a circular buffer with C++ on Windows 10?

Answers (2)

Related Questions