Reputation: 5746
Since Windows Multimedia turned out to be utterly incapable of recording continuous audio, I got the hint to use Windows Core Audio. There is sort of a manual here, but I can't figure out how to write the loads of overhead code to get the recording working. Can anyone provide a complete, minimal implementation of continuous audio recording to a circular buffer?
So far I am stuck at the code below: it does not get past the line `pEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &pDevice);` because `pEnumerator` remains `nullptr`.
#define VC_EXTRALEAN
#define _USE_MATH_DEFINES
#include <Windows.h>
#include <Audioclient.h>
#include <Mmdeviceapi.h>
#define REFTIMES_PER_SEC 10000000
#define REFTIMES_PER_MILLISEC 10000
// First attempt from the question: opens the default eRender/eConsole endpoint
// through IMMDeviceEnumerator, initialises an IAudioClient in shared mode with
// AUDCLNT_STREAMFLAGS_LOOPBACK and then polls an IAudioCaptureClient forever.
// NOTE(review): no CoInitialize/CoInitializeEx call appears anywhere in this
// function before CoCreateInstance, and the return value of every call below
// is discarded, so a failing step is only noticed when the next line
// dereferences a pointer that is still NULL.
int main() {
// Requested buffer length: REFTIMES_PER_SEC units, i.e. one second at the
// 10,000,000-units-per-second scale defined by the macro above.
REFERENCE_TIME hnsRequestedDuration = REFTIMES_PER_SEC;
UINT32 bufferFrameCount;
UINT32 numFramesAvailable;
// All interface pointers start out NULL; they are filled in by the calls below.
IMMDeviceEnumerator* pEnumerator = NULL;
IMMDevice* pDevice = NULL;
IAudioClient* pAudioClient = NULL;
IAudioCaptureClient* pCaptureClient = NULL;
WAVEFORMATEX* pwfx = NULL;
UINT32 packetLength = 0;
BYTE* pData;
DWORD flags;
// Create the device enumerator. If this call fails, pEnumerator keeps its
// initial NULL value and the very next line dereferences it.
CoCreateInstance(__uuidof(MMDeviceEnumerator), NULL, CLSCTX_ALL, __uuidof(IMMDeviceEnumerator), (void**)&pEnumerator);
pEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &pDevice);
pDevice->Activate(__uuidof(IAudioClient), CLSCTX_ALL, NULL, (void**)&pAudioClient);
// The stream is initialised with whatever format GetMixFormat reports.
pAudioClient->GetMixFormat(&pwfx);
pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_LOOPBACK, hnsRequestedDuration, 0, pwfx, NULL);
pAudioClient->GetBufferSize(&bufferFrameCount); // Get the size of the allocated buffer.
pAudioClient->GetService(__uuidof(IAudioCaptureClient), (void**)&pCaptureClient);
// Calculate the actual duration of the allocated buffer
// (frames / frames-per-second, scaled to REFTIMES_PER_SEC units).
REFERENCE_TIME hnsActualDuration = (double)REFTIMES_PER_SEC* bufferFrameCount / pwfx->nSamplesPerSec;
pAudioClient->Start(); // Start recording.
// Each loop fills about half of the shared buffer.
// NOTE(review): the loop condition is the constant true and the body has no
// break, so the Stop() call and the return after the loop are never reached.
while(true) {
// Sleep for half the buffer duration (converted to milliseconds).
Sleep(hnsActualDuration/REFTIMES_PER_MILLISEC/2);
pCaptureClient->GetNextPacketSize(&packetLength);
// Drain every packet that is currently queued.
while(packetLength != 0) {
// Get the available data in the shared buffer.
pCaptureClient->GetBuffer(&pData, &numFramesAvailable, &flags, NULL, NULL);
if(flags&AUDCLNT_BUFFERFLAGS_SILENT) {
pData = NULL; // Tell CopyData to write silence.
}
// Copy the available capture data to the audio sink.
// The sink call is commented out, so the packet is released without being
// stored anywhere.
//hr = pMySink->CopyData(pData, numFramesAvailable, &bDone);
pCaptureClient->ReleaseBuffer(numFramesAvailable);
pCaptureClient->GetNextPacketSize(&packetLength);
}
}
pAudioClient->Stop();
return 0;
}
EDIT (24.07.2021):
Here is an update of my code for troubleshooting:
#define VC_EXTRALEAN
#define _USE_MATH_DEFINES
#include <Windows.h>
#include <Audioclient.h>
#include <Mmdeviceapi.h>
#include <chrono>
/// Minimal stopwatch: measures the time elapsed since construction or since
/// the most recent start(), in seconds.
///
/// Uses std::chrono::steady_clock so that the measured interval can never be
/// negative or jump when the system time is adjusted. All names are fully
/// qualified, so the class does not depend on a `using namespace std;`.
class Clock {
private:
    using clock_type = std::chrono::steady_clock;
    std::chrono::time_point<clock_type> t; ///< Time point of the last start().
public:
    /// Starts timing immediately.
    Clock() { start(); }
    /// Restarts the measurement from "now".
    void start() { t = clock_type::now(); }
    /// @return Seconds elapsed since the last start(); does not reset the clock.
    double stop() const { return std::chrono::duration<double>(clock_type::now() - t).count(); }
};
// Capture configuration. Spelled with `unsigned int` so the declarations do
// not rely on a non-standard `uint` typedef being in scope.
const unsigned int base = 4096;
const unsigned int sample_rate = 48000; // must be supported by microphone
const unsigned int sample_size = 1*base; // must be a power of 2
const unsigned int bandwidth = 5000; // must be <= sample_rate/2
// History buffer of mono samples, newest sample at index 0 and oldest at
// index sample_size-1. Value-initialised so it starts out as silence.
float* wave = new float[sample_size]();

/// Pushes `offset` new stereo frames into the history buffer `wave`.
///
/// Existing samples are shifted towards higher indices (the oldest ones fall
/// off the end) and the new frames are stored newest-first at the front, each
/// one down-mixed to mono as the average of its two channels.
///
/// @param wave   History buffer holding exactly `sample_size` floats.
/// @param buffer Interleaved stereo input (left, right, left, right, ...)
///               holding at least `2 * offset` floats.
/// @param offset Number of frames in `buffer`. Values <= 0 leave `wave`
///               untouched; if more than `sample_size` frames are supplied,
///               only the newest `sample_size` of them are kept.
void fill(float* const wave, const float* const buffer, int offset) {
    if(offset <= 0) {
        return; // nothing to insert (also guards against negative indices)
    }
    const int capacity = static_cast<int>(sample_size);
    // Never write more frames than the history buffer can hold.
    const int fresh = offset < capacity ? offset : capacity;
    // Shift the old samples back, starting at the LAST valid index
    // (capacity-1); starting at `capacity` would write one element past the
    // end of the buffer.
    for(int i = capacity - 1; i >= fresh; i--) {
        wave[i] = wave[i - fresh];
    }
    // Store the new frames newest-first: wave[0] receives the last frame.
    for(int i = 0; i < fresh; i++) {
        const int p = offset - 1 - i;
        wave[i] = 0.5f*(buffer[2*p] + buffer[2*p + 1]); // left and right channels
    }
}
// Troubleshooting version from the question's edit: initialises COM, opens the
// default eRender/eConsole endpoint with AUDCLNT_STREAMFLAGS_LOOPBACK, prints
// the mix format and then, roughly 120 times per second, pushes captured
// frames into the global history buffer `wave` via fill().
// NOTE(review): `running`, `println` and `sleep` are not declared anywhere in
// this snippet; they presumably come from the author's own utility code.
// NOTE(review): the return value of every COM/WASAPI call is discarded.
int main() {
// Start from silence in the history buffer.
for(uint i=0; i<sample_size; i++) wave[i] = 0.0f;
Clock clock;
// 10,000,000 units per second / 10,000 per millisecond (same scale as in the
// first listing).
#define REFTIMES_PER_SEC 10000000
#define REFTIMES_PER_MILLISEC 10000
REFERENCE_TIME hnsRequestedDuration = REFTIMES_PER_SEC;
UINT32 bufferFrameCount;
UINT32 numFramesAvailable;
IMMDeviceEnumerator* pEnumerator = NULL;
IMMDevice* pDevice = NULL;
IAudioClient* pAudioClient = NULL;
IAudioCaptureClient* pCaptureClient = NULL;
WAVEFORMATEX* pwfx = NULL;
UINT32 packetLength = 0;
BYTE* pData;
DWORD flags;
// Unlike the first listing, COM is initialised before CoCreateInstance.
CoInitializeEx(NULL, COINIT_MULTITHREADED);
CoCreateInstance(__uuidof(MMDeviceEnumerator), NULL, CLSCTX_ALL, __uuidof(IMMDeviceEnumerator), (void**)&pEnumerator);
pEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, &pDevice);
pDevice->Activate(__uuidof(IAudioClient), CLSCTX_ALL, NULL, (void**)&pAudioClient);
pAudioClient->GetMixFormat(&pwfx);
// Diagnostic dump of the mix format; the trailing comments are the values the
// author observed on their machine.
println(pwfx->wFormatTag);// 65534
println(WAVE_FORMAT_PCM);// 1
println(pwfx->nChannels);// 2
println((uint)pwfx->nSamplesPerSec);// 48000
println(pwfx->wBitsPerSample);// 32
println(pwfx->nBlockAlign);// 8
println(pwfx->wBitsPerSample*pwfx->nChannels/8);// 8
println((uint)pwfx->nAvgBytesPerSec);// 384000
// The observed nBlockAlign (8) equals wBitsPerSample*nChannels/8, i.e. it
// already covers both channels, so multiplying by nChannels again yields twice
// nAvgBytesPerSec (768000 vs 384000).
println((uint)(pwfx->nBlockAlign*pwfx->nSamplesPerSec*pwfx->nChannels));// 768000
println(pwfx->cbSize);// 22
pAudioClient->Initialize(AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_LOOPBACK, hnsRequestedDuration, 0, pwfx, NULL);
pAudioClient->GetBufferSize(&bufferFrameCount); // Get the size of the allocated buffer.
pAudioClient->GetService(__uuidof(IAudioCaptureClient), (void**)&pCaptureClient);
// Calculate the actual duration of the allocated buffer.
//REFERENCE_TIME hnsActualDuration = (double)REFTIMES_PER_SEC* bufferFrameCount / pwfx->nSamplesPerSec;
pAudioClient->Start(); // Start recording.
while(running) {
pCaptureClient->GetNextPacketSize(&packetLength); // packetLength and numFramesAvailable are either 0 or 480
// First GetBuffer of the iteration: its frames are the only ones copied into
// `wave`.
// NOTE(review): `flags` is not inspected here, and there is no ReleaseBuffer
// between this GetBuffer and the GetBuffer inside the loop below - confirm
// against the IAudioCaptureClient contract.
pCaptureClient->GetBuffer(&pData, &numFramesAvailable, &flags, NULL, NULL);
const int offset = (uint)numFramesAvailable;
if(offset>0) {
// NOTE(review): the cast assumes the packet holds interleaved 2-channel
// 32-bit float samples (fill() reads buffer[2*p] and buffer[2*p+1]); the
// format tag printed above is 65534, not WAVE_FORMAT_PCM, and the sample
// type is never checked - confirm.
fill(wave, (float*)pData, offset); // here I add pData to the circular buffer "wave"
}
// Drain the remaining queued packets. Nothing in this loop copies data, so
// every packet fetched here is released without reaching `wave`.
while(packetLength != 0) {
pCaptureClient->GetBuffer(&pData, &numFramesAvailable, &flags, NULL, NULL); // Get the available data in the shared buffer.
if(flags&AUDCLNT_BUFFERFLAGS_SILENT) {
pData = NULL; // Tell CopyData to write silence.
}
pCaptureClient->ReleaseBuffer(numFramesAvailable);
pCaptureClient->GetNextPacketSize(&packetLength);
}
// Pace the loop: wait for what is left of a 1/120 s slot after subtracting the
// time this iteration took (presumably sleep() takes seconds - confirm), then
// restart the stopwatch.
sleep(1.0/120.0-clock.stop());
clock.start();
}
pAudioClient->Stop();
}
Upvotes: 2
Views: 1234
Reputation: 179
You could use this audio library instead. It's way easier to get up and running than trying to interface with the platform-specific SDKs:
http://www.music.mcgill.ca/~gary/rtaudio/recording.html
Also, while removing the sleep might not help in your example, you should never call sleep, lock a mutex, or allocate memory during audio processing. The delay introduced by those is completely arbitrary compared to the short buffer times, so it will always create problems for you.
Upvotes: 0
Reputation: 25388
You're not calling `CoInitializeEx`, so all COM calls will fail.
You should also be testing all calls to see if they return an error.
To address the questions posed in the comments:
I believe that if you want to operate the endpoint in shared mode then you have to use the parameters returned by `GetMixFormat`. This means that:
you are limited to the one sample rate (unless you write code to perform a conversion, which is a non-trivial task)
if you want the samples as floats, you will have to convert them yourself
To write code that runs on all machines, you must cater for whatever the mix format throws at you. This might be:
16 bit integers
24 bit integers (nBlockAlign = 3)
24 bit integers in 32 bit containers (nBlockAlign = 4)
32 bit integers
32 bit floating point (rare)
64 bit floating point (unheard of, in my experience)
The samples will be in the native byte order of the machine your code is running on, and are interleaved.
So, case out on the various parameters in `pwfx` and write the relevant code for each sample format you want to support.
Assuming you want your `float`s to be normalised to `-1 .. +1`, and 2-channel input data, you might do this for 16 bit integers, for example:
// Illustrative fragment from the answer (not compilable as written): converts
// interleaved 2-channel 16-bit integer samples in pData to floats.
const int16_t *inbuf = (const int16_t *) pData;
// `...` is the answer's placeholder for the caller's destination buffer; the
// loop below writes numFramesAvailable * 2 floats through it.
float *outbuf = ...;
// One iteration per sample: frames * 2 channels.
for (int i = 0; i < numFramesAvailable * 2; ++i)
{
    int16_t sample = *inbuf++;
    // Scale by a precomputed reciprocal instead of dividing.
    // NOTE(review): with 1.0 / 32767 the most negative sample (-32768) maps to
    // slightly below -1.0; a factor of 1.0 / 32768 would keep every result
    // within [-1, 1).
    *outbuf++ = (float) (sample * (1.0 / 32767));
}
Note that I avoid a (slow) floating point division by multiplying by the reciprocal (the compiler will pre-calculate `1.0 / 32767`).
I'll leave the rest to you.
Upvotes: 2