Reputation: 635
I'm using IMFSourceReader to continuously buffer 1 second portions of audio files from disk. I'm unable to accurately seek M4A audio data (AAC encoded) and this results in a discontinuous audio stream.
I'm aware that the data returned by IMFSourceReader.Read() is usually offset by a few hundred frames into the past relative to the position set in IMFSourceReader.SetCurrentPosition(). However, even accounting for this offset I'm unable to create a continuous glitch free stream (see readCall == 0 condition).
I am able to accurately seek portions of WAV files (uncompressed) so my offset calculation appears to be correct.
My question is whether the Media Foundation library is able to accurately seek/read portions of AAC encoded M4A files (or any compressed audio for that matter)?
Here's the code. inStartFrame is the sample frame I'm trying to read. Output format is configured as 32bit floating point data (see final function). To trim it down a little I've removed some error checks and cleanup e.g. end of file.
bool WindowsM4AReader::read(float** outBuffer, int inNumChannels, int64_t inStartFrame, int64_t inNumFramesToRead)
{
int64_t hnsToRequest = SampleFrameToHNS(inStartFrame);
int64_t frameRequested = HNSToSampleFrame(hnsToRequest);
PROPVARIANT positionProp;
positionProp.vt = VT_I8;
positionProp.hVal.QuadPart = hnsToRequest;
HRESULT hr = mReader->SetCurrentPosition(GUID_NULL, positionProp);
mReader->Flush(0);
IMFSample* pSample = nullptr;
int bytesPerFrame = sizeof(float) * mNumChannels;
int64_t totalFramesWritten = 0;
int64_t remainingFrames = inNumFramesToRead;
int readCall = 0;
bool quit = false;
while (!quit) {
DWORD streamIndex = 0;
DWORD flags = 0;
LONGLONG llTimeStamp = 0;
hr = mReader->ReadSample(
MF_SOURCE_READER_FIRST_AUDIO_STREAM, // Stream index.
0, // Flags.
&streamIndex, // Receives the actual stream index.
&flags, // Receives status flags.
&llTimeStamp, // Receives the time stamp.
&pSample // Receives the sample or NULL.
);
int64_t frameOffset = 0;
if (readCall == 0) {
int64_t hnsOffset = hnsToRequest - llTimeStamp;
frameOffset = HNSToSampleFrame(hnsOffset);
}
++readCall;
if (pSample) {
IMFMediaBuffer* decodedBuffer = nullptr;
pSample->ConvertToContiguousBuffer(&decodedBuffer);
BYTE* rawBuffer = nullptr;
DWORD maxLength = 0;
DWORD bufferLengthInBytes = 0;
decodedBuffer->Lock(&rawBuffer, &maxLength, &bufferLengthInBytes);
int64_t availableFrames = bufferLengthInBytes / bytesPerFrame;
availableFrames -= frameOffset;
int64_t framesToCopy = min(availableFrames, remainingFrames);
// copy to outputBuffer
float* floatBuffer = (float*)rawBuffer;
float* offsetBuffer = &floatBuffer[frameOffset * mNumChannels];
for (int channel = 0; channel < mNumChannels; ++channel) {
for (int64_t frame = 0; frame < framesToCopy; ++frame) {
float sampleValue = offsetBuffer[frame * mNumChannels + channel];
outBuffer[channel][totalFramesWritten + frame] = sampleValue;
}
}
decodedBuffer->Unlock();
totalFramesWritten += framesToCopy;
remainingFrames -= framesToCopy;
if (totalFramesWritten >= inNumFramesToRead)
quit = true;
}
}
}
LONGLONG WindowsM4AReader::SampleFrameToHNS(int64_t inFrame)
{
return inFrame * (10000000.0 / mSampleRate);
}
int64_t WindowsM4AReader::HNSToSampleFrame(LONGLONG inHNS)
{
return inHNS / 10000000.0 * mSampleRate;
}
bool WindowsM4AReader::ConfigureAsFloatDecoder()
{
IMFMediaType* outputType = nullptr;
HRESULT hr = MFCreateMediaType(&outputType);
UINT32 bitsPerSample = sizeof(float) * 8;
UINT32 blockAlign = mNumChannels * (bitsPerSample / 8);
UINT32 bytesPerSecond = blockAlign * (UINT32)mSampleRate;
hr = outputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio);
hr = outputType->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_Float);
hr = outputType->SetUINT32(MF_MT_AUDIO_PREFER_WAVEFORMATEX, TRUE);
hr = outputType->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, (UINT32)mNumChannels);
hr = outputType->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, (UINT32)mSampleRate);
hr = outputType->SetUINT32(MF_MT_AUDIO_BLOCK_ALIGNMENT, blockAlign);
hr = outputType->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, bytesPerSecond);
hr = outputType->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, bitsPerSample);
hr = outputType->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, TRUE);
DWORD streamIndex = 0;
hr = mReader->SetCurrentMediaType(streamIndex, NULL, outputType);
return true;
}
Upvotes: 1
Views: 683
Reputation: 1515
If you are using AAC Decoder provided by Microsoft (AAC Decoder), and the MPEG-4 File Source, yes i confirm, you can't seek audio frame with the same precision of wave file.
I'll have to make more test, but i think it's possible to find a workaround in your case.
EDIT
I've made a program to check seek position with SourceReader :
Under Stackoverflow > AudioSourceReaderSeek
Wav format is perfect at seeking, mp3 is good, and m4a not really good. But the m4a file was encoded with VLC. I encoded a m4a file using Mediafoundation encoder. The result is better when seeking with this file (like mp3).
So i would say that the encoder is important for seeking.
It would be interesting to test different audio format, with different encoders.
Also, there is IMFSeekInfo interface
I can't test this interface, because i'm under Windows Seven, and it's for Win8. It would be interesting for someone to test.
Upvotes: 1