IMFSourceReader M4A Audio Accurate Frame Seek

Question

I'm using IMFSourceReader to continuously buffer one-second portions of audio files from disk. I'm unable to seek accurately within M4A audio data (AAC-encoded), and this results in a discontinuous audio stream.

I'm aware that the data returned by IMFSourceReader::ReadSample() is usually offset by a few hundred frames into the past relative to the position set with IMFSourceReader::SetCurrentPosition(). However, even after accounting for this offset I'm unable to create a continuous, glitch-free stream (see the readCall == 0 condition in the code below).

I am able to accurately seek portions of WAV files (uncompressed) so my offset calculation appears to be correct.

My question is whether the Media Foundation library is able to accurately seek/read portions of AAC encoded M4A files (or any compressed audio for that matter)?

Here's the code. inStartFrame is the sample frame I'm trying to read. The output format is configured as 32-bit floating-point data (see the final function). To trim it down a little, I've removed some error checks and cleanup, e.g. end-of-file handling.

bool WindowsM4AReader::read(float** outBuffer, int inNumChannels, int64_t inStartFrame, int64_t inNumFramesToRead)
{
    int64_t hnsToRequest = SampleFrameToHNS(inStartFrame);
    int64_t frameRequested = HNSToSampleFrame(hnsToRequest);

    PROPVARIANT positionProp;
    positionProp.vt = VT_I8;
    positionProp.hVal.QuadPart = hnsToRequest;
    HRESULT hr = mReader->SetCurrentPosition(GUID_NULL, positionProp);
    mReader->Flush(0);

    IMFSample* pSample = nullptr;
    int bytesPerFrame = sizeof(float) * mNumChannels;
    int64_t totalFramesWritten = 0;
    int64_t remainingFrames = inNumFramesToRead;

    int readCall = 0;
    bool quit = false;

    while (!quit) {
        DWORD streamIndex = 0;
        DWORD flags = 0;
        LONGLONG llTimeStamp = 0;

        hr = mReader->ReadSample(
            MF_SOURCE_READER_FIRST_AUDIO_STREAM,    // Stream index.
            0,                                      // Flags.
            &streamIndex,                           // Receives the actual stream index. 
            &flags,                                 // Receives status flags.
            &llTimeStamp,                           // Receives the time stamp.
            &pSample                                // Receives the sample or NULL.
        );

        int64_t frameOffset = 0;

        if (readCall == 0) {
            int64_t hnsOffset = hnsToRequest - llTimeStamp;
            frameOffset = HNSToSampleFrame(hnsOffset);
        }

        ++readCall;

        if (pSample) {
            IMFMediaBuffer* decodedBuffer = nullptr;
            pSample->ConvertToContiguousBuffer(&decodedBuffer);

            BYTE* rawBuffer = nullptr;
            DWORD maxLength = 0;
            DWORD bufferLengthInBytes = 0;
            decodedBuffer->Lock(&rawBuffer, &maxLength, &bufferLengthInBytes);

            int64_t availableFrames = bufferLengthInBytes / bytesPerFrame;
            availableFrames -= frameOffset;
            int64_t framesToCopy = min(availableFrames, remainingFrames);

            // copy to outputBuffer
            float* floatBuffer = (float*)rawBuffer;
            float* offsetBuffer = &floatBuffer[frameOffset * mNumChannels];

            for (int channel = 0; channel < mNumChannels; ++channel) {
                for (int64_t frame = 0; frame < framesToCopy; ++frame) {
                    float sampleValue = offsetBuffer[frame * mNumChannels + channel];
                    outBuffer[channel][totalFramesWritten + frame] = sampleValue;
                }
            }

            decodedBuffer->Unlock();

            totalFramesWritten += framesToCopy;
            remainingFrames -= framesToCopy;

            if (totalFramesWritten >= inNumFramesToRead)
                quit = true;
        }
    }
}

// Converts a sample-frame index into a time expressed in 100-nanosecond units
// (10,000,000 units per second), using mSampleRate as frames per second.
//
// The result is rounded to the nearest unit instead of truncated. With
// truncation, frame 1 at 44100 Hz becomes 226 units, which converts back to
// frame 0; rounding keeps the frame -> time -> frame round trip stable.
// Returns 0 when mSampleRate is not positive.
LONGLONG WindowsM4AReader::SampleFrameToHNS(int64_t inFrame)
{
    if (mSampleRate <= 0)
        return 0;

    // Multiply before dividing so the only rounding error comes from the
    // final division.
    const double hns = (static_cast<double>(inFrame) * 10000000.0) / mSampleRate;
    return static_cast<LONGLONG>(hns < 0.0 ? hns - 0.5 : hns + 0.5);
}

// Converts a time (or time difference) expressed in 100-nanosecond units into
// a number of sample frames, using mSampleRate as frames per second.
//
// The result is rounded to the nearest frame instead of truncated, so a value
// that is a hair below a whole frame (e.g. 0.9967 frames) is not reported as
// one frame too few. Negative inputs are rounded symmetrically.
// Returns 0 when mSampleRate is not positive.
int64_t WindowsM4AReader::HNSToSampleFrame(LONGLONG inHNS)
{
    if (mSampleRate <= 0)
        return 0;

    const double frames = (static_cast<double>(inHNS) * mSampleRate) / 10000000.0;
    return static_cast<int64_t>(frames < 0.0 ? frames - 0.5 : frames + 0.5);
}

bool WindowsM4AReader::ConfigureAsFloatDecoder()
{
    IMFMediaType* outputType = nullptr;

    HRESULT hr = MFCreateMediaType(&outputType);

    UINT32 bitsPerSample = sizeof(float) * 8;
    UINT32 blockAlign = mNumChannels * (bitsPerSample / 8);
    UINT32 bytesPerSecond = blockAlign * (UINT32)mSampleRate;

    hr = outputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio);
    hr = outputType->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_Float);
    hr = outputType->SetUINT32(MF_MT_AUDIO_PREFER_WAVEFORMATEX, TRUE);
    hr = outputType->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, (UINT32)mNumChannels);
    hr = outputType->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, (UINT32)mSampleRate);
    hr = outputType->SetUINT32(MF_MT_AUDIO_BLOCK_ALIGNMENT, blockAlign);
    hr = outputType->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, bytesPerSecond);
    hr = outputType->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, bitsPerSample);
    hr = outputType->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, TRUE);

    DWORD streamIndex = 0;
    hr = mReader->SetCurrentMediaType(streamIndex, NULL, outputType);
    return true;
}

IMFSourceReader M4A Audio Accurate Frame Seek

Answers (1)

Related Questions