Leonisious
Leonisious

Reputation: 23

Interpreting .wav data for fftw

I'm trying to read a .wav file and find the most dominant frequency of the signal. I used this topic to read the file, then I used the function bytesToFloat to convert the result to float.

Finally after I copy the arrays into fftw_complex I run FFTW's plan, find the modulus (sqrt(real*real + im*im)) and find the highest value, but the results are not matching the signal's frequency and the output is usually not a number.

The .wav file I'm using is a 110 Hz (A2) frequency found on Wikipedia.

My questions are:

Is float conversion done properly?

Why is the output vector returning NaN after fft?

How can I read the .wav file so I can use fftw?

Thanks for reading; any help is appreciated.

Full Code:

#include <math.h>

#include <climits>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#include <fftw3.h>

#include "Reader.h"

using namespace std;

/*
 * Canonical 44-byte RIFF/WAVE file header: the RIFF chunk descriptor, a
 * 16-byte "fmt " sub-chunk and the id/size pair that opens the "data"
 * sub-chunk, declared in on-disk order.
 *
 * WavRead fills this struct with a single fread, so the in-memory layout must
 * match the file byte for byte (see the static_assert below). WAV stores
 * multi-byte fields little-endian, so reading them this way also assumes a
 * little-endian host.
 */
typedef struct  WAV_HEADER
{
    /* RIFF chunk descriptor */
    uint8_t         RIFF[4];        // "RIFF" magic
    uint32_t        ChunkSize;      // Size of everything after this field (file size - 8), NOT the audio size
    uint8_t         WAVE[4];        // "WAVE" magic
                                    /* "fmt " sub-chunk */
    uint8_t         fmt[4];         // "fmt " magic
    uint32_t        Subchunk1Size;  // Size of the fmt chunk (16 for plain PCM)
    uint16_t        AudioFormat;    // Format tag: 1=PCM, 3=IEEE float, 6=A-law, 7=mu-law, 257=IBM mu-law, 258=IBM A-law, 259=IBM ADPCM
    uint16_t        NumOfChan;      // Number of channels: 1=mono, 2=stereo
    uint32_t        SamplesPerSec;  // Sampling frequency in Hz
    uint32_t        bytesPerSec;    // Bytes per second
    uint16_t        blockAlign;     // Bytes per sample frame: 2=16-bit mono, 4=16-bit stereo
    uint16_t        bitsPerSample;  // Number of bits per sample
                                    /* "data" sub-chunk */
    uint8_t         Subchunk2ID[4]; // "data" magic
    uint32_t        Subchunk2Size;  // Length of the sampled data in bytes
} wav_hdr;

// A padded struct would silently shift every field after the padding when the
// header is read with one fread, so refuse to build on such an ABI.
static_assert(sizeof(wav_hdr) == 44, "wav_hdr must match the 44-byte canonical WAV header with no padding");

int getFileSize(FILE* inFile);
float bytesToFloat(int8_t b0, int8_t b1, int8_t b2, int8_t b3);
void WavRead(string fileName, int& samples, float* floatBuffer);

using namespace std;

int main(void) {
    fftw_complex *in, *out;
    fftw_plan p;

    int numSamples=0;

    float* floatBuffer;
    float* dest;

    floatBuffer = (float*)malloc(sizeof(float));

    WavRead("110.wav", numSamples, floatBuffer);

    in = (fftw_complex*)fftw_malloc(numSamples*sizeof(fftw_complex));
    out = (fftw_complex*)fftw_malloc(numSamples*sizeof(fftw_complex));

    for (int i = 0; i < numSamples; i++)
    {
        in[i][0] = floatBuffer[i];
        in[i][1] = (float)0;
    }

    p = fftw_plan_dft_1d(numSamples, in, out, FFTW_FORWARD, FFTW_ESTIMATE);

    fftw_execute(p);

    dest = (float*)malloc(sizeof(float)*numSamples);

    for (int i = 0; i < numSamples; i++) {
        dest[i] = std::sqrt(out[i][0] * out[i][0] + out[i][1] * out[i][1]);
    }

    double max = 0;
    int index=0;
    for (int i = 0; i < numSamples; i++) {
        if (dest[i] > max) {
            max = dest[i];
            index = i;
        }
    }

    cout << endl << index << endl << max << endl;

    fftw_destroy_plan(p);
    fftw_cleanup();

    system("pause");

    return 0;

}

void WavRead(string fileName, int& samples, float* floatBuffer)
{
    wav_hdr wavHeader;
    int headerSize = sizeof(wav_hdr), filelength = 0;

    const char* filePath;

    filePath = fileName.c_str();

    FILE* wavFile = fopen(filePath, "r");
    if (wavFile == nullptr)
    {
        fprintf(stderr, "Unable to open wave file: %s\n", filePath);
        system("pause");
    }

    //Read the header
    size_t bytesRead = fread(&wavHeader, 1, headerSize, wavFile);
    if (bytesRead > 0)
    {
        //Read the data
        uint16_t bytesPerSample = wavHeader.bitsPerSample / 8;      //Number     of bytes per sample
        uint64_t numSamples = wavHeader.ChunkSize / bytesPerSample; //How many samples are in the wav file?
        samples = numSamples;
        static const uint16_t BUFFER_SIZE = numSamples*sizeof(float);
        int8_t* buffer = new int8_t[BUFFER_SIZE];

        floatBuffer = (float*)malloc(sizeof(float)*numSamples);

        while ((bytesRead = fread(buffer, sizeof buffer[0], BUFFER_SIZE / (sizeof buffer[0]), wavFile)) > 0)
        {
        }

        for (int i = 0; i < numSamples * 4; i += 4)
        {
            floatBuffer[i / 4] = bytesToFloat(i, i + 1, i + 2, i + 3);
        }

        delete[] buffer;
        buffer = nullptr;
    }
    fclose(wavFile);
}

// Returns the size of inFile in bytes and leaves the stream positioned at the
// start of the file (any previous position is NOT restored).
// Returns -1 when inFile is null, when seeking or telling fails, or when the
// size does not fit in an int.
int getFileSize(FILE* inFile)
{
    if (inFile == nullptr || fseek(inFile, 0, SEEK_END) != 0)
    {
        return -1;
    }

    const long endOffset = ftell(inFile);
    const bool rewound = fseek(inFile, 0, SEEK_SET) == 0;

    // ftell reports failure as -1L; a size above INT_MAX would be silently
    // truncated by the int return type, so treat it as a failure too.
    if (!rewound || endOffset < 0 || endOffset > INT_MAX)
    {
        return -1;
    }
    return static_cast<int>(endOffset);
}

// Builds a float from four raw bytes. The bytes are laid out in memory in
// reverse argument order (b3 first, b0 last) and that storage is then read
// back as a float, so which argument is most significant depends on the
// host's byte order.
float bytesToFloat(int8_t b0, int8_t b1, int8_t b2, int8_t b3)
{
    const int8_t reversed[4] = { b3, b2, b1, b0 };

    float value;
    char* storage = reinterpret_cast<char*>(&value);
    for (int k = 0; k < 4; ++k)
    {
        storage[k] = static_cast<char>(reversed[k]);
    }
    return value;
}

Upvotes: 1

Views: 2270

Answers (1)

Csaba Toth
Csaba Toth

Reputation: 10697

WAV is a container format (a type of RIFF container). As a container, it can hold audio encoded with any codec that was registered on the recording machine. Each codec has a FOURCC. Even if your float conversion were correct for PCM (Pulse Code Modulation, meaning the samples are stored more or less as they were recorded), it would fail if the encoded audio stream is not PCM. So you must make sure in your code that the AudioFormat is 1 (PCM). Sometimes this is called RAW encoding.

If it's not raw: the mu-law and ADPCM codecs are not too complicated, but you are better off requiring the RAW format. If you can't, you need to integrate a decoding library into your project. How to do that largely depends on which platform you are on (Linux, Windows, Mac). I don't see any hints of Windows libraries in your code, so if you are on Linux you need to install the lame and lame-dev packages (the exact names depend on which distribution you use) and read up on its API.

Decoding depends on the actual library's API, but usually:

  1. Configure the decoding library with the metadata you read from the container headers (whether it's stereo - that matters on your side too - the sampling frequency, whether the sampling resolution is 16 or 24 bit, etc.)
  2. Extract the audio stream from the container - that's the RAW buffer, without any float conversion, since you don't know the format of the data and it is most likely compressed
  3. Pass it to the codec and let it do its job.

After that the codec library will supply you back the RAW PCM data. And you can work off of that data.

I didn't have time to set up a test bed for this or debug it. These are general directions and things you have to take care of.

Upvotes: 1

Related Questions