nwf1115
nwf1115

Reputation: 37

Extending .WAV file with A4 Tone

My objective here is to get some insight into how to lengthen a .WAV file while adding an A4 tone and a 2000 Hz sine wave. As an example, I should be able to take a 10-second .WAV file and extend it to 30 seconds, adding an A4 tone from the 10-second mark to the 30-second mark and a 2000 Hz sine wave throughout. My current code is below:

#include <iostream>
#include <fstream>
#include <stdint.h>
#include <cstdlib>
#include <ctime>
#include <math.h>

using namespace std;
//Structures for Headers
// Leading 12 bytes of a RIFF/WAVE file. The struct is read from and written
// to disk as raw bytes, so its in-memory layout must match the file layout.
struct RIFFHeader
{
  char     chunkId[4]; // chunk tag; this file prints it but does not validate it
  uint32_t chunkSize;  // size field as stored in the file
  char     format[4];  // form type tag following the size field
};

// Raw-byte I/O only works if the compiler inserted no padding.
static_assert(sizeof(RIFFHeader) == 12, "RIFFHeader must be exactly 12 bytes for raw file I/O");

// Chunk header plus the 16-byte PCM body of the format sub-chunk. The struct
// is read from and written to disk as raw bytes. If chunkSize is larger than
// 16, the file carries extra format bytes that are NOT covered by this struct
// and must be skipped by the reader.
struct FormatSubChunk
{
    char     chunkId[4];    // chunk tag
    uint32_t chunkSize;     // size of the chunk body that follows this field
    uint16_t audioFormat;   // encoding tag as stored in the file
    uint16_t channels;      // number of interleaved channels
    uint32_t frequency;     // sample rate, in frames per second
    uint32_t byteRate;      // bytes of audio per second
    uint16_t blockAlign;    // bytes per frame (all channels)
    uint16_t bitsPerSample; // bits per single-channel sample
};

// Raw-byte I/O only works if the compiler inserted no padding.
static_assert(sizeof(FormatSubChunk) == 24, "FormatSubChunk must be exactly 24 bytes for raw file I/O");

// 8-byte header that precedes a chunk's payload (used here for the sample
// data chunk). Read from and written to disk as raw bytes.
struct DataSubChunkHeader
{
   char     chunkId[4]; // chunk tag
   uint32_t chunkSize;  // payload size in bytes, excluding this header
};

// Raw-byte I/O only works if the compiler inserted no padding.
static_assert(sizeof(DataSubChunkHeader) == 8, "DataSubChunkHeader must be exactly 8 bytes for raw file I/O");

// One stereo frame of 16-bit audio. The channels are signed, matching the
// int16_t variables the processing code uses for samples; an unsigned type
// would turn every negative half-wave into a large positive value.
struct Sample
{
   int16_t leftchannel;
   int16_t rightchannel;
};
//------------------------------------------------



int main()
{

clock_t start;
double duration;
start = clock();

//Declaring input and output files
string infile = "Frederick_N_orig.wav";
string outfile = "Frederick_N_mod.wav";
ifstream in(infile.c_str(), ios::in | ios::binary);
ofstream out(outfile.c_str());

//Reading Headers
RIFFHeader RIFF;
in.read((char*)&RIFF,sizeof(RIFF));


FormatSubChunk Format;
in.read((char*)&Format,sizeof(Format));



DataSubChunkHeader Data;
in.read((char*)&Data,sizeof(Data));



RIFF.chunkSize = (RIFF.chunkSize - 46)/2 + 46;
Format.frequency = Format.frequency/2;
Format.byteRate = Format.byteRate/2;
Data.chunkSize = (Data.chunkSize-46)/2;


uint16_t clear = 0;

out.write((char*)&RIFF, sizeof(RIFF));
out.write((char*)&Format, sizeof(Format));
out.write((char*)&Data, sizeof(Data));
//---------------------------------------------

//Printing out Header information for troubleshooting
cout << "Chunk ID: " << RIFF.chunkId[0] << RIFF.chunkId[1] << RIFF.chunkId[2] << RIFF.chunkId[3] << endl;
cout << "Chunk Size: " << RIFF.chunkSize << endl;
cout << "Format: " << RIFF.format[0] << RIFF.format[1] << RIFF.format[2] << RIFF.format[3] <<endl;
cout << "Sub-chunk1 ID: " << Format.chunkId[0] << Format.chunkId[1] << Format.chunkId[2] << Format.chunkId[3] <<endl;
cout << "Sub-chunk1 Size: " << Format.chunkSize << endl;
cout << "Audio Format: " << Format.audioFormat << endl;
cout << "Number of Channels: " << Format.channels << endl;
cout << "Sample Rate: " << Format.frequency << endl;
cout << "Byte Rate: " << Format.byteRate << endl;
cout << "Block Align: " << Format.blockAlign << endl;
cout << "Bits Per Sample: " << Format.bitsPerSample << endl;
cout << "Sub-chunk2 ID: " << Data.chunkId[0] << Data.chunkId[1] << Data.chunkId[2] << Data.chunkId[3] << endl;
cout << "Sub-chunk2 Size: " << Data.chunkSize << endl << endl;
//------------------------------------------------------------------------------------------------------------------------
double p = 440;
float pie = 3.1415;


int16_t leftSample1;
int16_t leftSample2;
int16_t rightSample1;
int16_t rightSample2;
int32_t count = 0;

int n = 0;

//Reading in Left and Right Channels and performing processing
while(!in.eof())
{

    in.read((char*)&leftSample1, 2);
    if(in.eof())
        break;

    in.read((char*)&rightSample1, 2);
    if(in.eof())
        break;

    in.read((char*)&leftSample2, 2);
    if(in.eof())
        break;

    in.read((char*)&rightSample2, 2);
    if(in.eof())
        break;      

    const double max_amplitude = 32760;  // "volume"

    double hz        = 22050;    // samples per second
    double frequency = 440;  // middle C
    double seconds   = 21;      // time

    int N = hz * seconds;
    double amplitude = (double)n/N * max_amplitude;
    double value = sin(2*pie*p)*.25;

    int32_t leftAvg = ((int32_t)leftSample1 + (int32_t)leftSample2)/2;
    int32_t rightAvg = ((int32_t)rightSample1 + (int32_t)rightSample2)/2;
    leftAvg = leftAvg + leftAvg*value;
    rightAvg = rightAvg + rightAvg*value;

    n++;


    int16_t outLeft;
    int16_t outRight;

    if(leftAvg > 32767)
        outLeft = 32767;
    else if(leftAvg < -32768)
        outLeft = -32768;
    else
        outLeft = (int16_t)leftAvg;

    if(rightAvg > 32767)
        outRight = 32767;
    else if(rightAvg < -32768)
        outRight = -32768;
    else
        outRight = (int16_t)rightAvg;

    out.write((char*)&outLeft, sizeof(outLeft));
    out.write((char*)&outRight, sizeof(outRight));
    count++;

}
//--------------------------------------------------------------------------------------    
//cleaing up
in.close();
out.close();
//-------------

//Reading output file and comparing to original file
string infile1 = "Frederick_N_mod.wav";
ifstream in1(infile1.c_str(), ios::in | ios::binary);

RIFFHeader riff1;
in1.read((char*)&riff1,sizeof(riff1));


FormatSubChunk format1;
in1.read((char*)&format1,sizeof(format1));


//in.ignore(2);

DataSubChunkHeader data1;
in1.read((char*)&data1,sizeof(data1));
in.close();

cout << "Chunk ID: " << riff1.chunkId[0] << riff1.chunkId[1] << riff1.chunkId[2] << riff1.chunkId[3] << endl;
cout << "Chunk Size: " << riff1.chunkSize << endl;
cout << "Format: " << riff1.format[0] << riff1.format[1] << riff1.format[2] << riff1.format[3] <<endl;
cout << "Sub-chunk1 ID: " << format1.chunkId[0] << format1.chunkId[1] << format1.chunkId[2] << format1.chunkId[3] <<endl;
cout << "Sub-chunk1 Size: " << format1.chunkSize << endl;
cout << "Audio Format: " << format1.audioFormat << endl;
cout << "Number of Channels: " << format1.channels << endl;
cout << "Sample Rate: " << format1.frequency << endl;
cout << "Byte Rate: " << format1.byteRate << endl;
cout << "Block Align: " << format1.blockAlign << endl;
cout << "Bits Per Sample: " << format1.bitsPerSample << endl;
cout << "Sub-chunk2 ID: " << data1.chunkId[0] << data1.chunkId[1] << data1.chunkId[2] << data1.chunkId[3] << endl;
cout << "Sub-chunk2 Size: " << data1.chunkSize << endl << endl;
//---------------------------------------------------------------------------------------------------------------------------------

//Computing execution time and writing summary file
duration = ( std::clock() - start ) / (double) CLOCKS_PER_SEC;

string summaryoutfile = "summary.txt";
ofstream summaryoutput(summaryoutfile.c_str());
summaryoutput << "sampling frequency = " << Format.frequency << '\n';
long recordTime = count/Format.frequency;
summaryoutput << "record time = " << recordTime << " seconds" << '\n';
summaryoutput << "execution time = " << duration << " seconds" << '\n';
//----------------------------------------------------------------------------------------------

summaryoutput.close();

return 0;
}

So, the question is: how do I go about extending the .WAV file with an A4 tone and adding in a 2000 Hz sine wave? When I try with my code above, I get a garbled mess. Any help would be appreciated! Thanks in advance!!

Upvotes: 0

Views: 181

Answers (1)

Ehz
Ehz

Reputation: 2067

There are many pieces to this question, so I'd recommend decomposing the problem into parts and validating that each of them work. I’d also recommend that you use an audio editing tool like Audacity so that you can inspect your results and verify that each step is working. Individual parts that I’d split this into:

  1. Does the code correctly input a wav file and output it identically if you do not do any edits?
  2. Can you then extend the wav file with silence of the duration specified?
  3. Can you generate a sine tone at a specific frequency and sample rate?
  4. Can you mix together the input wave data and any sine tones generated?

Very rough pseudo code for this operation:

Calculate the output wave file's data chunk size as the input wave file's data chunk size + the size of the extension

Maintain values leftValue and rightValue variables that are the calculated output values to be written out

For each sample from start of output wave file length to end wave file length:

  • Set current leftValue and rightValues to 0
  • If there is still remaining input file data, read the left and right input sample values into leftValue and rightValue
  • If there are any sine tones to be generated, calculate their values
    at the given position
  • Add the generated sine data for the current position to leftValue and rightValue
  • If more than one source of data is getting added together, may need
    to multiply by a certain gain amount so the audio does not clip
  • Write leftValue and rightValue to output wav file

Looking at your current code, there are some issues I notice:

The size of your output wave file's data chunk needs to be calculated as the input wave file's data chunk size + the size of the audio you are appending. Also, don't assume that the headers preceding the data chunk have a fixed length in your input wav file.

I’m not sure why you are reading input samples two at a time and averaging together. This in itself would garble any audio data. (Edit: I see that you are downsampling the audio with this logic now)

You are calculating sin(2*pie*p)*.25 which all have fixed values and will never change over time. You need to calculate the sine at a certain location given the current sample rate. You are then multiplying the value of the sine operation with your sample data. You would want to mix the sine data in, which should be an addition operation.

Hard limiting your resulting amplitude with expressions like

if(leftAvg > 32767)
        outLeft = 32767;
    else if(leftAvg < -32768)
        outLeft = -32768;
    else
        outLeft = (int16_t)leftAvg;

Will just introduce clipping and produce bad results if the levels ever go above the limit. I'd also recommend just calculating all samples as float values of the range -1.0 to 1.0. This is a much easier format to manage when editing audio vs short values.

Edit: Added an example method that reads in a wav file and pads it with a sine tone of a specified length. Removed the downsampling code so it is simpler logic.

void padWaveFile(string sourcePath, int paddingSeconds, float sineFrequency, string destinationPath)
{
    ifstream in(sourcePath, ios::binary);
    ofstream out(destinationPath, ios::binary);

    RIFFHeader RIFF;    
    in.read((char*)&RIFF, sizeof(RIFF));

    FormatSubChunk Format;
    in.read((char*)&Format, sizeof(Format));

    DataSubChunkHeader Data;
    in.read((char*)&Data, sizeof(Data));

    float twoPi = 2 * 3.14159265358979f;

    int sourceSampleCount = Data.chunkSize / (Format.channels * Format.bitsPerSample / 8);
    int sampleRate = Format.frequency;
    int paddingSampleCount = paddingSeconds * sampleRate;
    int destinationSampleCount = sourceSampleCount + paddingSampleCount;    

    int sampleIndex = 0;
    float sinePosition = 0;                                         //Maintain position of sine for each sample
    float sineStep = (twoPi * sineFrequency) / (float)sampleRate;   //Sine sine step per sample at given sample rate and frequency
    float sineGain = 0.5;                                           //Attenuate sine by half so that it isnt at full volume in output
    int16_t maxShort = 32767;
    int16_t sineValue = 0;

    out.write((char*)&RIFF, sizeof(RIFF));
    out.write((char*)&Format, sizeof(Format));

    uint32_t destinationChunkSize = destinationSampleCount * Format.channels * Format.bitsPerSample;
    Data.chunkSize = destinationChunkSize;
    out.write((char*)&Data, sizeof(Data));

    int16_t inLeft;
    int16_t inRight;
    int16_t outLeft;
    int16_t outRight;

    if (Format.channels == 2) {
        for (int i = 0; i < destinationSampleCount; i++)
        {
            outLeft = 0;
            outRight = 0;

            if (!in.eof())
            {
                in.read((char*)&inLeft, 2);
                in.read((char*)&inRight, 2);

                outLeft = inLeft;
                outRight = inRight;
            }
            else {
                sineValue = sin(sinePosition) * sineGain * maxShort;

                outLeft = sineValue;
                outRight = sineValue;

                sinePosition += sineStep;
            }

            out.write((char*)&outLeft, sizeof(outLeft));
            out.write((char*)&outRight, sizeof(outRight));

            sampleIndex++;
        }
    }
}

Upvotes: 1

Related Questions