Reputation: 37
My objective here is to get some insight into how to lengthen a .WAV file while adding an A4 tone and a 2000 Hz sine wave. For example, I should be able to take a 10-second .WAV file and extend it to 30 seconds, adding an A4 tone from the 10-second mark to the 30-second mark, as well as a 2000 Hz sine wave throughout. My current code is below:
#include <math.h>
#include <stdint.h>

#include <cmath>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iostream>
#include <string>
using namespace std;
//Structures for Headers
// First 12 bytes of the wave file: the outer container header.
// The code reads and writes this struct as raw bytes, so the order and
// width of the fields must not change.
struct RIFFHeader
{
char chunkId[4]; // 4 byte character array; printed as "Chunk ID" (presumably "RIFF" for a valid file)
uint32_t chunkSize; // 4 bytes; printed as "Chunk Size"
char format[4]; // 4 byte array; printed as "Format" (presumably "WAVE" for a valid file)
};
// The 24 bytes read immediately after RIFFHeader: the format description.
// Read and written as raw bytes, so the field order and widths are significant.
struct FormatSubChunk
{
char chunkId[4]; // printed as "Sub-chunk1 ID"
uint32_t chunkSize; // printed as "Sub-chunk1 Size"
uint16_t audioFormat; // printed as "Audio Format"
uint16_t channels; // printed as "Number of Channels"
uint32_t frequency; // printed as "Sample Rate"; main() halves it when it halves the number of frames
uint32_t byteRate; // printed as "Byte Rate"; halved together with frequency in main()
uint16_t blockAlign; // printed as "Block Align"
uint16_t bitsPerSample; // printed as "Bits Per Sample"; the processing code reads 2-byte samples
};
// The 8 bytes read immediately after FormatSubChunk; the sample data follows it.
// Read and written as raw bytes, so the field order and widths are significant.
struct DataSubChunkHeader
{
char chunkId[4]; // printed as "Sub-chunk2 ID"
uint32_t chunkSize; // printed as "Sub-chunk2 Size"; treated as the size in bytes of the sample data that follows
};
// One frame of two-channel audio: a left value followed by a right value.
// Not referenced by any of the code visible here.
// NOTE(review): main() reads its samples into signed int16_t variables, so the
// unsigned fields below look inconsistent with it - confirm before using this type.
struct Sample
{
uint16_t leftchannel; // left channel value, 2 bytes
uint16_t rightchannel; // right channel value, 2 bytes
};
//------------------------------------------------
int main()
{
clock_t start;
double duration;
start = clock();
//Declaring input and output files
string infile = "Frederick_N_orig.wav";
string outfile = "Frederick_N_mod.wav";
ifstream in(infile.c_str(), ios::in | ios::binary);
ofstream out(outfile.c_str());
//Reading Headers
RIFFHeader RIFF;
in.read((char*)&RIFF,sizeof(RIFF));
FormatSubChunk Format;
in.read((char*)&Format,sizeof(Format));
DataSubChunkHeader Data;
in.read((char*)&Data,sizeof(Data));
RIFF.chunkSize = (RIFF.chunkSize - 46)/2 + 46;
Format.frequency = Format.frequency/2;
Format.byteRate = Format.byteRate/2;
Data.chunkSize = (Data.chunkSize-46)/2;
uint16_t clear = 0;
out.write((char*)&RIFF, sizeof(RIFF));
out.write((char*)&Format, sizeof(Format));
out.write((char*)&Data, sizeof(Data));
//---------------------------------------------
//Printing out Header information for troubleshooting
cout << "Chunk ID: " << RIFF.chunkId[0] << RIFF.chunkId[1] << RIFF.chunkId[2] << RIFF.chunkId[3] << endl;
cout << "Chunk Size: " << RIFF.chunkSize << endl;
cout << "Format: " << RIFF.format[0] << RIFF.format[1] << RIFF.format[2] << RIFF.format[3] <<endl;
cout << "Sub-chunk1 ID: " << Format.chunkId[0] << Format.chunkId[1] << Format.chunkId[2] << Format.chunkId[3] <<endl;
cout << "Sub-chunk1 Size: " << Format.chunkSize << endl;
cout << "Audio Format: " << Format.audioFormat << endl;
cout << "Number of Channels: " << Format.channels << endl;
cout << "Sample Rate: " << Format.frequency << endl;
cout << "Byte Rate: " << Format.byteRate << endl;
cout << "Block Align: " << Format.blockAlign << endl;
cout << "Bits Per Sample: " << Format.bitsPerSample << endl;
cout << "Sub-chunk2 ID: " << Data.chunkId[0] << Data.chunkId[1] << Data.chunkId[2] << Data.chunkId[3] << endl;
cout << "Sub-chunk2 Size: " << Data.chunkSize << endl << endl;
//------------------------------------------------------------------------------------------------------------------------
double p = 440;
float pie = 3.1415;
int16_t leftSample1;
int16_t leftSample2;
int16_t rightSample1;
int16_t rightSample2;
int32_t count = 0;
int n = 0;
//Reading in Left and Right Channels and performing processing
while(!in.eof())
{
in.read((char*)&leftSample1, 2);
if(in.eof())
break;
in.read((char*)&rightSample1, 2);
if(in.eof())
break;
in.read((char*)&leftSample2, 2);
if(in.eof())
break;
in.read((char*)&rightSample2, 2);
if(in.eof())
break;
const double max_amplitude = 32760; // "volume"
double hz = 22050; // samples per second
double frequency = 440; // middle C
double seconds = 21; // time
int N = hz * seconds;
double amplitude = (double)n/N * max_amplitude;
double value = sin(2*pie*p)*.25;
int32_t leftAvg = ((int32_t)leftSample1 + (int32_t)leftSample2)/2;
int32_t rightAvg = ((int32_t)rightSample1 + (int32_t)rightSample2)/2;
leftAvg = leftAvg + leftAvg*value;
rightAvg = rightAvg + rightAvg*value;
n++;
int16_t outLeft;
int16_t outRight;
if(leftAvg > 32767)
outLeft = 32767;
else if(leftAvg < -32768)
outLeft = -32768;
else
outLeft = (int16_t)leftAvg;
if(rightAvg > 32767)
outRight = 32767;
else if(rightAvg < -32768)
outRight = -32768;
else
outRight = (int16_t)rightAvg;
out.write((char*)&outLeft, sizeof(outLeft));
out.write((char*)&outRight, sizeof(outRight));
count++;
}
//--------------------------------------------------------------------------------------
//cleaing up
in.close();
out.close();
//-------------
//Reading output file and comparing to original file
string infile1 = "Frederick_N_mod.wav";
ifstream in1(infile1.c_str(), ios::in | ios::binary);
RIFFHeader riff1;
in1.read((char*)&riff1,sizeof(riff1));
FormatSubChunk format1;
in1.read((char*)&format1,sizeof(format1));
//in.ignore(2);
DataSubChunkHeader data1;
in1.read((char*)&data1,sizeof(data1));
in.close();
cout << "Chunk ID: " << riff1.chunkId[0] << riff1.chunkId[1] << riff1.chunkId[2] << riff1.chunkId[3] << endl;
cout << "Chunk Size: " << riff1.chunkSize << endl;
cout << "Format: " << riff1.format[0] << riff1.format[1] << riff1.format[2] << riff1.format[3] <<endl;
cout << "Sub-chunk1 ID: " << format1.chunkId[0] << format1.chunkId[1] << format1.chunkId[2] << format1.chunkId[3] <<endl;
cout << "Sub-chunk1 Size: " << format1.chunkSize << endl;
cout << "Audio Format: " << format1.audioFormat << endl;
cout << "Number of Channels: " << format1.channels << endl;
cout << "Sample Rate: " << format1.frequency << endl;
cout << "Byte Rate: " << format1.byteRate << endl;
cout << "Block Align: " << format1.blockAlign << endl;
cout << "Bits Per Sample: " << format1.bitsPerSample << endl;
cout << "Sub-chunk2 ID: " << data1.chunkId[0] << data1.chunkId[1] << data1.chunkId[2] << data1.chunkId[3] << endl;
cout << "Sub-chunk2 Size: " << data1.chunkSize << endl << endl;
//---------------------------------------------------------------------------------------------------------------------------------
//Computing execution time and writing summary file
duration = ( std::clock() - start ) / (double) CLOCKS_PER_SEC;
string summaryoutfile = "summary.txt";
ofstream summaryoutput(summaryoutfile.c_str());
summaryoutput << "sampling frequency = " << Format.frequency << '\n';
long recordTime = count/Format.frequency;
summaryoutput << "record time = " << recordTime << " seconds" << '\n';
summaryoutput << "execution time = " << duration << " seconds" << '\n';
//----------------------------------------------------------------------------------------------
summaryoutput.close();
return 0;
}
So, the question is: how do I go about extending the .WAV file with an A4 tone and adding a 2000 Hz sine wave? When I try with my code above, I get a garbled mess. Any help would be appreciated! Thanks in advance!
Upvotes: 0
Views: 181
Reputation: 2067
There are many pieces to this question, so I'd recommend decomposing the problem into parts and validating that each of them work. I’d also recommend that you use an audio editing tool like Audacity so that you can inspect your results and verify that each step is working. Individual parts that I’d split this into:
Very rough pseudo code for this operation:
Calculate the output wave file's data chunk size as the input wave file's data chunk size + the size of the extension
Maintain values leftValue and rightValue variables that are the calculated output values to be written out
For each sample from start of output wave file length to end wave file length:
Looking at your current code, there are some issues I notice:
The size of your output wave file's data chunk needs to be calculated as the input wave file's data chunk size plus the size of the audio you are appending. Also, don't assume that the header data preceding the Data chunk has a fixed length in your input wav file.
I’m not sure why you are reading input samples two at a time and averaging together. This in itself would garble any audio data. (Edit: I see that you are downsampling the audio with this logic now)
You are calculating sin(2*pie*p)*.25
whose operands all have fixed values, so the result never changes over time. You need to evaluate the sine at each sample's position in time, given the current sample rate (for example sin(2*pi*frequency*sampleIndex/sampleRate)). You are also multiplying the value of the sine with your sample data. To mix the sine in, you should add it to the sample instead.
Hard limiting your resulting amplitude with expressions like
if(leftAvg > 32767)
outLeft = 32767;
else if(leftAvg < -32768)
outLeft = -32768;
else
outLeft = (int16_t)leftAvg;
will just introduce clipping and produce bad results if the levels ever go above the limit. I'd also recommend calculating all samples as float values in the range -1.0 to 1.0. This is a much easier format to manage when editing audio than 16-bit short values.
Edit: Added an example method that reads in a wav file and pads it with a sine tone of a specified length. Removed the downsampling code so it is simpler logic.
void padWaveFile(string sourcePath, int paddingSeconds, float sineFrequency, string destinationPath)
{
ifstream in(sourcePath, ios::binary);
ofstream out(destinationPath, ios::binary);
RIFFHeader RIFF;
in.read((char*)&RIFF, sizeof(RIFF));
FormatSubChunk Format;
in.read((char*)&Format, sizeof(Format));
DataSubChunkHeader Data;
in.read((char*)&Data, sizeof(Data));
float twoPi = 2 * 3.14159265358979f;
int sourceSampleCount = Data.chunkSize / (Format.channels * Format.bitsPerSample / 8);
int sampleRate = Format.frequency;
int paddingSampleCount = paddingSeconds * sampleRate;
int destinationSampleCount = sourceSampleCount + paddingSampleCount;
int sampleIndex = 0;
float sinePosition = 0; //Maintain position of sine for each sample
float sineStep = (twoPi * sineFrequency) / (float)sampleRate; //Sine sine step per sample at given sample rate and frequency
float sineGain = 0.5; //Attenuate sine by half so that it isnt at full volume in output
int16_t maxShort = 32767;
int16_t sineValue = 0;
out.write((char*)&RIFF, sizeof(RIFF));
out.write((char*)&Format, sizeof(Format));
uint32_t destinationChunkSize = destinationSampleCount * Format.channels * Format.bitsPerSample;
Data.chunkSize = destinationChunkSize;
out.write((char*)&Data, sizeof(Data));
int16_t inLeft;
int16_t inRight;
int16_t outLeft;
int16_t outRight;
if (Format.channels == 2) {
for (int i = 0; i < destinationSampleCount; i++)
{
outLeft = 0;
outRight = 0;
if (!in.eof())
{
in.read((char*)&inLeft, 2);
in.read((char*)&inRight, 2);
outLeft = inLeft;
outRight = inRight;
}
else {
sineValue = sin(sinePosition) * sineGain * maxShort;
outLeft = sineValue;
outRight = sineValue;
sinePosition += sineStep;
}
out.write((char*)&outLeft, sizeof(outLeft));
out.write((char*)&outRight, sizeof(outRight));
sampleIndex++;
}
}
}
Upvotes: 1