Reputation: 14043
I am trying to decode the audio and draw the waveform using ffmpeg, and the input audio data is AV_SAMPLE_FMT_S16P
, basically I am following the tutorial here, and the audio is playing fine with libao. Now I need to plot the waveform using decoded data, currently I am writing left and right channel to separate csv file and plotting on excel. But the waveform is something different from the waveform shown in Audacity using the same audio clip. When I analyzed the value written on csv most of the values are close to maximum of uint16_t
(65535), but there are some other lower values, but majority is high peak.
Here is the source code,
const char* input_filename="/home/user/Music/Clip.mp3";
av_register_all();
AVFormatContext* container=avformat_alloc_context();
if(avformat_open_input(&container,input_filename,NULL,NULL)<0){
endApp("Could not open file");
}
if(avformat_find_stream_info(container, NULL)<0){
endApp("Could not find file info");
}
av_dump_format(container,0,input_filename,false);
int stream_id=-1;
int i;
for(i=0;i<container->nb_streams;i++){
if(container->streams[i]->codec->codec_type==AVMEDIA_TYPE_AUDIO){
stream_id=i;
break;
}
}
if(stream_id==-1){
endApp("Could not find Audio Stream");
}
AVDictionary *metadata=container->metadata;
AVCodecContext *ctx=container->streams[stream_id]->codec;
AVCodec *codec=avcodec_find_decoder(ctx->codec_id);
if(codec==NULL){
endApp("cannot find codec!");
}
if(avcodec_open2(ctx,codec,NULL)<0){
endApp("Codec cannot be found");
}
AVPacket packet;
av_init_packet(&packet);
//AVFrame *frame=avcodec_alloc_frame();
AVFrame *frame=av_frame_alloc();
int buffer_size=AVCODEC_MAX_AUDIO_FRAME_SIZE+ FF_INPUT_BUFFER_PADDING_SIZE;
// MSVC can't do variable size allocations on stack, ohgodwhy
uint8_t *buffer = new uint8_t[buffer_size];
packet.data=buffer;
packet.size =buffer_size;
int frameFinished=0;
int plane_size;
ofstream fileCh1,fileCh2;
fileCh1.open ("ch1.csv");
fileCh2.open ("ch2.csv");
AVSampleFormat sfmt=ctx->sample_fmt;
while(av_read_frame(container,&packet)>=0)
{
if(packet.stream_index==stream_id){
int len=avcodec_decode_audio4(ctx,frame,&frameFinished,&packet);
int data_size = av_samples_get_buffer_size(&plane_size, ctx->channels,
frame->nb_samples,
ctx->sample_fmt, 1);
if(frameFinished){
int write_p=0;
// QTime t;
switch (sfmt){
case AV_SAMPLE_FMT_S16P:
for (int nb=0;nb<plane_size/sizeof(uint16_t);nb++){
for (int ch = 0; ch < ctx->channels; ch++) {
if(ch==0)
fileCh1 <<((uint16_t *) frame->extended_data[ch])[nb]<<"\n";
else if(ch==1)
fileCh2 <<((uint16_t *) frame->extended_data[ch])[nb]<<"\n";
}
}
break;
}
} else {
DBG("frame failed");
}
}
av_free_packet(&packet);
}
fileCh1.close();
fileCh2.close();
avcodec_close(ctx);
avformat_close_input(&container);
delete buffer;
return 0;
Edit:
I have attached the waveform image draw using opencv, here I scaled the sample value to 0-255 range, and took value 127 as 0(Y-axis). Now for each sample draw line from (x,127) to (x,sample value) where x=1,2,3,...
Upvotes: 1
Views: 2576
Reputation: 9341
The problem is the cast to uint16_t
when the sample format is signed (AV_SAMPLE_FMT_S16P where the S means signed). Because of this, -1 is going to be written to the file as 2147483648 and so on.
To fix it, change the lines:
fileCh1 <<((uint16_t *) frame->extended_data[ch])[nb]<<"\n";
to:
fileCh1 <<((int16_t *) frame->extended_data[ch])[nb]<<"\n";
Upvotes: 3