Reputation: 25
I'm working on a voice chat and I need to compress my audio data. I record and play the audio data via the Qt framework. If I record and play the audio data without compressing it, everything is fine. If I compress, decompress, and then play the audio data, I just hear a crackling sound.
Edit: I had a look at the demo code and tried to use it. I can hear something, but it is very laggy. If I increase the size of pcm_bytes to e.g. 40000 it sounds better, but my voice still lags and there are still crackling sounds.
This is the line (audioinput.cpp at the bottom):
speaker->write((const char*)pcm_bytes,3840);
codecopus.cpp:
#include "codecopus.h"
/// Constructs a codec wrapper with no encoder or decoder yet.
///
/// Both state pointers are set to null here so that the destructor never
/// receives an indeterminate pointer when initEncoder()/initDecoder() were
/// not called on this object.
CodecOpus::CodecOpus()
{
    decoderState = nullptr;
    encoderState = nullptr;
}
/// Creates the Opus decoder state used by decodeData().
///
/// @param samplingRate  Sampling rate in Hz, forwarded to opus_decoder_create.
/// @param channels      Number of channels to decode.
///
/// On failure the reason is printed to std::cerr and decoderState is left
/// null; on success a confirmation is printed to std::cout.
void CodecOpus::initDecoder(opus_int32 samplingRate, int channels) //decoder
{
    int error = OPUS_OK;
    decoderState = opus_decoder_create(samplingRate, channels, &error);
    if (error != OPUS_OK || decoderState == nullptr) {
        std::cerr << "Failed to create Opus decoder: " << opus_strerror(error) << std::endl;
        decoderState = nullptr;
        return;
    }
    std::cout << "Created Opus Decoder struct" << std::endl;
}
/// Creates the Opus encoder state (VOIP application) used by encodeData()
/// and requests a 64 kbit/s bitrate.
///
/// @param samplingRate  Sampling rate in Hz, forwarded to opus_encoder_create.
/// @param channels      Number of channels to encode.
///
/// The creation result is checked before anything else is done with the
/// encoder; previously it was overwritten by the result of the bitrate
/// request, so a failed creation could go unnoticed.
void CodecOpus::initEncoder(opus_int32 samplingRate, int channels) // Encoder
{
    int error = OPUS_OK;
    encoderState = opus_encoder_create(samplingRate, channels, OPUS_APPLICATION_VOIP, &error);
    if (error != OPUS_OK || encoderState == nullptr) {
        std::cerr << "Failed to create Opus encoder: " << opus_strerror(error) << std::endl;
        encoderState = nullptr;
        return;
    }
    std::cout << "Created Opus Encoder struct" << std::endl;

    // A rejected bitrate is not fatal: the encoder stays usable with its current setting.
    const int ctlResult = opus_encoder_ctl(encoderState, OPUS_SET_BITRATE(64000));
    if (ctlResult != OPUS_OK) {
        std::cerr << "Failed to set Opus bitrate: " << opus_strerror(ctlResult) << std::endl;
    }
}
/// Encodes one frame of PCM by forwarding all arguments to opus_encode.
///
/// @param pcm           Input samples.
/// @param frameSize     Frame size passed through to opus_encode.
/// @param data          Destination buffer for the compressed packet.
/// @param maxDataBytes  Capacity of @p data in bytes.
/// @return Whatever opus_encode returns for this call.
opus_int32 CodecOpus::encodeData(const opus_int16 *pcm, int frameSize, unsigned char *data, opus_int32 maxDataBytes) //Encoder
{
    return opus_encode(encoderState, pcm, frameSize, data, maxDataBytes);
}
/// Decodes one compressed packet by forwarding all arguments to opus_decode
/// (with the final decode_fec argument fixed at 0).
///
/// @param data             Compressed packet.
/// @param numberOfBytes    Length of @p data in bytes.
/// @param pcm              Destination buffer for the decoded samples.
/// @param frameSizeInSec   Passed as opus_decode's frame_size argument.
///                         NOTE(review): despite the name this looks like a
///                         sample count rather than seconds; confirm against callers.
/// @return Whatever opus_decode returns for this call.
int CodecOpus::decodeData(const unsigned char *data, opus_int32 numberOfBytes,opus_int16* pcm,int frameSizeInSec) //Decoder
{
    return opus_decode(decoderState, data, numberOfBytes, pcm, frameSizeInSec, 0);
}
/// Releases the decoder state first, then the encoder state.
CodecOpus::~CodecOpus()
{
    opus_decoder_destroy(decoderState);
    opus_encoder_destroy(encoderState);
}
audioinput.h:
#ifndef AUDIOINPUT_H
#define AUDIOINPUT_H
#include <QAudioFormat>
#include <iostream>
#include <QAudioInput>
#include <QAudioOutput>
#include <thread>
#include "codecopus.h"
#include "QDebug"
/// Captures audio from an input device, round-trips it through Opus and
/// plays the result on an output device.
class AudioInput : public QObject
{
    Q_OBJECT
public:
    AudioInput();
    ~AudioInput();
    /// Starts both audio devices and stores their I/O handles in mic/speaker.
    void startRecording();
    /// Runs startRecording() on a detached std::thread.
    void CreateNewAudioThread();
private:
    CodecOpus opus;
    // Scratch space for one compressed packet.
    unsigned char cbits[4000] = {};
    // One 960-sample stereo frame of input. The previous declaration multiplied
    // the element count by sizeof(opus_int16), doubling the array for no benefit.
    opus_int16 in[960*2] = {};
    // Decoded output, sized for 5760 stereo samples.
    opus_int16 out[5760*2] = {};
    // Every raw pointer and plain int below gets a defined initial value so it
    // is never read while indeterminate.
    unsigned char *pcm_bytes = nullptr;
    int MAX_FRAME_SIZE = 0;
    QAudioFormat audioFormat;
    QAudioInput *audioInput = nullptr;
    QIODevice *mic = nullptr;
    QByteArray data;
    int micFrameSize = 0;
    QAudioOutput *audioOutput = nullptr;
    QIODevice *speaker = nullptr;
    QAudioFormat speakerAudioFormat;
public slots:
    /// Connected to audioInput's notify() signal.
    void OnAudioNotfiy();
};
#endif // AUDIOINPUT_H
audioinput.cpp:
#include "audioinput.h"
/// Configures 48 kHz / stereo / 16-bit signed little-endian PCM for both the
/// microphone and the speaker, falls back to each device's nearest supported
/// format when needed, creates the Qt audio objects and the Opus
/// encoder/decoder, and hooks the input's notify() signal to OnAudioNotfiy().
AudioInput::AudioInput() : pcm_bytes(nullptr), audioFormat()
{
    // pcm_bytes used to receive a 40000-byte allocation here that nothing ever
    // used or freed; OnAudioNotfiy() assigns the pointer before reading it.

    audioFormat.setSampleRate(48000);
    audioFormat.setChannelCount(2);
    audioFormat.setSampleSize(16);
    audioFormat.setSampleType(QAudioFormat::SignedInt);
    audioFormat.setByteOrder(QAudioFormat::LittleEndian);
    audioFormat.setCodec("audio/pcm");

    // The speaker is requested with exactly the same parameters as the mic.
    speakerAudioFormat = audioFormat;

    const QAudioDeviceInfo micInfo = QAudioDeviceInfo::defaultInputDevice();
    if (!micInfo.isFormatSupported(audioFormat)) {
        std::cout << "Mic Format not supported!" << std::endl;
        audioFormat = micInfo.nearestFormat(audioFormat);
    }

    const QAudioDeviceInfo speakerInfo = QAudioDeviceInfo::defaultOutputDevice();
    if (!speakerInfo.isFormatSupported(speakerAudioFormat)) {
        std::cout << "Speaker Format is not supported!" << std::endl;
        // Ask the OUTPUT device for its nearest format; the input device was
        // queried here before, which could yield a format the speaker rejects.
        speakerAudioFormat = speakerInfo.nearestFormat(speakerAudioFormat);
    }

    std::cout << speakerAudioFormat.sampleRate() << audioFormat.sampleRate() << speakerAudioFormat.channelCount() << audioFormat.channelCount() << std::endl;

    audioInput = new QAudioInput(audioFormat);
    audioOutput = new QAudioOutput(speakerAudioFormat);
    audioInput->setNotifyInterval(20);

    // Samples per channel in one 20 ms notify interval.
    micFrameSize = (audioFormat.sampleRate() / 1000) * 20;

    opus.initEncoder(audioFormat.sampleRate(), audioFormat.channelCount());
    opus.initDecoder(speakerAudioFormat.sampleRate(), speakerAudioFormat.channelCount());
    MAX_FRAME_SIZE = 6 * 960;

    connect(audioInput, SIGNAL(notify()), this, SLOT(OnAudioNotfiy()));
}
/// Destroys the Qt audio objects created in the constructor; they are created
/// without a parent, so nothing else deletes them.
AudioInput::~AudioInput()
{
    delete audioInput;
    delete audioOutput;
    // pcm_bytes is not freed here because it is not guaranteed to still point
    // at memory this object allocated.
}
/// Starts the input device, then the output device, keeping the I/O handle
/// each start() call returns, and logs that recording has begun.
void AudioInput::startRecording()
{
    // Input first, output second.
    mic = audioInput->start();
    speaker = audioOutput->start();

    std::cout << "Recording started!" << std::endl;
}
void AudioInput::CreateNewAudioThread()
{
std::thread t1(&AudioInput::startRecording,this);
t1.detach();
}
void AudioInput::OnAudioNotfiy()
{
data = mic->readAll();
std::cout << "data size" <<data.size() << std::endl;
if(data.size() > 0){
pcm_bytes = reinterpret_cast<unsigned char*>(data.data());
//convert
for(int i=0;i<2*960;i++){ //TODO HARDCODED
in[i]=pcm_bytes[2*i+1]<<8|pcm_bytes[2*i];
}
opus_int32 compressedBytes = opus.encodeData(in,960,cbits,4000);
opus_int32 decompressedBytes = opus.decodeData(cbits,compressedBytes,out,MAX_FRAME_SIZE);
for(int i = 0; i<2*decompressedBytes;i++) //TODO HARDCODED
{
pcm_bytes[2*i]=out[i]&0xFF;
pcm_bytes[2*i+1]=(out[i]>>8)&0xFF;
}
speaker->write((const char*)pcm_bytes,3840);
}
}
Upvotes: 1
Views: 5807
Reputation: 721
Opus encode/decode with Qt 6.2 on Ubuntu 22. There is no need to convert the byte order. I modified the code from the post above.
void audio_rec::processData()
{
QByteArray audio_buffer;
audio_buffer.resize(FRAME_SIZE*OPUS_INT_SIZE);
opus_int16 input_frame[FRAME_SIZE] = {};
opus_int16 output_frame[FRAME_SIZE] = {};
unsigned char compressed_frame[MAX_FRAME_SIZE] = {};
unsigned char decompressed_frame[FRAME_SIZE*OPUS_INT_SIZE] = {};
if(m_io->read(audio_buffer.data(), FRAME_SIZE*OPUS_INT_SIZE) > 0){
opus_int32 compressedBytes = opus_encode(m_encoder, reinterpret_cast<const qint16*>(audio_buffer.data()), FRAME_SIZE, compressed_frame, MAX_FRAME_SIZE);
qDebug() << "Compressed bytes" << compressedBytes;
QByteArray decoded_array;
decoded_array.resize(FRAME_SIZE*OPUS_INT_SIZE);
opus_int32 decompressedBytes = opus_decode(m_decoder, compressed_frame, compressedBytes, reinterpret_cast<qint16*>(decoded_array.data()), FRAME_SIZE, 0);
qDebug() << "Decompressed bytes" << decompressedBytes;
m_iout->write(decoded_array);
}
}
Upvotes: 1
Reputation: 11
1) You encode only a single 960-sample frame, while the buffer is much larger. You must split the buffer into several equal frames and pass them to the encoder one at a time. At 48 kHz the valid frame sizes are 120, 240, 480, 960, 1920, and 2880 samples per channel.
2) Use the qFromLittleEndian()/qToLittleEndian() functions or type casting when converting from a char array to an opus_int16 array and back. This will prevent crackling and poor sound quality.
Example:
void voice::slot_read_audio_input()
{
// Audio settings:
// Sample Rate=48000
// Sample Size=16
// Channel Count=1
// Byte Order=Little Endian
// Sample Type= UnSignedInt
// Encoder settings:
// Sample Rate=48000
// Channel Count=1
// OPUS_APPLICATION_VOIP
// Decoder settings:
// Sample Rate=48000
// Channel Count=1
QByteArray audio_buffer;//mic
QByteArray output_audio_buffer;//speaker
int const OPUS_INT_SIZE=2;//sizeof(opus_int16)
int const FRAME_SIZE=960;
int const MAX_FRAME_SIZE=1276;
int FRAME_COUNT=3840/FRAME_SIZE/OPUS_INT_SIZE;// 3840 is a sample size= voice_input->bytesReady;
opus_int16 input_frame[FRAME_SIZE] = {};
opus_int16 output_frame[FRAME_SIZE] = {};
unsigned char compressed_frame[MAX_FRAME_SIZE] = {};
unsigned char decompressed_frame[FRAME_SIZE*OPUS_INT_SIZE] = {};
audio_buffer.resize(voice_input->bytesReady());
output_audio_buffer.resize(FRAME_SIZE*OPUS_INT_SIZE);
input->read(audio_buffer.data(),audio_buffer.size());
for(int i=0;i<FRAME_COUNT;i++)
{
// convert from LittleEndian
for(int j=0;j<FRAME_SIZE;j++)
{
input_frame[j]=qFromLittleEndian<opus_int16>(audio_buffer.data()+j*OPUS_INT_SIZE);
// or use this:
// input_frame[j]=static_cast<short>(static_cast<unsigned char>(audio_buffer.at(OPUS_INT_SIZE*j+1))<<8|static_cast<unsigned char>(audio_buffer.at(OPUS_INT_SIZE*j)));
}
opus_int32 compressedBytes = opus_encode(enc, input_frame,FRAME_SIZE,compressed_frame,MAX_FRAME_SIZE);
opus_int32 decompressedBytes = opus_decode(dec,compressed_frame,compressedBytes,output_frame,FRAME_SIZE,0);
// conver to LittleEndian
for(int j = 0; j<decompressedBytes;j++)
{
qToLittleEndian(output_frame[j],output_audio_buffer.data()+j*OPUS_INT_SIZE);
// or use this:
// decompressed_frame[OPUS_INT_SIZE*j]=output_frame[j]&0xFF;
// decompressed_frame[OPUS_INT_SIZE*j+1]=(output_frame[j]>>8)&0xFF;
}
audio_buffer.remove(0,FRAME_SIZE*OPUS_INT_SIZE);
output->write(output_audio_buffer,FRAME_SIZE*OPUS_INT_SIZE);
// or use this:
// output->write(reinterpret_cast<char*>(decompressed_frame),FRAME_SIZE*OPUS_INT_SIZE);
}
}
Upvotes: 1
Reputation: 305
I had a long answer ready about how you are misinterpreting the return value of opus.decodeData as the number of bytes, where the correct interpretation is "number of decoded samples per channel". But it still looks like you account for that in the byte conversion routine later on. So I'm not precisely sure where the bug is.
In general I think you are making the conversion from unsigned char <-> int16 more complicated than it needs to be. You should be able to just pass the audio buffer directly to / from opus and reinterpret its pointer to the needed type inline, without having to manually do bit manipulations to convert and copy between different buffers. The audio device should give you little-endian data but if there is a mismatch you can do a basic byte swapping routine
for (int c = 0; c < numSamples; c++)
{
unsigned char tmp = data[2 * c];
data[2 * c] = data[2 * c + 1];
data[2 * c + 1] = tmp;
}
I don't see it here but I assume you also have code to only consume exactly 960 samples at a time from the mic and keep the rest in the buffer for the next frame, otherwise you'll drop data.
Not that it matters much, but you can also replace 4000 in cbits with 1275, which is the maximum opus packet size.
Upvotes: 0