opus and NAudio streaming out of sync

Question

I am adding VoIP to my game. Unity's Microphone class is not supported in WebGL, is already slow, and gives floats instead of bytes, so I needed an alternative. Some people suggested using a codec such as Opus, and I found a wrapper for it along with a demo that uses NAudio. I was fairly happy with it: it used some extra loops, and removing them gave the same result. However, it produced 4000 bytes at a 48k sample rate, so I reduced the sample rate to 8k and the max buffer size to 350. Here's the code for that script:

/// <summary>
/// Unity start-up hook. Encoding is not started here (see StartGame); in the editor
/// this only registers a listener that stops encoding when play mode is paused.
/// </summary>
private void Start()
{
    //StartEncoding();
#if UNITY_EDITOR
    // Subscribe with += so callbacks registered by other scripts are not overwritten.
    // The UnityEditor namespace only exists inside the editor, hence the guard.
    UnityEditor.EditorApplication.playmodeStateChanged += PlayModeStateChangedHandler;
#endif
}

#if UNITY_EDITOR
/// <summary>
/// Editor-only callback for play-mode state changes: stops encoding while the
/// editor is paused.
/// </summary>
private void PlayModeStateChangedHandler()
{
    if (UnityEditor.EditorApplication.isPaused)
    {
        StopEncoding();
    }
}
#endif

/// <summary>
/// Public entry point: kicks off audio capture, encoding and playback by
/// delegating to StartEncoding.
/// </summary>
public void StartGame()
{
    this.StartEncoding();
}

/// <summary>
/// Creates the Opus encoder/decoder (8 kHz, mono), opens the NAudio capture and
/// playback devices, registers the network receive callback and starts the timer.
/// </summary>
private void StartEncoding()
{
    // NOTE(review): FindObjectOfType needs a generic type argument (the network client
    // component); it is missing from this listing - restore it before compiling.
    _client = FindObjectOfType();

    // Remove any earlier registration first so repeated starts never leave
    // UpdateAudioOutput subscribed more than once.
    _client.AudioReceivers -= UpdateAudioOutput;
    _client.AudioReceivers += UpdateAudioOutput;

    _startTime = DateTime.Now;
    _bytesSent = 0;

    // 160 frames at 8000 Hz is one 20 ms segment.
    _segmentFrames = 160;
    _encoder = OpusEncoder.Create(8000, 1, FragLabs.Audio.Codecs.Opus.Application.Voip);
    _encoder.MaxDataBytes = 350;
    _encoder.Bitrate = 4000;
    _decoder = OpusDecoder.Create(8000, 1);
    // NOTE(review): 160 16-bit mono samples are 320 bytes of PCM; confirm that 175 is
    // large enough for whatever buffer the decoder sizes from MaxDataBytes.
    _decoder.MaxDataBytes = 175;
    _bytesPerSegment = _encoder.FrameByteCount(_segmentFrames);

    // Capture: default device, 8 kHz / 16-bit / mono, delivered in 50 ms buffers.
    _waveIn = new WaveIn(WaveCallbackInfo.FunctionCallback());
    _waveIn.BufferMilliseconds = 50;
    _waveIn.DeviceNumber = 0;
    _waveIn.DataAvailable += _waveIn_DataAvailable;
    _waveIn.WaveFormat = new WaveFormat(8000, 16, 1);

    // Playback: decoded audio is queued here and pulled by the output device.
    _playBuffer = new BufferedWaveProvider(new WaveFormat(8000, 16, 1));
    _playBuffer.DiscardOnBufferOverflow = true;

    _waveOut = new WaveOut(WaveCallbackInfo.FunctionCallback());
    _waveOut.DeviceNumber = 0;
    _waveOut.Init(_playBuffer);

    _waveOut.Play();
    _waveIn.StartRecording();

    // The timer is created once and reused across restarts; it ticks every second.
    if (_timer == null)
    {
        _timer = new Timer();
        _timer.Interval = 1000;
        _timer.Elapsed += _timer_Tick;
    }
    _timer.Start();
}

/// <summary>
/// Timer callback: works out the average number of bytes sent per second since
/// encoding started. The figure is only computed here; it is not stored or shown.
/// </summary>
private void _timer_Tick(object sender, EventArgs e)
{
    TimeSpan elapsed = DateTime.Now.Subtract(_startTime);
    var averageRate = _bytesSent / elapsed.TotalSeconds;
}

// Captured bytes left over from the previous callback that did not fill a whole
// encoder segment; they are prepended to the next batch of recorded data.
byte[] _notEncodedBuffer = new byte[0];

/// <summary>
/// Capture callback: joins the leftover bytes with the newly recorded data, encodes
/// every complete segment of _bytesPerSegment bytes, sends each encoded packet to the
/// server and keeps the trailing partial segment for the next call.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Recorded buffer and the number of valid bytes in it.</param>
private void _waveIn_DataAvailable(object sender, WaveInEventArgs e)
{
    int carriedOver = _notEncodedBuffer.Length;

    // Leftover bytes first, then the fresh recording.
    byte[] soundBuffer = new byte[carriedOver + e.BytesRecorded];
    Array.Copy(_notEncodedBuffer, 0, soundBuffer, 0, carriedOver);
    Array.Copy(e.Buffer, 0, soundBuffer, carriedOver, e.BytesRecorded);

    int byteCap = _bytesPerSegment;
    // Integer division already rounds down for non-negative lengths.
    int segmentCount = soundBuffer.Length / byteCap;
    int segmentsEnd = segmentCount * byteCap;

    // Whatever does not fill a whole segment waits for the next callback.
    int notEncodedCount = soundBuffer.Length - segmentsEnd;
    _notEncodedBuffer = new byte[notEncodedCount];
    Array.Copy(soundBuffer, segmentsEnd, _notEncodedBuffer, 0, notEncodedCount);

    for (int i = 0; i < segmentCount; i++)
    {
        byte[] segment = new byte[byteCap];
        Array.Copy(soundBuffer, i * byteCap, segment, 0, byteCap);

        int len;
        byte[] buff = _encoder.Encode(segment, segment.Length, out len);
        SendToServer(buff, len);
    }
}

/// <summary>
/// Receive callback: decodes one Opus packet and queues the resulting audio for
/// playback.
/// </summary>
/// <param name="ba">Buffer holding the encoded packet.</param>
/// <param name="len">Number of encoded bytes in <paramref name="ba"/> to decode.</param>
public void UpdateAudioOutput(byte[] ba, int len)
{
    // StopEncoding clears the decoder and play buffer, but a packet can still be
    // delivered afterwards (or before encoding was ever started); ignore it
    // instead of throwing a NullReferenceException.
    if (_decoder == null || _playBuffer == null)
    {
        return;
    }

    int decodedLength;
    byte[] pcm = _decoder.Decode(ba, len, out decodedLength);
    _playBuffer.AddSamples(pcm, 0, decodedLength);
}

/// <summary>
/// Logs the send and forwards one encoded packet to the network client.
/// </summary>
/// <param name="EncodedAudio">Buffer returned by the encoder.</param>
/// <param name="Length">Number of encoded bytes reported by the encoder.</param>
private void SendToServer(byte[] EncodedAudio, int Length)
{
    print("SENDING AUDIO");

    // The length travels with the packet so the receiver can hand it to the decoder.
    this._client.Send(EncodedAudio, Length);
}

/// <summary>
/// Stops the timer, capture and playback and releases the codec objects.
/// Safe to call when encoding was never started and safe to call repeatedly
/// (for example on editor pause followed by application quit).
/// </summary>
private void StopEncoding()
{
    if (_timer != null)
    {
        _timer.Stop();
    }

    // Stop receiving network audio before the decoder goes away.
    if (_client != null)
    {
        _client.AudioReceivers -= UpdateAudioOutput;
    }

    if (_waveIn != null)
    {
        _waveIn.DataAvailable -= _waveIn_DataAvailable;
        _waveIn.StopRecording();
        _waveIn.Dispose();
        _waveIn = null;
    }

    if (_waveOut != null)
    {
        _waveOut.Stop();
        _waveOut.Dispose();
        _waveOut = null;
    }

    _playBuffer = null;

    if (_encoder != null)
    {
        _encoder.Dispose();
        _encoder = null;
    }

    if (_decoder != null)
    {
        _decoder.Dispose();
        _decoder = null;
    }
}

/// <summary>
/// Unity quit callback: shuts down encoding so the audio devices and codec
/// objects are released before the application exits.
/// </summary>
private void OnApplicationQuit()
{
    this.StopEncoding();
}

Here are the TCP send and receive routines; they are pretty much the same for the client and the server:

/// <summary>
/// Writes one framed message to the stream. Layout:
/// [type: 1 byte][payload length: 4 bytes][payload][custom parameter: 4 bytes].
/// Does nothing when the socket is not ready.
/// </summary>
/// <param name="data">Payload; the entire array is written, regardless of
/// <paramref name="customParamLen"/>.</param>
/// <param name="customParamLen">Extra integer appended after the payload.</param>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
public void Send(byte[] data, int customParamLen = 0)
{
    if (!socketReady)
    {
        return;
    }

    if (data == null)
    {
        throw new ArgumentNullException("data");
    }

    byte messageType = (1 << 3); // assume that 0000 1000 would be the Message type
    byte[] length = BitConverter.GetBytes(data.Length);
    byte[] customParam = BitConverter.GetBytes(customParamLen); //length also 4/sizeof(int)

    int payloadOffset = 1 + length.Length;
    int customParamOffset = payloadOffset + data.Length;

    byte[] buffer = new byte[customParamOffset + customParam.Length];
    buffer[0] = messageType;
    Array.Copy(length, 0, buffer, 1, length.Length);
    Array.Copy(data, 0, buffer, payloadOffset, data.Length);
    Array.Copy(customParam, 0, buffer, customParamOffset, customParam.Length);

    // One Write call keeps the header, payload and trailer together.
    heavyStream.Write(buffer, 0, buffer.Length);

    print("Writtin bytes");
}

// Reads one framed message: [type: 1][length: 4][payload: length][custom parameter: 4].
// A stream Read may return fewer bytes than requested, so every field is read in a
// loop until it is complete; giving up after a single short read would drop the
// message and leave the following bytes misaligned with the framing.
if (heavyStream.DataAvailable)
        {
            print("Data Receiving YAY!");

            //Get message Type
            byte messageType = (byte)heavyStream.ReadByte();

            //Get length of the Data
            byte[] lengthBuffer = new byte[sizeof(int)];
            int recv = 0;
            while (recv < lengthBuffer.Length)
            {
                int chunk = heavyStream.Read(lengthBuffer, recv, lengthBuffer.Length - recv);
                if (chunk <= 0)
                {
                    break; // stream ended before the field was complete
                }
                recv += chunk;
            }

            if (recv == sizeof(int))
            {
                int messageLen = BitConverter.ToInt32(lengthBuffer, 0);

                //Get the Data
                byte[] messageBuffer = new byte[messageLen];
                recv = 0;
                while (recv < messageBuffer.Length)
                {
                    int chunk = heavyStream.Read(messageBuffer, recv, messageBuffer.Length - recv);
                    if (chunk <= 0)
                    {
                        break; // stream ended before the payload was complete
                    }
                    recv += chunk;
                }

                if (recv == messageLen)
                {
                    // messageBuffer contains the whole message ...

                    //Get length parameter needed for opus to decode
                    byte[] customParamAudioLen = new byte[sizeof(int)];
                    recv = 0;
                    while (recv < customParamAudioLen.Length)
                    {
                        int chunk = heavyStream.Read(customParamAudioLen, recv, customParamAudioLen.Length - recv);
                        if (chunk <= 0)
                        {
                            break; // stream ended before the trailer was complete
                        }
                        recv += chunk;
                    }

                    if (recv == sizeof(int))
                    {
                        // NOTE(review): the sender transmits the encoded length unchanged;
                        // confirm why 5 is subtracted before it reaches the decoder.
                        AudioReceivers(messageBuffer, BitConverter.ToInt32(customParamAudioLen, 0) - 5);
                        print("Done! Everything went straight as planned");
                    }
                }
            }

The problem is that the audio is choppy and has gaps in it, and the longer it runs, the more out of sync it becomes.

UPDATE

Still not fixed.

Ciaran Fisher · Accepted Answer

It looks like you're just sending audio straight out with no jitter buffer on the receiving end. This means if you have any variability in latency you'll start to hear gaps.

What you need to do is buffer audio on the client side - until you have a good amount, say 400ms, then start playing. That gives you a buffer of extra time to account for jitter.

This is a very naive approach, but gives you something to play with - you'll probably want to look at adaptive jitter buffers, and probably switch to UDP instead of TCP to get better performance. With UDP you will need to deal with lost packets, out of order etc.

Have a look at Speex which has a Jitter Buffer https://github.com/xiph/speex or Mumble which uses Speex for VOIP https://github.com/mumble-voip/mumble

opus and NAudio streaming out of sync

Answers (1)

Related Questions