Pratham

Reputation: 1

Play back streamed audio chunks in my Next.js application

I am creating a live transcription and audio streaming application.

The transcription streaming works fine, but the audio streaming isn't working as intended.

I am streaming a voice call to my backend, which in turn streams the audio chunks to my frontend over a WebSocket. When I try to play those chunks back, the sound is extremely muffled. How do I properly play back these streamed audio chunks?
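
For context, this is roughly how the chunks reach the frontend (a simplified sketch; the WebSocket URL, the `payload` field name, and the `setAudioChunks` setter are placeholders for my actual code). Each chunk arrives as a base64-encoded string of 8 kHz mu-law bytes:

```
// Simplified sketch of the receiving side (names are placeholders)
const ws = new WebSocket("wss://example.com/audio-stream");

ws.onmessage = (event) => {
  const message = JSON.parse(event.data);
  // message.payload is a base64 string of mu-law audio bytes
  setAudioChunks((prev: string[]) => [...prev, message.payload]);
};
```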

My current code for decoding the chunks:

```
import { Slider } from "@/components/ui/slider";
import { Pause, Play, Volume2 } from "lucide-react";
import { useState, useRef, useEffect } from "react";
import { Button } from "@/components/ui/button";

interface AudioPlayerProps {
  audioUrl: string | null;
  audioChunks?: string[];
  isLive: boolean;
}

// TODO: Set Record=True and fetch the call recording once the call is completed; also pass the recorded audio URL as a prop if available
const AudioPlayer: React.FC<AudioPlayerProps> = ({ audioUrl, audioChunks, isLive }) => {
  const [isPlaying, setIsPlaying] = useState<boolean>(false);
  const [currentTime, setCurrentTime] = useState<number>(0);
  const [duration, setDuration] = useState<number>(0);
  const [volume, setVolume] = useState<number>(1);
  const audioRef = useRef<HTMLAudioElement | null>(null);
  const audioContextRef = useRef<AudioContext | null>(null);
  const sourceNodeRef = useRef<AudioBufferSourceNode | null>(null);
  const gainNodeRef = useRef<GainNode | null>(null);
  const scriptProcessorRef = useRef<ScriptProcessorNode | null>(null);

  useEffect(() => {
    if (audioRef.current) {
      audioRef.current.addEventListener('timeupdate', handleTimeUpdate);
      audioRef.current.addEventListener('loadedmetadata', handleLoadedMetadata);
    }
    if (isLive) {
      setIsPlaying(true);
    }
    return () => {
      if (audioRef.current) {
        audioRef.current.removeEventListener('timeupdate', handleTimeUpdate);
        audioRef.current.removeEventListener('loadedmetadata', handleLoadedMetadata);
      }
    };
  }, []);

  useEffect(() => {
    if (isLive && audioChunks && audioChunks.length > 0) {
      setupAudioContext();
    }
  }, [isLive, audioChunks]);

  const setupAudioContext = async () => {
    if (!audioContextRef.current) {
      audioContextRef.current = new AudioContext({ sampleRate: 44100 });
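      // Note: this context runs at 44.1 kHz, while the buffers created below are 8 kHz;
      // the browser resamples each buffer up to the context rate on playback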
      gainNodeRef.current = audioContextRef.current.createGain();
      gainNodeRef.current.connect(audioContextRef.current.destination);

      // Create a ScriptProcessorNode for real-time processing
      scriptProcessorRef.current = audioContextRef.current.createScriptProcessor(4096, 1, 1);
      scriptProcessorRef.current.onaudioprocess = handleAudioProcess;
      scriptProcessorRef.current.connect(gainNodeRef.current);
    }

    if (audioChunks && audioChunks.length > 0) {
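      // Only the most recently received chunk is decoded and played; earlier chunks are not queued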
      const audioData = base64ToArrayBuffer(audioChunks[audioChunks.length - 1]);
      const decodedData = decodeMuLaw(audioData);

      // Create the buffer with the original sample rate
      const audioBuffer = audioContextRef.current.createBuffer(1, decodedData.length, 8000);
      audioBuffer.getChannelData(0).set(decodedData);

      if (sourceNodeRef.current) {
        sourceNodeRef.current.stop();
      }
      sourceNodeRef.current = audioContextRef.current.createBufferSource();
      sourceNodeRef.current.buffer = audioBuffer;
      sourceNodeRef.current.connect(scriptProcessorRef.current!);
      sourceNodeRef.current.start();
    }
  };

  const handleAudioProcess = (e: AudioProcessingEvent) => {
    const inputBuffer = e.inputBuffer;
    const outputBuffer = e.outputBuffer;
    const inputData = inputBuffer.getChannelData(0);
    const outputData = outputBuffer.getChannelData(0);

    // Apply a simple one-pole low-pass filter, sample by sample
    let lastOut = 0;
    for (let i = 0; i < inputData.length; i++) {
      lastOut = 0.7 * lastOut + 0.3 * inputData[i];
      outputData[i] = lastOut;
      // TODO: Try clamping the output to [-1, 1]:
      // outputData[i] = Math.max(-1, Math.min(1, outputData[i]));
    }
  };

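  // Convert a base64-encoded chunk (as received over the WebSocket) into raw bytes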
  const base64ToArrayBuffer = (base64: string) => {
    const binaryString = window.atob(base64);
    const len = binaryString.length;
    const bytes = new Uint8Array(len);
    for (let i = 0; i < len; i++) {
      bytes[i] = binaryString.charCodeAt(i);
    }
    return bytes;
  };

  const decodeMuLaw = (encodedData: Uint8Array): Float32Array => {
    // Build a 256-entry lookup table for G.711 mu-law expansion
    const table = new Float32Array(256);
    for (let i = 0; i < 256; i++) {
      const input = i ^ 0xFF; // mu-law bytes are stored bit-inverted
      // 4 mantissa bits, biased by 0x84, scaled by the 3-bit exponent
      let magnitude = ((input & 0x0F) << 3) + 0x84;
      magnitude <<= (input & 0x70) >> 4;
      let sample = magnitude - 0x84;
      if (input & 0x80) sample = -sample;
      table[i] = sample / 32768; // normalize to [-1, 1)
    }
    return Float32Array.from(encodedData, (byte) => table[byte]);
  };

  const handleTimeUpdate = () => {
    if (audioRef.current && !isLive) {
      setCurrentTime(audioRef.current.currentTime);
    }
  };

  const handleLoadedMetadata = () => {
    if (audioRef.current && !isLive) {
      setDuration(audioRef.current.duration);
    }
  };

  const togglePlayPause = () => {
    if (isLive) {
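      // Live playback runs through the AudioContext, so pause/resume maps to suspend/resume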
      if (audioContextRef.current?.state === 'suspended') {
        audioContextRef.current.resume();
      } else if (audioContextRef.current?.state === 'running') {
        audioContextRef.current.suspend();
      }
    } else if (audioRef.current) {
      if (isPlaying) {
        audioRef.current.pause();
      } else {
        audioRef.current.play();
      }
    }
    setIsPlaying(!isPlaying);
  };

  const handleSeek = (newTime: number) => {
    if (!isLive && audioRef.current) {
      audioRef.current.currentTime = newTime;
      setCurrentTime(newTime);
    }
  };

  const handleVolumeChange = (newVolume: number) => {
    if (isLive && gainNodeRef.current) {
      gainNodeRef.current.gain.setValueAtTime(newVolume, audioContextRef.current!.currentTime);
    } else if (audioRef.current) {
      audioRef.current.volume = newVolume;
    }
    setVolume(newVolume);
  };

  const formatTime = (time: number): string => {
    const minutes = Math.floor(time / 60);
    const seconds = Math.floor(time % 60);
    return `${minutes}:${seconds.toString().padStart(2, '0')}`;
  };

  return (
    <div className="bottom-0 left-0 right-0 bg-white border-t border-gray-200 p-4">
      {!isLive && <audio ref={audioRef} src={audioUrl ?? undefined} />}
      <div className="flex items-center justify-between">
        <div className="flex items-center space-x-4">
          <Button onClick={togglePlayPause} variant="ghost" size="icon">
            {isPlaying ? <Pause className="h-6 w-6" strokeWidth={1.5} /> : <Play className="h-6 w-6" strokeWidth={1.5} />}
          </Button>
        </div>
        <div className="flex-1 mx-4">
          <Slider
            value={[isLive ? 100 : duration > 0 ? (currentTime / duration) * 100 : 0]}
            max={100}
            step={1}
            onValueChange={(value) => !isLive && handleSeek((value[0] / 100) * duration)}
            disabled={isLive}
            className="my-2"
          />
          <div className="flex justify-between text-sm text-gray-500 mt-1">
            <span>{isLive ? 'LIVE' : formatTime(currentTime)}</span>
            <span>{isLive ? '' : formatTime(duration)}</span>
          </div>
        </div>
        <div className="flex items-center space-x-2">
          <Volume2 className="h-5 w-5" strokeWidth={1.5} />
          <Slider
            className="w-24"
            value={[volume]}
            max={1}
            step={0.01}
            onValueChange={(value) => handleVolumeChange(value[0])}
          />
        </div>
      </div>
    </div>
  );
};

export default AudioPlayer;
```

Upvotes: 0

Views: 61

Answers (0)
