Reputation: 1
I am creating a live transcription and audio streaming application.
The transcription streaming works fine, but the audio streaming isn't working as intended.
I am streaming a voice call to my backend. My backend in turn streams the audio chunks to my frontend via a WebSocket. When I try to play those chunks, the sound output is extremely muffled. How do I properly play back these streamed audio chunks?
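For context, this is roughly how the chunks arrive on the frontend. The message shape is simplified here, and `setAudioChunks` stands in for the state setter that feeds the `audioChunks` prop below:

```
// Simplified sketch of the receiving side (not my exact code).
// `setAudioChunks` is the React state setter feeding the `audioChunks` prop:
declare const setAudioChunks: React.Dispatch<React.SetStateAction<string[]>>;

const ws = new WebSocket("wss://my-backend.example/audio"); // placeholder URL

ws.onmessage = (event: MessageEvent<string>) => {
  // Assuming the backend sends each chunk as a base64-encoded mu-law payload
  setAudioChunks((prev) => [...prev, event.data]);
};
```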
My current code for decoding and playing the chunks:

```
import { Slider } from "@/components/ui/slider";
import { Pause, Play, Volume2 } from "lucide-react";
import { useState, useRef, useEffect } from "react";
import { Button } from "@/components/ui/button";
interface AudioPlayerProps {
  audioUrl: string | null;
  audioChunks?: string[];
  isLive: boolean;
}
// TODO: Set Record=True and fetch the call recording once the call is completed; also pass the recorded audio URL as a prop if available
const AudioPlayer: React.FC<AudioPlayerProps> = ({ audioUrl, audioChunks, isLive }) => {
  const [isPlaying, setIsPlaying] = useState<boolean>(false);
  const [currentTime, setCurrentTime] = useState<number>(0);
  const [duration, setDuration] = useState<number>(0);
  const [volume, setVolume] = useState<number>(1);
  const audioRef = useRef<HTMLAudioElement | null>(null);
  const audioContextRef = useRef<AudioContext | null>(null);
  const sourceNodeRef = useRef<AudioBufferSourceNode | null>(null);
  const gainNodeRef = useRef<GainNode | null>(null);
  const scriptProcessorRef = useRef<ScriptProcessorNode | null>(null);
  useEffect(() => {
    if (audioRef.current) {
      audioRef.current.addEventListener('timeupdate', handleTimeUpdate);
      audioRef.current.addEventListener('loadedmetadata', handleLoadedMetadata);
    }
    if (isLive) {
      setIsPlaying(true);
    }
    return () => {
      if (audioRef.current) {
        audioRef.current.removeEventListener('timeupdate', handleTimeUpdate);
        audioRef.current.removeEventListener('loadedmetadata', handleLoadedMetadata);
      }
    };
  }, []);
  useEffect(() => {
    if (isLive && audioChunks && audioChunks.length > 0) {
      setupAudioContext();
    }
  }, [isLive, audioChunks]);
  const setupAudioContext = async () => {
    if (!audioContextRef.current) {
      audioContextRef.current = new AudioContext({ sampleRate: 44100 });
      gainNodeRef.current = audioContextRef.current.createGain();
      gainNodeRef.current.connect(audioContextRef.current.destination);
      // Create a ScriptProcessorNode for real-time processing
      // (ScriptProcessorNode is deprecated in favour of AudioWorklet)
      scriptProcessorRef.current = audioContextRef.current.createScriptProcessor(4096, 1, 1);
      scriptProcessorRef.current.onaudioprocess = handleAudioProcess;
      scriptProcessorRef.current.connect(gainNodeRef.current);
    }
    if (audioChunks && audioChunks.length > 0) {
      // Only the most recent chunk is decoded; a new chunk cuts off the
      // previous source below
      const audioData = base64ToArrayBuffer(audioChunks[audioChunks.length - 1]);
      const decodedData = decodeMuLaw(audioData);
      // Create the buffer at the original 8 kHz sample rate; the browser
      // resamples it to the context's 44.1 kHz rate on playback
      const audioBuffer = audioContextRef.current!.createBuffer(1, decodedData.length, 8000);
      audioBuffer.getChannelData(0).set(decodedData);
      if (sourceNodeRef.current) {
        sourceNodeRef.current.stop();
      }
      sourceNodeRef.current = audioContextRef.current!.createBufferSource();
      sourceNodeRef.current.buffer = audioBuffer;
      sourceNodeRef.current.connect(scriptProcessorRef.current!);
      sourceNodeRef.current.start();
    }
  };
  const handleAudioProcess = (e: AudioProcessingEvent) => {
    const inputBuffer = e.inputBuffer;
    const outputBuffer = e.outputBuffer;
    const inputData = inputBuffer.getChannelData(0);
    const outputData = outputBuffer.getChannelData(0);
    // Implement a simple low-pass filter
    let lastOut = 0;
    for (let i = 0; i < inputData.length; i++) {
      lastOut = 0.7 * lastOut + 0.3 * inputData[i];
      outputData[i] = lastOut;
      // TODO: Try out outputData[i] = Math.max(-1, Math.min(1, outputData[i])); // Clamp the output to [-1, 1]
    }
  };
  const base64ToArrayBuffer = (base64: string): Uint8Array => {
    const binaryString = window.atob(base64);
    const len = binaryString.length;
    const bytes = new Uint8Array(len);
    for (let i = 0; i < len; i++) {
      bytes[i] = binaryString.charCodeAt(i);
    }
    return bytes;
  };
  const decodeMuLaw = (encodedData: Uint8Array): Float32Array => {
    // Build a lookup table using the standard G.711 mu-law expansion
    const table = new Float32Array(256);
    for (let i = 0; i < 256; i++) {
      const input = i ^ 0xFF; // mu-law bytes are stored complemented
      const exponent = (input & 0x70) >> 4;
      const mantissa = input & 0x0F;
      let sample = ((mantissa << 3) + 0x84) << exponent; // add bias, shift by segment
      sample -= 0x84; // remove the bias
      if (input & 0x80) sample = -sample;
      table[i] = sample / 32768; // scale 16-bit PCM to [-1, 1]
    }
    return Float32Array.from(encodedData, (byte) => table[byte]);
  };
  const handleTimeUpdate = () => {
    if (audioRef.current && !isLive) {
      setCurrentTime(audioRef.current.currentTime);
    }
  };
  const handleLoadedMetadata = () => {
    if (audioRef.current && !isLive) {
      setDuration(audioRef.current.duration);
    }
  };
  const togglePlayPause = () => {
    if (isLive) {
      if (audioContextRef.current?.state === 'suspended') {
        audioContextRef.current.resume();
      } else if (audioContextRef.current?.state === 'running') {
        audioContextRef.current.suspend();
      }
    } else if (audioRef.current) {
      if (isPlaying) {
        audioRef.current.pause();
      } else {
        audioRef.current.play();
      }
    }
    setIsPlaying(!isPlaying);
  };
  const handleSeek = (newTime: number) => {
    if (!isLive && audioRef.current) {
      audioRef.current.currentTime = newTime;
      setCurrentTime(newTime);
    }
  };
  const handleVolumeChange = (newVolume: number) => {
    if (isLive && gainNodeRef.current) {
      gainNodeRef.current.gain.setValueAtTime(newVolume, audioContextRef.current!.currentTime);
    } else if (audioRef.current) {
      audioRef.current.volume = newVolume;
    }
    setVolume(newVolume);
  };
  const formatTime = (time: number): string => {
    const minutes = Math.floor(time / 60);
    const seconds = Math.floor(time % 60);
    return `${minutes}:${seconds.toString().padStart(2, '0')}`;
  };
  return (
    <div className="bottom-0 left-0 right-0 bg-white border-t border-gray-200 p-4">
      {!isLive && <audio ref={audioRef} src={audioUrl ?? undefined} />}
      <div className="flex items-center justify-between">
        <div className="flex items-center space-x-4">
          <Button onClick={togglePlayPause} variant="ghost" size="icon">
            {isPlaying ? <Pause className="h-6 w-6" strokeWidth={1.5} /> : <Play className="h-6 w-6" strokeWidth={1.5} />}
          </Button>
        </div>
        <div className="flex-1 mx-4">
          <Slider
            value={[isLive ? 100 : duration > 0 ? (currentTime / duration) * 100 : 0]}
            max={100}
            step={1}
            onValueChange={(value) => !isLive && handleSeek((value[0] / 100) * duration)}
            disabled={isLive}
            className="my-2"
          />
          <div className="flex justify-between text-sm text-gray-500 mt-1">
            <span>{isLive ? 'LIVE' : formatTime(currentTime)}</span>
            <span>{isLive ? '' : formatTime(duration)}</span>
          </div>
        </div>
        <div className="flex items-center space-x-2">
          <Volume2 className="h-5 w-5" strokeWidth={1.5} />
          <Slider
            className="w-24"
            value={[volume]}
            max={1}
            step={0.01}
            onValueChange={(value) => handleVolumeChange(value[0])}
          />
        </div>
      </div>
    </div>
  );
};
export default AudioPlayer;
```
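For reference, this is a minimal sketch of the scheduled, back-to-back playback I have been trying to move towards instead of restarting the source on every chunk (assuming 8 kHz mono mu-law chunks; `nextStartTimeRef` is my own name, not a library API):

```
// Sketch: schedule each decoded chunk to start exactly where the previous
// one ends, rather than stopping the old source and restarting.
const nextStartTimeRef = { current: 0 }; // would be a useRef in the component

const playChunk = (ctx: AudioContext, samples: Float32Array) => {
  const buffer = ctx.createBuffer(1, samples.length, 8000); // 8 kHz source rate
  buffer.getChannelData(0).set(samples);

  const source = ctx.createBufferSource();
  source.buffer = buffer;
  source.connect(ctx.destination);

  // Never schedule in the past; keep a small safety margin for the first chunk
  const startAt = Math.max(ctx.currentTime + 0.05, nextStartTimeRef.current);
  source.start(startAt);
  nextStartTimeRef.current = startAt + buffer.duration;
};
```

Even with this, I am not sure whether the muffling comes from the decoding, the resampling, or the ScriptProcessorNode filter above.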
Upvotes: 0
Views: 61