Reputation: 109
I am trying to get just the audio data of songs from YouTube videos so I can analyze it in Python without downloading the files to disk. I started by using yt-dlp with the following code:
def search_youtube(song_name, artist_name=None):
    """Return the media URL of the first YouTube search hit, or None.

    Args:
        song_name: Title of the song to search for.
        artist_name: Optional artist; when given, the query becomes
            "<song> <artist> official audio" to bias the search.

    Returns:
        The "url" field of the first search entry reported by yt-dlp, or
        None when the search produced no usable result.
    """
    query = f"{song_name} {artist_name} official audio" if artist_name else song_name
    search_url = f"ytsearch: {query}"
    ydl_opts = {
        "format": "bestaudio/best",
        "quiet": True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        # download=False: only resolve metadata, do not fetch the media.
        result = ydl.extract_info(search_url, download=False)

    # extract_info may hand back nothing at all; treat that as "not found".
    if not result:
        return None

    # Materialize the entries so an empty search (or a lazy iterable) is
    # handled without raising IndexError on [0].
    entries = list(result.get("entries") or [])
    if not entries:
        return None

    return entries[0]["url"]  # Get the first result
Testing it with an artist name and song title does produce a URL to an audio-only stream. But when I run the following function:
def stream_youtube_audio(url):
    """Fetch the audio at *url* into memory and decode it to a mono signal.

    Args:
        url: Direct URL of an audio resource (e.g. the value returned by
            search_youtube).

    Returns:
        A tuple (audio_mono, sr): the mono float32 sample array and the
        sample rate reported by soundfile.

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
        soundfile.LibsndfileError: If libsndfile cannot decode the data.
    """
    # Download audio into memory; the context manager closes the connection.
    with urllib.request.urlopen(url) as response:
        audio_buffer = io.BytesIO(response.read())

    # Pass the file-like object itself: soundfile interprets a bytes/str
    # argument as a file *name*, which is what made the original call fail.
    # always_2d=True yields shape (num_frames, num_channels) even for mono,
    # so no manual reshape is needed.
    # NOTE(review): libsndfile must still support the container/codec of the
    # stream (YouTube often serves WebM/Opus or M4A) -- confirm, or decode
    # through ffmpeg first if this still raises.
    audio_array, sr = sf.read(audio_buffer, always_2d=True)

    # If the file has more than 2 channels, take only the first two
    if audio_array.shape[1] > 2:
        audio_array = audio_array[:, :2]

    # If the file is not float32, convert it
    if audio_array.dtype != 'float32':
        audio_array = audio_array.astype('float32')

    # Transpose the audio data to have the shape (num_channels, num_frames)
    audio_array = audio_array.T

    # Convert to mono if it's not already
    if audio_array.shape[0] > 1:
        audio_mono = librosa.to_mono(audio_array)
    else:
        audio_mono = audio_array[0]
    return audio_mono, sr
I get a `LibsndfileError`.
Does anyone have a solution, or other methods I should look into? I just don't want to download the audio to a file.
Upvotes: 0
Views: 270