Reputation: 306
i'm trying to build an app with pyaudio that records speaker and microphone sound but i just don't know how to record them both.I tried enabling stereo mixer but it didn't work out because i could only listen the sound from the speakers. This code that i'm using records audio from the default microphone in a background thread using pyaudio:
import pyaudio
import wave
import threading
import time
import subprocess
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "tmp/tmp.wav"
class recorder:
def __init__(self):
self.going = False
self.process = None
self.filename = "ScreenCapture.mpg"
def record(self,filename):
try:
if self.process.is_alive():
self.going = False
except AttributeError:
print("test")
self.process = threading.Thread(target=self._record)
self.process.start()
self.filename = filename
def _record(self):
p = pyaudio.PyAudio()
stream = p.open(format=FORMAT,
channels=CHANNELS,
rate=RATE,
input=True,
frames_per_buffer=CHUNK)
print("* recording")
frames = []
self.going = True
while self.going:
data = stream.read(CHUNK)
frames.append(data)
print("* done recording")
stream.stop_stream()
stream.close()
p.terminate()
wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
wf.setnchannels(CHANNELS)
wf.setsampwidth(p.get_sample_size(FORMAT))
wf.setframerate(RATE)
wf.writeframes(b''.join(frames))
wf.close()
def stop_recording(self):
self.going = False
I'm using Windows and I can also use other library, not only PyAudio. I just need to get this work.
Edit : I found this code to record the output from speaker but i couldn't make it work in my application:
import pyaudio
import wave
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "output.wav"
p = pyaudio.PyAudio()
SPEAKERS = p.,get_default_output_device_info()["hostApi"]
stream = p.open(format=FORMAT,
channels=CHANNELS,
rate=RATE,
input=True,
frames_per_buffer=CHUNK,
input_host_api_specific_stream_info=SPEAKERS,
as_loopback=True)
print("* recording")
frames = []
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
data = stream.read(CHUNK)
frames.append(data)
print("* done recording")
stream.stop_stream()
stream.close()
p.terminate()
wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
wf.setnchannels(CHANNELS)
wf.setsampwidth(p.get_sample_size(FORMAT))
wf.setframerate(RATE)
wf.writeframes(b''.join(frames))
wf.close()
UPDATE : I could record both my speaker and my microphone with this:
import pyaudio
import wave
import numpy as np
CHUNK = 1024
FORMAT = pyaudio.paInt16
RATE = 44100
RECORD_SECONDS = 2
WAVE_OUTPUT_FILENAME = "tmp.wav"
p = pyaudio.PyAudio()
for i in range(0, p.get_device_count()):
print(i, p.get_device_info_by_index(i)['name'])
#stream using as_loopback to get sound from OS
stream = p.open(
format = FORMAT,
channels = 2,
rate = RATE,
input=True,
frames_per_buffer=CHUNK,
input_device_index=2,
as_loopback=True)
##stream using my Microphone's input device
stream2 = p.open(
format = FORMAT,
channels = 1,
rate = RATE,
input=True,
frames_per_buffer=CHUNK,
input_device_index=1)
#as_loopback=False)
frames = []
frames2 = []
for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
data = stream.read(CHUNK)
data2 = stream2.read(CHUNK)
frames.append(data)
frames2.append(data2)
#frames = as_loopback sound data (Speakers)
frames= b''.join(frames);
#frames2 = sound data of Microphone
frames2= b''.join(frames2);
#decoding Speaker data
Sdecoded = np.frombuffer(frames, 'int16')
#decoding the microphone data
Mdecoded = np.frombuffer(frames2, 'int16')
#converting Speaker data into a Numpy vector (making life easier when picking up audio channels)
Sdecoded= np.array(Sdecoded, dtype='int16')
#getting the data on the right side
direito=Sdecoded[1::2]
#getting the data on the left side
esquerdo=Sdecoded[::2]
#mixing everything to mono = add right side + left side + Microphone decoded data that is already mono
mix=(direito+esquerdo+Mdecoded)
#ensuring no value goes beyond the limits of short int
signal=np.clip(mix, -32767, 32766)
#encode the data again
encodecoded = wave.struct.pack("%dh"%(len(signal)), *list(signal))
#stop all streams and terminate pyaudio
stream.stop_stream()
stream.close()
stream2.stop_stream()
stream2.close()
p.terminate()
#recording mixed audio in mono
wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
wf.setnchannels(1)
wf.setsampwidth(p.get_sample_size(FORMAT))
wf.setframerate(RATE)
wf.writeframes((encodecoded))
wf.close()
But when i try to mix with my code it doesn't work.What is going on?I think i'm almost solving it
import numpy as np
import pyaudio
import wave
import threading
import time
import subprocess
CHUNK = 1024
FORMAT = pyaudio.paInt16
RATE = 44100
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "tmp/tmp.wav"
p = pyaudio.PyAudio()
for i in range(0, p.get_device_count()):
print(i, p.get_device_info_by_index(i)['name'])
class recorder:
def __init__(self):
self.going = False
self.process = None
self.filename = "ScreenCapture.mpg"
def record(self,filename):
try:
if self.process.is_alive():
self.going = False
except AttributeError:
print("test")
self.process = threading.Thread(target=self._record)
self.process.start()
self.filename = filename
def _record(self):
p = pyaudio.PyAudio()
#stream using as_loopback to get sound from OS
stream = p.open(
format=FORMAT,
channels=2,
rate=RATE,
input=True,
frames_per_buffer=CHUNK,
input_device_index=2,
as_loopback=True)
##stream using my Microphone's input device
stream2 = p.open(
format=FORMAT,
channels=1,
rate=RATE,
input=True,
frames_per_buffer=CHUNK,
input_device_index=1)
# as_loopback=False)
#print("* recording")
frames = []
frames2= []
self.going = True
while self.going:
data = stream.read(CHUNK)
data2 = stream2.read(CHUNK)
frames.append(data)
frames2.append(data2)
# frames = as_loopback sound data (Speakers)
frames = b''.join(frames);
# frames2 = sound data of Microphone
frames2 = b''.join(frames2);
# decoding Speaker data
Sdecoded = np.frombuffer(frames, 'int16')
# decoding the microphone data
Mdecoded = np.frombuffer(frames2, 'int16')
# converting Speaker data into a Numpy vector (making life easier when picking up audio channels)
Sdecoded = np.array(Sdecoded, dtype='int16')
# getting the data on the right side
direito = Sdecoded[1::2]
# getting the data on the left side
esquerdo = Sdecoded[::2]
# mixing everything to mono = add right side + left side + Microphone decoded data that is already mono
mix = (direito + esquerdo + Mdecoded)
# ensuring no value goes beyond the limits of short int
signal = np.clip(mix, -32767, 32766)
# encode the data again
encodecoded = wave.struct.pack("%dh" % (len(signal)), *list(signal))
# print("* done recording")
stream.stop_stream()
stream.close()
stream2.stop_stream()
stream2.close()
p.terminate()
wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
wf.setnchannels(1)
wf.setsampwidth(p.get_sample_size(FORMAT))
wf.setframerate(RATE)
wf.writeframes(encodecoded)
wf.close()
def stop_recording(self):
self.going = False
I made the code very clean and commented each part so you understand what is going on.I made a for loop in the beginning for Pyaudio show me what are the interfaces I have in my OS:
0 Mapeador de som da Microsoft - Input
1 Microfone (Realtek(R) Audio)
2 Mixagem estéreo (Realtek(R) Aud
3 Mapeador de som da Microsoft - Output
4 Alto-falantes (Realtek(R) Audio
5 Alto-falantes (Realtek(R) Audio)
6 Microfone (Realtek(R) Audio)
7 Mixagem estéreo (Realtek(R) Audio)
8 Speakers 1 (Realtek HD Audio output with SST)
9 Speakers 2 (Realtek HD Audio output with SST)
10 Alto-falante (Realtek HD Audio output with SST)
11 Microfone (Realtek HD Audio Mic input)
12 Mixagem estéreo (Realtek HD Audio Stereo input)
Upvotes: 5
Views: 12510
Reputation: 1150
You can use 2 separate threads to record from 2 different devices ( providing separate device Index) into separate Wav files.
Then Mix these 2 files Using the pydub library
from pydub import AudioSegment
speakersound = AudioSegment.from_file("/path/speaker.wav")
micsound = AudioSegment.from_file("/path/mic.wav")
mixsound = speakersound.overlay(micsound)
mixsound.export("/path/mixsound.wav", format='wav')
Upvotes: 3