Reputation: 147
I am looking to have a python script run in the background and use pyaudio to record sound files when the threshold of the microphone has reached a certain point. This is for a monitor on a two-way radio network; hence we only want to record transmitted audio.
Tasks in mind:
Record audio input on a n% gate threshold
stop recording after so many seconds of silence
keep recording for so many seconds after audio
Phase 2: input data into MySQL database to search the recordings
I am looking at a file structure of the similar
/home/Recodings/2013/8/23/12-33.wav would be a recording of the transmission on 23/08/2013 at 12:33.
I have used the code from
Detect and record a sound with python
I am at a bit of a loss where to go from here now and a little guidance would be greatly appreciated
thank you
Upvotes: 8
Views: 35059
Reputation: 11
I fixed the code above by Mike Schultz. I also tried to make the RMS threshold value adjust automatically based on the microphone's background noise, but failed miserably. So, you have to manually set the threshold to your microphone's noise level.
import pyaudio
import math
import struct
import wave
import time
import datetime
import os
# --- tuning constants -------------------------------------------------
TRIGGER_RMS = 10 # start recording above 10
RATE = 16000 # sample rate
TIMEOUT_SECS = 1 # silence time after which recording stops
FRAME_SECS = 0.25 # length of frame(chunks) to be processed at once in secs
CUSHION_SECS = 1 # amount of recording before and after sound
SHORT_NORMALIZE = (1.0/32768.0) # scale factor: 16-bit sample -> [-1.0, 1.0]
FORMAT = pyaudio.paInt16
CHANNELS = 1
SHORT_WIDTH = 2 # bytes per 16-bit sample
CHUNK = int(RATE * FRAME_SECS) # samples per processed frame
CUSHION_FRAMES = int(CUSHION_SECS / FRAME_SECS) # frames of quiet kept before/after sound
TIMEOUT_FRAMES = int(TIMEOUT_SECS / FRAME_SECS) # trailing quiet frames that end a recording
f_name_directory = './' # directory where WAV files are written
class Recorder:
    """Record microphone audio to timestamped WAV files.

    Recording is triggered when the scaled RMS level of a frame exceeds
    TRIGGER_RMS and stops TIMEOUT_SECS after the level last broke the
    threshold.  A circular buffer of quiet frames (self.quiet) supplies
    CUSHION_SECS of audio from before the trigger point so the start of
    a transmission is not clipped.
    """

    @staticmethod
    def rms(frame):
        """Return the RMS level of a frame of 16-bit samples, scaled by 1000."""
        count = len(frame) / SHORT_WIDTH
        format = "%dh" % (count)
        shorts = struct.unpack(format, frame)
        sum_squares = 0.0
        for sample in shorts:
            n = sample * SHORT_NORMALIZE
            sum_squares += n * n
        rms = math.pow(sum_squares / count, 0.5)
        return rms * 1000

    def __init__(self):
        self.p = pyaudio.PyAudio()
        self.stream = self.p.open(format=FORMAT,
                                  channels=CHANNELS,
                                  rate=RATE,
                                  input=True,
                                  output=True,
                                  frames_per_buffer=CHUNK)
        self.time = time.time()
        self.quiet = []        # circular buffer of recent quiet frames
        self.quiet_idx = -1    # index of the most recently stored quiet frame
        self.timeout = 0       # wall-clock time at which the current recording ends

    def record(self):
        """Main loop: capture frames forever, writing one file per sound burst."""
        print('')
        sound = []
        start = time.time()
        begin_time = None
        while True:
            data = self.stream.read(CHUNK)
            rms_val = self.rms(data)
            if self.inSound(data):
                sound.append(data)
                if begin_time is None:
                    begin_time = datetime.datetime.now()
            else:
                if len(sound) > 0:
                    # sound just ended: flush it to disk
                    self.write(sound, begin_time)
                    sound.clear()
                    begin_time = None
                else:
                    # still quiet: remember the frame for the pre-sound cushion
                    self.queueQuiet(data)
            curr = time.time()
            secs = int(curr - start)
            tout = 0 if self.timeout == 0 else int(self.timeout - curr)
            label = 'Listening' if self.timeout == 0 else 'Recording'
            print('[+] %s: Level=[%4.2f] Secs=[%d] Timeout=[%d]' % (label, rms_val, secs, tout), end='\r')

    # quiet is a circular buffer of size CUSHION_FRAMES
    def queueQuiet(self, data):
        """Store a quiet frame, overwriting the oldest once the buffer is full."""
        self.quiet_idx += 1
        # start over again on overflow
        if self.quiet_idx == CUSHION_FRAMES:
            self.quiet_idx = 0
        # fill up the queue
        if len(self.quiet) < CUSHION_FRAMES:
            self.quiet.append(data)
        # replace the element at the index in a circular loop: 0 -> 1 -> 2 -> 3 -> 0 ...
        else:
            self.quiet[self.quiet_idx] = data

    def dequeueQuiet(self, sound):
        """Return the buffered quiet frames (oldest first) followed by sound."""
        if len(self.quiet) == 0:
            return sound
        ret = []
        if len(self.quiet) < CUSHION_FRAMES:
            # BUG FIX: the original used ret.append(self.quiet), which nested
            # the whole list inside ret and broke the later b''.join(...).
            ret.extend(self.quiet)
        else:
            # buffer is full: the oldest frame sits just after quiet_idx
            ret.extend(self.quiet[self.quiet_idx + 1:])
            ret.extend(self.quiet[:self.quiet_idx + 1])
        ret.extend(sound)
        return ret

    def inSound(self, data):
        """True while the level is above TRIGGER_RMS or the timeout is running."""
        rms = self.rms(data)
        curr = time.time()
        if rms > TRIGGER_RMS:
            # loud frame: (re)arm the silence timeout
            self.timeout = curr + TIMEOUT_SECS
            return True
        if curr < self.timeout:
            return True
        self.timeout = 0
        return False

    def write(self, sound, begin_time):
        """Trim trailing silence down to CUSHION_FRAMES and save a WAV file."""
        # insert the pre-sound quiet frames into sound
        sound = self.dequeueQuiet(sound)
        # sound ends with TIMEOUT_FRAMES of quiet; remove all but CUSHION_FRAMES
        keep_frames = len(sound) - TIMEOUT_FRAMES + CUSHION_FRAMES
        recording = b''.join(sound[0:keep_frames])
        filename = begin_time.strftime('%Y-%m-%d_%H.%M.%S')
        pathname = os.path.join(f_name_directory, '{}.wav'.format(filename))
        wf = wave.open(pathname, 'wb')
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(self.p.get_sample_size(FORMAT))
        wf.setframerate(RATE)
        wf.writeframes(recording)
        wf.close()
        print('[+] Saved: {}'.format(pathname))
if __name__ == '__main__':
    # Run the recorder indefinitely only when executed as a script,
    # so importing this module does not open the audio device.
    a = Recorder()
    a.record()
Other than that if anyone is trying to detect human speech not all sounds in general, you should look up for something called voice activity detector (VAD) like this one, they provide SDK for multiple platforms good for application development. There also exists webrtc, but it is comparatively slower and less accurate.
Lastly, you can train your own neural network model to detect speech, noise, exact words, or whatever you want, even though it will take significantly more time and effort to do so.
Upvotes: 1
Reputation: 1
I wanted to have a cushion on both sides of the sound recorded so that the recording wouldn't either start or stop abruptly. This allowed me to get rid of the 'listen' method, so it's just always recording.
import pyaudio
import math
import struct
import wave
import time
import datetime
import os
# --- tuning constants -------------------------------------------------
TRIGGER_RMS = 5 # scaled RMS level above which recording starts
#RATE = 44100 # = 300MB/hour
RATE = 22050 # = 150MB/hour
TIMEOUT_SECS = 5 # silence time after which recording stops
FRAME_SECS = 0.25 # length of frame in secs
CUSHION_SECS = 1 # amount of recording before and after sound
SHORT_NORMALIZE = (1.0/32768.0) # scale factor: 16-bit sample -> [-1.0, 1.0]
FORMAT = pyaudio.paInt16
CHANNELS = 1
SHORT_WIDTH = 2 # bytes per 16-bit sample
CHUNK = int(RATE * FRAME_SECS) # samples per processed frame
CUSHION_FRAMES = int(CUSHION_SECS / FRAME_SECS) # frames of quiet kept before/after sound
TIMEOUT_FRAMES = int(TIMEOUT_SECS / FRAME_SECS) # trailing quiet frames that end a recording
f_name_directory = '.' # directory where WAV files are written
class Recorder:
    """Record bursts of sound to timestamped WAV files, with CUSHION_SECS of
    quiet audio preserved on both sides of each burst.

    There is no separate 'listen' phase: the stream is read continuously and
    quiet frames are kept in a circular buffer so the start of a sound is
    never clipped.
    """

    @staticmethod
    def rms(frame):
        """Return the RMS level of a frame of 16-bit samples, scaled by 1000."""
        count = len(frame) / SHORT_WIDTH
        format = "%dh" % (count)
        shorts = struct.unpack(format, frame)
        sum_squares = 0.0
        for sample in shorts:
            n = sample * SHORT_NORMALIZE
            sum_squares += n * n
        rms = math.pow(sum_squares / count, 0.5)
        return rms * 1000

    def __init__(self):
        self.p = pyaudio.PyAudio()
        self.stream = self.p.open(format=FORMAT,
                                  channels=CHANNELS,
                                  rate=RATE,
                                  input=True,
                                  output=True,
                                  frames_per_buffer=CHUNK)
        self.time = time.time()
        self.quiet = []        # circular buffer of recent quiet frames
        self.quiet_idx = -1    # index of the most recently stored quiet frame
        self.timeout = 0       # wall-clock time at which the current recording ends

    def record(self):
        """Main loop: read frames forever, writing one file per sound burst."""
        sound = []
        start = time.time()
        begin_time = None
        while True:
            data = self.stream.read(CHUNK)
            rms_val = self.rms(data)
            if self.inSound(data):
                sound.append(data)
                if begin_time is None:
                    begin_time = datetime.datetime.now()
            else:
                # quiet frame: buffer it (it becomes part of the next cushion),
                # and flush any completed sound to disk
                self.queueQuiet(data)
                if len(sound) > 0:
                    self.write(sound, begin_time)
                    sound.clear()
                    begin_time = None
            curr = time.time()
            secs = int(curr - start)
            tout = 0 if self.timeout == 0 else int(self.timeout - curr)
            label = 'listening' if self.timeout == 0 else 'recording'
            print('%s: level=%4.2f secs=%d timeout=%d ' % (label, rms_val, secs, tout), end='\r')

    # quiet is a circular buffer of size CUSHION_FRAMES
    def queueQuiet(self, data):
        """Store a quiet frame, overwriting the oldest once the buffer is full."""
        self.quiet_idx += 1
        # wrap around on overflow
        if self.quiet_idx == CUSHION_FRAMES:
            self.quiet_idx = 0
        if len(self.quiet) < CUSHION_FRAMES:
            self.quiet.append(data)
        else:
            self.quiet[self.quiet_idx] = data

    def dequeueQuiet(self, sound):
        """Return the buffered quiet frames (oldest first) followed by sound."""
        if len(self.quiet) == 0:
            return sound
        ret = []
        if len(self.quiet) < CUSHION_FRAMES:
            # buffer not yet full: frames are already in insertion order
            ret.extend(self.quiet)
        else:
            # BUG FIX: the newest frame lives at quiet_idx, so the oldest is
            # the one just after it.  The original sliced from quiet_idx
            # itself (and treated quiet_idx == 0 as "in order"), which put
            # the newest quiet frame first.
            ret.extend(self.quiet[self.quiet_idx + 1:])
            ret.extend(self.quiet[:self.quiet_idx + 1])
        ret.extend(sound)
        return ret

    def inSound(self, data):
        """True while the level is at/above TRIGGER_RMS or the timeout is running."""
        rms = self.rms(data)
        curr = time.time()
        if rms >= TRIGGER_RMS:
            # loud frame: (re)arm the silence timeout
            self.timeout = curr + TIMEOUT_SECS
            return True
        if curr < self.timeout:
            return True
        self.timeout = 0
        return False

    def write(self, sound, begin_time):
        """Trim trailing silence down to CUSHION_FRAMES and save a WAV file."""
        # insert the pre-sound quiet frames into sound
        sound = self.dequeueQuiet(sound)
        # sound ends with TIMEOUT_FRAMES of quiet; remove all but CUSHION_FRAMES
        keep_frames = len(sound) - TIMEOUT_FRAMES + CUSHION_FRAMES
        recording = b''.join(sound[0:keep_frames])
        filename = begin_time.strftime('%Y-%m-%d_%H.%M.%S')
        pathname = os.path.join(f_name_directory, '{}.wav'.format(filename))
        wf = wave.open(pathname, 'wb')
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(self.p.get_sample_size(FORMAT))
        wf.setframerate(RATE)
        wf.writeframes(recording)
        wf.close()
        print('')
        print('writing: {}'.format(pathname))
        print('')
Upvotes: 0
Reputation: 123
For those who have problems installing pyaudio because of the missing portaudio.h, you can do that:
sudo apt-get install portaudio19-dev python-pyaudio python3-pyaudio
the answer is from: portaudio.h: No such file or directory
Upvotes: 0
Reputation: 3478
Some time ago I wrote some of the steps
Record audio input on a n% gate threshold
A: Start a Boolean variable type for "Silence" and you can calculate RMS to decide if Silence is true or False, Set one RMS Threshold
stop recording after so many seconds of silence
A: You need to calculate a timeout. For that, get the frame rate and the chunk size, and decide how many seconds you want; then compute the timeout as (FrameRate / chunk * Max_Seconds)
keep recording for so many seconds after audio
A: If Silence is False (i.e. RMS > Threshold), get the last chunk of audio data (LastBlock) and just keep recording :-)
Phase 2: input data into MySQL database to search the recordings
A: This step is up to you
Source code:
import pyaudio
import math
import struct
import wave
#Assuming Energy threshold upper than 30 dB
Threshold = 30
SHORT_NORMALIZE = (1.0/32768.0) # scale factor: 16-bit sample -> [-1.0, 1.0]
chunk = 1024 # frames read from the stream per call
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000 # sample rate in Hz
swidth = 2 # bytes per 16-bit sample
Max_Seconds = 10 # how long to keep recording after speech is detected
TimeoutSignal=((RATE / chunk * Max_Seconds) + 2) # number of chunk reads ~= Max_Seconds of audio
silence = True # listen() loops while this flag is True
FileNameTmp = '/home/Recodings/2013/8/23/12-33.wav' # output WAV path
Time=0 # iteration counter used for the no-speech timeout
all =[] # NOTE(review): shadows the builtin all(); accumulates recorded chunks
def GetStream(chunk):
    # Read one chunk of frames from the module-level input stream.
    return stream.read(chunk)
def rms(frame):
    """Return the RMS level (scaled by 1000) of a frame of 16-bit samples."""
    count = len(frame)/swidth
    # unpack the raw bytes as `count` signed shorts (16-bit ints)
    shorts = struct.unpack("%dh" % count, frame)
    sum_squares = 0.0
    for sample in shorts:
        normalized = sample * SHORT_NORMALIZE
        sum_squares += normalized * normalized
    # root of the mean square, scaled for readability
    return math.pow(sum_squares/count, 0.5) * 1000
def WriteSpeech(WriteData):
    # Stop and tear down the audio stream, then save WriteData to
    # FileNameTmp as a WAV file.  NOTE(review): the stream is closed
    # here, so recording cannot resume after this call even though
    # KeepRecord() later re-enters listen().
    stream.stop_stream()
    stream.close()
    p.terminate()
    wf = wave.open(FileNameTmp, 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(WriteData)
    wf.close()
def KeepRecord(TimeoutSignal, LastBlock):
    # Keep recording for TimeoutSignal further chunks after speech was
    # detected (starting with LastBlock, the chunk that triggered), then
    # write the file and go back to listening.
    all.append(LastBlock)
    for i in range(0, TimeoutSignal):
        try:
            data = GetStream(chunk)
        except:
            continue
        # (answer edit) append moved inside the loop so every chunk is kept
        all.append(data)
    print "end record after timeout";
    data = ''.join(all)
    print "write to File";
    WriteSpeech(data)
    silence = True  # NOTE(review): local variable only; the module-level flag is unchanged
    Time=0
    # NOTE(review): mutual recursion with listen(); over long runs this
    # grows the call stack — confirm this is acceptable for the use case
    listen(silence,Time)
def listen(silence,Time):
print "waiting for Speech"
while silence:
try:
input = GetStream(chunk)
except:
continue
rms_value = rms(input)
if (rms_value > Threshold):
silence=False
LastBlock=input
print "hello ederwander I'm Recording...."
KeepRecord(TimeoutSignal, LastBlock)
Time = Time + 1
if (Time > TimeoutSignal):
print "Time Out No Speech Detected"
sys.exit()
# Open a full-duplex PyAudio stream on the default device and start listening.
p = pyaudio.PyAudio()
stream = p.open(format = FORMAT,
                channels = CHANNELS,
                rate = RATE,
                input = True,
                output = True,
                frames_per_buffer = chunk)
listen(silence,Time)
Upvotes: 13
Reputation: 13498
The current top answer is a bit outdated and only works for python 2. Here is a version updated for python 3. It wraps the functions into classes and packages everything into one simple easy-to-use version. Note that there is one key difference between the top answer and my script:
The script at the top records for one file and then stops, while my script keeps recording whenever noise is detected and dumps the recordings into a directory as it goes.
The main idea for both scripts are pretty similar:
Step 1: 'Listen' until rms becomes greater than the threshold
Step 2: Start recording, set a timer for when to stop recording, == TIMEOUT_LENGTH
Step 3: If the rms breaks the threshold again before the timer times out, reset the timer
Step 4: Now that the timer is expired, write the recording to a directory and go back to step 1
import pyaudio
import math
import struct
import wave
import time
import os
Threshold = 10 # scaled RMS level above which recording starts
SHORT_NORMALIZE = (1.0/32768.0) # scale factor: 16-bit sample -> [-1.0, 1.0]
chunk = 1024 # frames read from the stream per call
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000 # sample rate in Hz
swidth = 2 # bytes per 16-bit sample
TIMEOUT_LENGTH = 5 # seconds of quiet after which a recording stops
f_name_directory = r'C:\Users\Jason\PyCharmProjects\AutoRecorder\records' # output directory
class Recorder:
    """Listen on the default input device and save a numbered WAV file for
    every burst of noise whose scaled RMS level exceeds Threshold."""

    @staticmethod
    def rms(frame):
        """Return the RMS level of a frame of 16-bit samples, scaled by 1000."""
        n_samples = len(frame) / swidth
        samples = struct.unpack("%dh" % n_samples, frame)
        total = 0.0
        for s in samples:
            scaled = s * SHORT_NORMALIZE
            total += scaled * scaled
        return math.pow(total / n_samples, 0.5) * 1000

    def __init__(self):
        self.p = pyaudio.PyAudio()
        self.stream = self.p.open(format=FORMAT,
                                  channels=CHANNELS,
                                  rate=RATE,
                                  input=True,
                                  output=True,
                                  frames_per_buffer=chunk)

    def record(self):
        """Capture audio until TIMEOUT_LENGTH seconds pass without the level
        breaking Threshold, then hand the captured bytes to write()."""
        print('Noise detected, recording beginning')
        frames = []
        now = time.time()
        deadline = time.time() + TIMEOUT_LENGTH
        while now <= deadline:
            data = self.stream.read(chunk)
            if self.rms(data) >= Threshold:
                # noise again: push the stop deadline back
                deadline = time.time() + TIMEOUT_LENGTH
            now = time.time()
            frames.append(data)
        self.write(b''.join(frames))

    def write(self, recording):
        """Save the recording under the next sequential numeric filename."""
        n_files = len(os.listdir(f_name_directory))
        filename = os.path.join(f_name_directory, '{}.wav'.format(n_files))
        wf = wave.open(filename, 'wb')
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(self.p.get_sample_size(FORMAT))
        wf.setframerate(RATE)
        wf.writeframes(recording)
        wf.close()
        print('Written to file: {}'.format(filename))
        print('Returning to listening')

    def listen(self):
        """Block forever, starting a recording whenever the level breaks Threshold."""
        print('Listening beginning')
        while True:
            frame = self.stream.read(chunk)
            if self.rms(frame) > Threshold:
                self.record()
if __name__ == '__main__':
    # Start listening only when executed as a script, so importing this
    # module does not open the audio device.
    a = Recorder()
    a.listen()
Upvotes: 24
Reputation: 381
So you just need the getLevel(data)
function?
A quick hack would be:
def getLevel(data):
    """Return the sum of squared byte values of an audio buffer.

    The result grows with volume; pick a trigger threshold by trial and
    error.  Accepts both a Python 2 str (iteration yields 1-char strings)
    and a Python 3 bytes object (iteration yields ints) — the original
    unconditional ord(b) raised TypeError on Python 3 bytes.
    """
    sqrsum = 0
    for b in data:
        if not isinstance(b, int):  # py2 str / single characters
            b = ord(b)
        sqrsum += b * b
    return sqrsum
That should increase with volume. Set your threshold appropriately through trial and error.
Upvotes: 0