ZeroG
ZeroG

Reputation: 147

Python record audio on detected sound

I am looking to have a Python script run in the background and use pyaudio to record sound files whenever the microphone level rises above a certain threshold. This is for a monitor on a two-way radio network, so we only want to record transmitted audio.

Tasks in mind:

I am looking at a file structure similar to this:

/home/Recodings/2013/8/23/12-33.wav would be a recording of the transmission on 23/08/2013 @ 12:33

I have used the code from

Detect and record a sound with python

I am at a bit of a loss where to go from here now and a little guidance would be greatly appreciated

thank you

Upvotes: 8

Views: 35059

Answers (6)

Sujal Kaintura
Sujal Kaintura

Reputation: 11

I fixed the code above by Mike Schultz. I also tried to make the value of rms threshold set up automatically depending on the microphone noise but failed miserably. So, you have to manually set the threshold to your microphone's noise level.

import pyaudio
import math
import struct
import wave
import time
import datetime
import os

# --- Tunable parameters ---
TRIGGER_RMS = 10 # start recording when the scaled RMS level (see Recorder.rms) is above 10
RATE = 16000 # sample rate in Hz
TIMEOUT_SECS = 1 # silence time (seconds) after which recording stops
FRAME_SECS = 0.25 # length of one frame (chunk) processed at a time, in seconds
CUSHION_SECS = 1 # amount of recording (seconds) kept before and after the sound

# --- Values derived from the parameters above ---
SHORT_NORMALIZE = (1.0/32768.0) # scales a signed 16-bit sample into [-1.0, 1.0)
FORMAT = pyaudio.paInt16
CHANNELS = 1
SHORT_WIDTH = 2 # bytes per 16-bit sample
CHUNK = int(RATE * FRAME_SECS) # samples per frame
CUSHION_FRAMES = int(CUSHION_SECS / FRAME_SECS) # cushion length expressed in frames
TIMEOUT_FRAMES = int(TIMEOUT_SECS / FRAME_SECS) # timeout length expressed in frames

f_name_directory = './' # directory the .wav files are written to

class Recorder:
    """Monitor the audio input and save each detected sound to its own WAV file.

    A frame whose scaled RMS level is above TRIGGER_RMS starts (or extends) a
    recording; the recording ends once TIMEOUT_SECS elapse without another
    loud frame. Up to CUSHION_FRAMES quiet frames heard just before the sound
    are kept in a circular buffer and prepended to the recording so that it
    does not start abruptly.
    """

    @staticmethod
    def rms(frame):
        """Return the RMS level of *frame*, scaled by 1000.

        frame: bytes holding native-order signed 16-bit samples.
        """
        # Integer division: the sample count feeds a struct format string.
        count = len(frame) // SHORT_WIDTH
        shorts = struct.unpack("%dh" % count, frame)

        sum_squares = 0.0
        for sample in shorts:
            n = sample * SHORT_NORMALIZE
            sum_squares += n * n
        rms = math.pow(sum_squares / count, 0.5)

        return rms * 1000

    def __init__(self):
        """Open the PyAudio input stream and initialise the recorder state."""
        self.p = pyaudio.PyAudio()
        # Input-only stream: nothing is ever played back, so no output
        # device is requested.
        self.stream = self.p.open(format=FORMAT,
                                  channels=CHANNELS,
                                  rate=RATE,
                                  input=True,
                                  frames_per_buffer=CHUNK)
        self.time = time.time()
        self.quiet = []        # circular buffer of the latest quiet frames
        self.quiet_idx = -1    # index of the most recently written slot
        self.timeout = 0       # time at which the current sound expires (0 = idle)

    def record(self):
        """Run forever: read frames, detect sounds and write each one to disk."""
        print('')
        sound = []
        start = time.time()
        begin_time = None
        while True:
            data = self.stream.read(CHUNK)
            rms_val = self.rms(data)
            if self.inSound(data):
                sound.append(data)
                if begin_time is None:
                    begin_time = datetime.datetime.now()
            else:
                if sound:
                    # The sound just ended: flush it to disk first ...
                    self.write(sound, begin_time)
                    sound.clear()
                    begin_time = None
                # ... then keep this quiet frame as pre-roll for the next
                # sound (queued after the write so it cannot end up at the
                # front of the file that was just written).
                self.queueQuiet(data)

            curr = time.time()
            secs = int(curr - start)
            tout = 0 if self.timeout == 0 else int(self.timeout - curr)
            label = 'Listening' if self.timeout == 0 else 'Recording'
            print('[+] %s: Level=[%4.2f] Secs=[%d] Timeout=[%d]' % (label, rms_val, secs, tout), end='\r')

    # quiet is a circular buffer of size cushion
    def queueQuiet(self, data):
        """Store a quiet frame, overwriting the oldest one once the buffer is full."""
        self.quiet_idx += 1
        # start over again on overflow
        if self.quiet_idx == CUSHION_FRAMES:
            self.quiet_idx = 0

        # fill up the queue
        if len(self.quiet) < CUSHION_FRAMES:
            self.quiet.append(data)
        # replace the element at the index in a circular loop: 0 -> 1 -> 2 -> 3 -> 0 ...
        else:
            self.quiet[self.quiet_idx] = data

    def dequeueQuiet(self, sound):
        """Return the buffered quiet frames (oldest first) followed by *sound*.

        The quiet buffer is emptied afterwards so that frames already written
        to one file are not prepended to the next one as well.
        """
        if not self.quiet:
            return sound

        if len(self.quiet) < CUSHION_FRAMES:
            # Buffer never wrapped: frames are already in chronological order.
            # (extend, not append: appending the list itself would nest it and
            # make b''.join() fail later.)
            ret = list(self.quiet)
        else:
            # quiet_idx is the newest frame, so the oldest one sits right after it.
            oldest = self.quiet_idx + 1
            ret = self.quiet[oldest:] + self.quiet[:oldest]
        ret.extend(sound)

        self.quiet = []
        self.quiet_idx = -1
        return ret

    def inSound(self, data):
        """Return True while a sound is in progress.

        A frame above TRIGGER_RMS (re)arms the timeout; quieter frames still
        count as part of the sound until that timeout has passed.
        """
        rms = self.rms(data)
        curr = time.time()

        if rms > TRIGGER_RMS:
            self.timeout = curr + TIMEOUT_SECS
            return True

        if curr < self.timeout:
            return True

        self.timeout = 0
        return False

    def write(self, sound, begin_time):
        """Write *sound* (a list of frames) to '<begin_time>.wav' in f_name_directory."""
        # insert the pre-sound quiet frames into sound
        sound = self.dequeueQuiet(sound)

        # sound ends with TIMEOUT_FRAMES of quiet
        # remove all but CUSHION_FRAMES
        keep_frames = len(sound) - TIMEOUT_FRAMES + CUSHION_FRAMES
        recording = b''.join(sound[0:keep_frames])

        filename = begin_time.strftime('%Y-%m-%d_%H.%M.%S')
        pathname = os.path.join(f_name_directory, '{}.wav'.format(filename))

        # The context manager closes the file even if writing fails.
        with wave.open(pathname, 'wb') as wf:
            wf.setnchannels(CHANNELS)
            wf.setsampwidth(self.p.get_sample_size(FORMAT))
            wf.setframerate(RATE)
            wf.writeframes(recording)
        print('[+] Saved: {}'.format(pathname))

# Create the recorder (this opens the audio stream) and monitor the
# microphone; record() loops forever.
a = Recorder()

a.record()

Other than that, if anyone is trying to detect human speech rather than all sounds in general, you should look up something called a voice activity detector (VAD) like this one; they provide SDKs for multiple platforms, which is good for application development. There is also WebRTC, but it is comparatively slower and less accurate.

Lastly, you can train your own neural network model to detect speech, noise, exact words or whatever you want, even though it will take significantly more time and effort to do so.

Upvotes: 1

Mike Schultz
Mike Schultz

Reputation: 1

I wanted to have a cushion on both sides of the sound recorded so that the recording wouldn't either start or stop abruptly. This allowed me to get rid of the 'listen' method, so it's just always recording.

import pyaudio
import math
import struct
import wave
import time
import datetime
import os

# --- Tunable parameters ---
TRIGGER_RMS = 5 # scaled RMS level (see Recorder.rms) at or above which a frame counts as sound
#RATE = 44100 # = 300MB/hour
RATE = 22050 # = 150MB/hour
TIMEOUT_SECS = 5 # seconds without a loud frame after which the sound is considered over
FRAME_SECS = 0.25 # length of frame in secs
CUSHION_SECS = 1 # amount of recording before and after sound

# --- Values derived from the parameters above ---
SHORT_NORMALIZE = (1.0/32768.0) # scales a signed 16-bit sample into [-1.0, 1.0)
FORMAT = pyaudio.paInt16
CHANNELS = 1
SHORT_WIDTH = 2 # bytes per 16-bit sample
CHUNK = int(RATE * FRAME_SECS) # samples per frame
CUSHION_FRAMES = int(CUSHION_SECS / FRAME_SECS) # cushion length expressed in frames
TIMEOUT_FRAMES = int(TIMEOUT_SECS / FRAME_SECS) # timeout length expressed in frames

f_name_directory = '.' # directory the .wav files are written to

class Recorder:
    """Always-listening recorder that saves each detected sound to a WAV file.

    A frame whose scaled RMS level reaches TRIGGER_RMS starts (or extends) a
    recording; the recording ends once TIMEOUT_SECS elapse without another
    loud frame. Up to CUSHION_FRAMES quiet frames heard just before the sound
    are kept in a circular buffer and prepended, and all but CUSHION_FRAMES
    of the trailing quiet are trimmed, so the file has a cushion on both
    sides of the sound.
    """

    @staticmethod
    def rms(frame):
        """Return the RMS level of *frame*, scaled by 1000.

        frame: bytes holding native-order signed 16-bit samples.
        """
        # Integer division: the sample count feeds a struct format string.
        count = len(frame) // SHORT_WIDTH
        shorts = struct.unpack("%dh" % count, frame)

        sum_squares = 0.0
        for sample in shorts:
            n = sample * SHORT_NORMALIZE
            sum_squares += n * n
        rms = math.pow(sum_squares / count, 0.5)

        return rms * 1000

    def __init__(self):
        """Open the PyAudio input stream and initialise the recorder state."""
        self.p = pyaudio.PyAudio()
        # Input-only stream: nothing is ever played back, so no output
        # device is requested.
        self.stream = self.p.open(format=FORMAT,
                                  channels=CHANNELS,
                                  rate=RATE,
                                  input=True,
                                  frames_per_buffer=CHUNK)
        self.time = time.time()
        self.quiet = []        # circular buffer of the latest quiet frames
        self.quiet_idx = -1    # index of the most recently written slot
        self.timeout = 0       # time at which the current sound expires (0 = idle)

    def record(self):
        """Run forever: read frames, detect sounds and write each one to disk."""
        sound = []
        start = time.time()
        begin_time = None

        while True:
            data = self.stream.read(CHUNK)
            rms_val = self.rms(data)

            if self.inSound(data):
                sound.append(data)
                if begin_time is None:
                    begin_time = datetime.datetime.now()
            else:
                if sound:
                    # Flush the finished sound BEFORE queueing this frame;
                    # otherwise a frame heard after the sound would be
                    # prepended to the file as if it were pre-roll.
                    self.write(sound, begin_time)
                    sound.clear()
                    begin_time = None
                self.queueQuiet(data)

            curr = time.time()
            secs = int(curr - start)
            tout = 0 if self.timeout == 0 else int(self.timeout - curr)
            label = 'listening' if self.timeout == 0 else 'recording'
            print('%s: level=%4.2f secs=%d timeout=%d            ' % (label, rms_val, secs, tout), end='\r')

    # quiet is a circular buffer of size cushion
    def queueQuiet(self, data):
        """Store a quiet frame, overwriting the oldest one once the buffer is full."""
        self.quiet_idx += 1
        if self.quiet_idx == CUSHION_FRAMES:
            self.quiet_idx = 0

        if len(self.quiet) < CUSHION_FRAMES:
            self.quiet.append(data)
        else:
            self.quiet[self.quiet_idx] = data

    def dequeueQuiet(self, sound):
        """Return the buffered quiet frames (oldest first) followed by *sound*.

        The quiet buffer is emptied afterwards so that frames already written
        to one file are not prepended to the next one as well.
        """
        if not self.quiet:
            return sound

        if len(self.quiet) < CUSHION_FRAMES:
            # Buffer never wrapped: frames are already in chronological order.
            ret = list(self.quiet)
        else:
            # quiet_idx points at the NEWEST frame (queueQuiet writes there
            # last), so the oldest frame is the one right after it.
            oldest = self.quiet_idx + 1
            ret = self.quiet[oldest:] + self.quiet[:oldest]
        ret.extend(sound)

        self.quiet = []
        self.quiet_idx = -1
        return ret

    def inSound(self, data):
        """Return True while a sound is in progress.

        A frame at or above TRIGGER_RMS (re)arms the timeout; quieter frames
        still count as part of the sound until that timeout has passed.
        """
        rms = self.rms(data)
        curr = time.time()

        if rms >= TRIGGER_RMS:
            self.timeout = curr + TIMEOUT_SECS
            return True

        if curr < self.timeout:
            return True

        self.timeout = 0
        return False

    def write(self, sound, begin_time):
        """Write *sound* (a list of frames) to '<begin_time>.wav' in f_name_directory."""
        # insert the pre-sound quiet frames into sound
        sound = self.dequeueQuiet(sound)

        # sound ends with TIMEOUT_FRAMES of quiet
        # remove all but CUSHION_FRAMES
        keep_frames = len(sound) - TIMEOUT_FRAMES + CUSHION_FRAMES
        recording = b''.join(sound[0:keep_frames])

        filename = begin_time.strftime('%Y-%m-%d_%H.%M.%S')
        pathname = os.path.join(f_name_directory, '{}.wav'.format(filename))

        # The context manager closes the file even if writing fails.
        with wave.open(pathname, 'wb') as wf:
            wf.setnchannels(CHANNELS)
            wf.setsampwidth(self.p.get_sample_size(FORMAT))
            wf.setframerate(RATE)
            wf.writeframes(recording)
        print('')
        print('writing: {}'.format(pathname))
        print('')

Upvotes: 0

Rodrigo
Rodrigo

Reputation: 123

For those who have problems installing pyaudio because of the missing portaudio.h, you can do that:

sudo apt-get install portaudio19-dev python-pyaudio python3-pyaudio

the answer is from: portaudio.h: No such file or directory

Upvotes: 0

ederwander
ederwander

Reputation: 3478

Some time ago I wrote some of the steps

  • Record audio input on a n% gate threshold

A: Start with a Boolean variable for "Silence"; calculate the RMS to decide whether Silence is True or False, and set an RMS threshold.

  • stop recording after so many seconds of silence

A: You need to calculate a timeout. For that, take the frame rate, the chunk size and how many seconds you want; the timeout is (FrameRate / chunk * Max_Seconds).

  • keep recording for so many seconds after audio

A: If Silence is False (RMS > Threshold), take the last chunk of audio data (LastBlock) and just keep recording :-)

  • Phase 2: input data into MySQL database to search the recordings

A: This step is up to you

Source code:

import pyaudio
import math
import struct
import wave

# Trigger level, on the scale returned by rms() below (RMS of the normalized
# samples multiplied by 1000).
# NOTE(review): the original comment called this "30 dB", but rms() does not
# compute decibels - tune this value against your microphone's noise level.
Threshold = 30

SHORT_NORMALIZE = (1.0/32768.0)  # scales a signed 16-bit sample into [-1.0, 1.0)
chunk = 1024                     # frames read from the stream per call
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000                     # sample rate in Hz
swidth = 2                       # bytes per 16-bit sample
Max_Seconds = 10
# Number of chunks that make up Max_Seconds of audio (plus a margin of 2).
# Floor division keeps this an int on Python 3 as well, which range() needs;
# on Python 2 the result is identical to the original "/".
TimeoutSignal=((RATE // chunk * Max_Seconds) + 2)
silence = True
FileNameTmp = '/home/Recodings/2013/8/23/12-33.wav'
Time=0
all =[]  # chunks recorded so far (NOTE: shadows the builtin all())

def GetStream(chunk):
    """Read `chunk` frames from the module-level `stream` and return the raw data."""
    return stream.read(chunk)
def rms(frame):
    """Return the RMS level of `frame`, scaled by 1000.

    frame: raw bytes holding native-order signed 16-bit samples. An empty
    frame yields 0.0 instead of dividing by zero, and a trailing odd byte
    (an incomplete sample) is ignored.
    """
    # Floor division: the sample count goes into a struct format string, so
    # it must be an integer on Python 3 too.
    count = len(frame) // swidth
    if count == 0:
        return 0.0
    # short is 16 bit int
    shorts = struct.unpack("%dh" % count, frame[:count * swidth])

    sum_squares = 0.0
    for sample in shorts:
        n = sample * SHORT_NORMALIZE
        sum_squares += n*n
    # compute the rms
    rms = math.pow(sum_squares/count, 0.5)
    return rms * 1000

def WriteSpeech(WriteData):
    """Shut the audio stream down and write `WriteData` to FileNameTmp as a WAV file.

    WriteData: the raw audio data passed to writeframes().
    """
    # The stream is stopped and closed and PyAudio is terminated BEFORE the
    # file is written, so no further GetStream() reads are possible afterwards.
    stream.stop_stream()
    stream.close()
    p.terminate()
    wf = wave.open(FileNameTmp, 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(WriteData)
    wf.close()

def KeepRecord(TimeoutSignal, LastBlock):
    """Record TimeoutSignal more chunks after LastBlock and write them to file.

    TimeoutSignal: number of chunks to read after the triggering one.
    LastBlock: the chunk that crossed the threshold; it is kept as the start
        of the recording.

    The function returns after the file has been written. It deliberately does
    NOT call listen() again: WriteSpeech() closes the stream and terminates
    PyAudio, so going back to listening could only spin forever on read errors.
    """
    all.append(LastBlock)
    # int(): range() needs an integer even if the caller computed a float.
    for i in range(0, int(TimeoutSignal)):
        try:
            data = GetStream(chunk)
        except IOError:
            # A failed read (e.g. input overflow) drops this chunk only.
            continue
        all.append(data)

    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("end record after timeout")
    # The chunks are raw bytes; b'' is plain str on Python 2.
    data = b''.join(all)
    print("write to File")
    WriteSpeech(data)

def listen(silence,Time):
    """Wait for a chunk louder than Threshold, then hand over to KeepRecord().

    silence: loop flag; listening continues while it is true.
    Time: number of chunks already waited; once it exceeds TimeoutSignal the
        script exits with "Time Out No Speech Detected".
    """
    # Imported here because the script never imports sys at the top, which
    # made the original sys.exit() fail with a NameError.
    import sys

    print("waiting for Speech")
    while silence:
        try:
            block = GetStream(chunk)
        except IOError:
            # Only read errors are tolerated (a bare except would also swallow
            # Ctrl-C). A failed read still counts towards the timeout below,
            # so a dead stream cannot keep this loop spinning forever.
            block = None
        if block is not None and rms(block) > Threshold:
            silence=False
            print("hello ederwander I'm Recording....")
            KeepRecord(TimeoutSignal, block)
        Time = Time + 1
        if (Time > TimeoutSignal):
            print("Time Out No Speech Detected")
            sys.exit()

# Script entry point: open the audio stream that GetStream() reads from and
# WriteSpeech() closes, then start listening.
p = pyaudio.PyAudio()

stream = p.open(format = FORMAT,
    channels = CHANNELS,
    rate = RATE,
    input = True,
    output = True,
    frames_per_buffer = chunk)

listen(silence,Time)

Upvotes: 13

Primusa
Primusa

Reputation: 13498

The current top answer is a bit outdated and only works for python 2. Here is a version updated for python 3. It wraps the functions into classes and packages everything into one simple easy-to-use version. Note that there is one key difference between the top answer and my script:

The script at the top records for one file and then stops, while my script keeps recording whenever noise is detected and dumps the recordings into a directory as it goes.

The main idea of both scripts is pretty similar:

Step 1: 'Listen' until rms becomes greater than the threshold

Step 2: Start recording, set a timer for when to stop recording, == TIMEOUT_LENGTH

Step 3: If the rms breaks threshold again before the timer times out reset the timer

Step 4: Now that the timer is expired, write the recording to a directory and go back to step 1

import pyaudio
import math
import struct
import wave
import time
import os

# Scaled RMS level (see Recorder.rms) that counts as noise
Threshold = 10

SHORT_NORMALIZE = (1.0/32768.0)  # scales a signed 16-bit sample into [-1.0, 1.0)
chunk = 1024  # frames read from the stream per call
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000  # sample rate in Hz
swidth = 2  # bytes per 16-bit sample

# Seconds without noise after which a recording is finished and written out
TIMEOUT_LENGTH = 5

# Directory the numbered .wav files are written to
f_name_directory = r'C:\Users\Jason\PyCharmProjects\AutoRecorder\records'

class Recorder:
    """Listen on the audio input and dump every detected noise into a WAV file.

    listen() waits for a chunk whose scaled RMS level is above Threshold and
    then calls record(), which keeps recording until TIMEOUT_LENGTH seconds
    pass without another chunk reaching the threshold, writes the result to
    f_name_directory and returns to listening.
    """

    @staticmethod
    def rms(frame):
        """Return the RMS level of *frame*, scaled by 1000.

        frame: bytes holding native-order signed 16-bit samples.
        """
        # Integer division: the sample count feeds a struct format string.
        count = len(frame) // swidth
        shorts = struct.unpack("%dh" % count, frame)

        sum_squares = 0.0
        for sample in shorts:
            n = sample * SHORT_NORMALIZE
            sum_squares += n * n
        rms = math.pow(sum_squares / count, 0.5)

        return rms * 1000

    def __init__(self):
        """Open the PyAudio input stream."""
        self.p = pyaudio.PyAudio()
        # Input-only stream: nothing is ever played back, so no output
        # device is requested.
        self.stream = self.p.open(format=FORMAT,
                                  channels=CHANNELS,
                                  rate=RATE,
                                  input=True,
                                  frames_per_buffer=chunk)

    def record(self, first_chunk=None):
        """Record until TIMEOUT_LENGTH seconds pass without noise, then write the file.

        first_chunk: optional chunk that triggered the recording; when given
            it becomes the start of the recording so the onset of the sound
            is not lost.
        """
        print('Noise detected, recording beginning')
        rec = [] if first_chunk is None else [first_chunk]
        end = time.time() + TIMEOUT_LENGTH

        while time.time() <= end:
            data = self.stream.read(chunk)
            if self.rms(data) >= Threshold:
                # Noise again: push the deadline back.
                end = time.time() + TIMEOUT_LENGTH
            rec.append(data)
        self.write(b''.join(rec))

    def write(self, recording):
        """Write *recording* (raw sample bytes) to the next free '<n>.wav' file."""
        # Create the target directory on first use instead of crashing in listdir().
        os.makedirs(f_name_directory, exist_ok=True)
        n_files = len(os.listdir(f_name_directory))

        filename = os.path.join(f_name_directory, '{}.wav'.format(n_files))
        # The file count is only a starting guess: skip names that are already
        # taken so an earlier recording is never overwritten.
        while os.path.exists(filename):
            n_files += 1
            filename = os.path.join(f_name_directory, '{}.wav'.format(n_files))

        # The context manager closes the file even if writing fails.
        with wave.open(filename, 'wb') as wf:
            wf.setnchannels(CHANNELS)
            wf.setsampwidth(self.p.get_sample_size(FORMAT))
            wf.setframerate(RATE)
            wf.writeframes(recording)
        print('Written to file: {}'.format(filename))
        print('Returning to listening')

    def listen(self):
        """Run forever: wait for noise above Threshold and record it."""
        print('Listening beginning')
        while True:
            data = self.stream.read(chunk)
            rms_val = self.rms(data)
            if rms_val > Threshold:
                # Hand the triggering chunk over so it is part of the recording.
                self.record(data)

# Create the recorder (this opens the audio stream) and start listening;
# listen() loops forever.
a = Recorder()

a.listen()

Upvotes: 24

ejk314
ejk314

Reputation: 381

So you just need the getLevel(data) function? A quick hack would be:

def getLevel(data):
    """Return a crude loudness figure: the sum of the squared byte values of `data`.

    data: raw audio bytes. Each byte is treated as an unsigned value in
    0..255; this is a quick hack, not a true per-sample energy.
    """
    sqrsum = 0
    # bytearray() yields integers on both Python 2 and Python 3, whereas
    # ord() on the items of a bytes object fails on Python 3.
    for b in bytearray(data):
        sqrsum += b * b
    return sqrsum

That should increase with volume. Set your threshold appropriately through trial and error.

Upvotes: 0

Related Questions