Berry Tsakala
Berry Tsakala

Reputation: 16620

How to capture a video (AND audio) in python, from a camera (or webcam)

i'm looking for a solution, either in linux or in windows, that allows me to

Compression is NOT an issue in my case, and i actually prefer to capture RAW and compress it later.

So far i've done it with an ActiveX component in VB which took care of everything, and i'd like to progress with python (the VB solution is unstable, unreliable).

so far i've seen code that captures VIDEO only, or individual frames...

I've looked so far at

The question - is there a video & audio capture library for python?

or - what are the other options if any?

Upvotes: 43

Views: 104428

Answers (9)

user1098761
user1098761

Reputation: 589

The best way is using ffmpeg (installed with conda) in the terminal

  1. Get video and audio devices in your system

    ffmpeg -list_devices true -f dshow -i dummy

Mine are "Logitech StreamCam" and "Microphone (Logitech StreamCam)"

  1. Then record

    import os
    arg_command = 'ffmpeg -f dshow -threads 4 -r 30 -framerate 60 -video_size 1280x720 -y -i video="Logitech StreamCam":audio="Microphone (Logitech StreamCam)" output.mkv' os.system(arg_command)

  2. Wait until a button or any request is clicked/made

  3. Call this command to stop

    os.system('taskkill /im ffmpeg.exe /t /f')

You can maximize your webcam performance without having thread racing issue.

Upvotes: 0

João Marques
João Marques

Reputation: 141

Using everyone's contributions and following the suggestion of Paul

I was able to come up with the following code:

Recorder.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# VideoRecorder.py

from __future__ import print_function, division
import numpy as np
import sys
import cv2
import pyaudio
import wave
import threading
import time
import subprocess
import os
import ffmpeg

REC_FOLDER = "recordings/"

class Recorder():
    def __init__(self, filename):
        self.filename = filename
        self.video_thread = self.VideoRecorder(self, REC_FOLDER + filename)
        self.audio_thread = self.AudioRecorder(self, REC_FOLDER + filename)

    def startRecording(self):
        self.video_thread.start()
        self.audio_thread.start()

    def stopRecording(self):
        self.video_thread.stop()
        self.audio_thread.stop()

    def saveRecording(self):
        #Save audio / Show video resume
        self.audio_thread.saveAudio()
        self.video_thread.showFramesResume()
        
        #Merges both streams and writes
        video_stream = ffmpeg.input(self.video_thread.video_filename)
        audio_stream = ffmpeg.input(self.audio_thread.audio_filename)
        while (not os.path.exists(self.audio_thread.audio_filename)):
            print("waiting for audio file to exit...")
        stream = ffmpeg.output(video_stream, audio_stream, REC_FOLDER + self.filename +".mp4")

        try:
            ffmpeg.run(stream, capture_stdout=True, capture_stderr=True, overwrite_output=True)
        except ffmpeg.Error as e:
            print(e.stdout, file=sys.stderr)
            print(e.stderr, file=sys.stderr)

    class VideoRecorder():
        "Video class based on openCV"
        def __init__(self, recorder, name, fourcc="MJPG", frameSize=(640,480), camindex=0, fps=15):
            self.recorder = recorder
            self.open = True
            self.duration = 0
            self.device_index = camindex
            self.fps = fps                          # fps should be the minimum constant rate at which the camera can
            self.fourcc = fourcc                    # capture images (with no decrease in speed over time; testing is required)
            self.video_filename = name + ".avi"     # video formats and sizes also depend and vary according to the camera used
            self.video_cap = cv2.VideoCapture(self.device_index, cv2.CAP_DSHOW)
            self.video_writer = cv2.VideoWriter_fourcc(*fourcc)
            self.video_out = cv2.VideoWriter(self.video_filename, self.video_writer, self.fps, frameSize)
            self.frame_counts = 1
            self.start_time = time.time()

        def record(self):
            "Video starts being recorded"
            counter = 1
            while self.open:
                ret, video_frame = self.video_cap.read()
                if ret:
                    self.video_out.write(video_frame)
                    self.frame_counts += 1
                    counter += 1
                    self.duration += 1/self.fps
                    if (video_frame is None): print("I WAS NONEEEEEEEEEEEEEEEEEEEEEE")
                    gray = cv2.cvtColor(video_frame, cv2.COLOR_BGR2GRAY)
                    cv2.imshow('video_frame', gray)
                    cv2.waitKey(1)
                    
                    while(self.duration - self.recorder.audio_thread.duration >= 0.2 and self.recorder.audio_thread.open):
                        time.sleep(0.2)
                else:
                    break

            #Release Video
            self.video_out.release()
            self.video_cap.release()
            cv2.destroyAllWindows()
            self.video_out = None

        def stop(self):
            "Finishes the video recording therefore the thread too"
            self.open=False

        def start(self):
            "Launches the video recording function using a thread"
            self.thread = threading.Thread(target=self.record)
            self.thread.start()

        def showFramesResume(self):
            #Only stop of video has all frames
            frame_counts = self.frame_counts
            elapsed_time = time.time() - self.start_time
            recorded_fps = self.frame_counts / elapsed_time
            print("total frames " + str(frame_counts))
            print("elapsed time " + str(elapsed_time))
            print("recorded fps " + str(recorded_fps))

    class AudioRecorder():
        "Audio class based on pyAudio and Wave"
        def __init__(self, recorder, filename, rate=44100, fpb=1024, channels=1, audio_index=0):
            self.recorder = recorder
            self.open = True
            self.rate = rate
            self.duration = 0
            self.frames_per_buffer = fpb
            self.channels = channels
            self.format = pyaudio.paInt16
            self.audio_filename = filename + ".wav"
            self.audio = pyaudio.PyAudio()
            self.stream = self.audio.open(format=self.format,
                                        channels=self.channels,
                                        rate=self.rate,
                                        input=True,
                                        input_device_index=audio_index,
                                        frames_per_buffer = self.frames_per_buffer)
            self.audio_frames = []

        def record(self):
            "Audio starts being recorded"
            self.stream.start_stream()
            t_start = time.time_ns()
            while self.open:
                try:
                    self.duration += self.frames_per_buffer / self.rate 
                    data = self.stream.read(self.frames_per_buffer)
                    self.audio_frames.append(data)
                except Exception as e:
                    print('\n' + '*'*80)
                    print('PyAudio read exception at %.1fms\n' % ((time.time_ns() - t_start)/10**6))
                    print(e)
                    print('*'*80 + '\n')
                while(self.duration - self.recorder.video_thread.duration >= 0.5):
                    time.sleep(0.5)
            #Closes audio stream
            self.stream.stop_stream()
            self.stream.close()
            self.audio.terminate()

        def stop(self):
            "Finishes the audio recording therefore the thread too"
            self.open = False

        def start(self):
            "Launches the audio recording function using a thread"
            self.thread = threading.Thread(target=self.record)
            self.thread.start()

        def saveAudio(self):
            #Save Audio File
            waveFile = wave.open(self.audio_filename, 'wb')
            waveFile.setnchannels(self.channels)
            waveFile.setsampwidth(self.audio.get_sample_size(self.format))
            waveFile.setframerate(self.rate)
            waveFile.writeframes(b''.join(self.audio_frames))
            waveFile.close()

Main.py

from recorder import Recorder
import time

recorder = Recorder("test1")
recorder.startRecording()
time.sleep(240)
recorder.stopRecording()
recorder.saveRecording()

With this solution, the camera and the audio will wait for each other. I also tried the FFmpeg Re-encoding and Muxing and even though it was able to synchronize the audio with video, the video had a massive quality drop.

Upvotes: 1

Albert G Lieu
Albert G Lieu

Reputation: 911

If you notice misalignment between video and audio by the code above, please see my solution below

I think the most rated answer above does a great job. However, it did not work perfectly when I was using it,especially when you use a low fps rate (say 10). The main issue is with video recording. In order to properly synchronize video and audio recording with ffmpeg, one has to make sure that cv2.VideoCapture() and cv2.VideoWriter() share exact same FPS, because the recorded video time length is solely determined by fps rate and the number of frames.

Following is my suggested update:

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# VideoRecorder.py

from __future__ import print_function, division
import numpy as np
import cv2
import pyaudio
import wave
import threading
import time
import subprocess
import os
import ffmpeg


class VideoRecorder():
    "Video class based on openCV"
    def __init__(self, name="temp_video.avi", fourcc="MJPG", sizex=640, sizey=480, camindex=0, fps=30):
        self.open = True
        self.device_index = camindex
        self.fps = fps                  # fps should be the minimum constant rate at which the camera can
        self.fourcc = fourcc            # capture images (with no decrease in speed over time; testing is required)
        self.frameSize = (sizex, sizey) # video formats and sizes also depend and vary according to the camera used
        self.video_filename = name
        self.video_cap = cv2.VideoCapture(self.device_index)
        self.video_cap.set(cv2.CAP_PROP_FPS, self.fps)
        self.video_writer = cv2.VideoWriter_fourcc(*self.fourcc)
        self.video_out = cv2.VideoWriter(self.video_filename, self.video_writer, self.fps, self.frameSize)
        self.frame_counts = 1
        self.start_time = time.time()

    def record(self):
        "Video starts being recorded"
        # counter = 1
        timer_start = time.time()
        timer_current = 0
        while self.open:
            ret, video_frame = self.video_cap.read()
            if ret:
                self.video_out.write(video_frame)
                # print(str(counter) + " " + str(self.frame_counts) + " frames written " + str(timer_current))
                self.frame_counts += 1
                # print(self.frame_counts)
                # counter += 1
                # timer_current = time.time() - timer_start
                # time.sleep(1/self.fps)
                # gray = cv2.cvtColor(video_frame, cv2.COLOR_BGR2GRAY)
                # cv2.imshow('video_frame', gray)
                # cv2.waitKey(1)
            else:
                break

    def stop(self):
        "Finishes the video recording therefore the thread too"
        if self.open:
            self.open=False
            self.video_out.release()
            self.video_cap.release()
            cv2.destroyAllWindows()

    def start(self):
        "Launches the video recording function using a thread"
        video_thread = threading.Thread(target=self.record)
        video_thread.start()

class AudioRecorder():
    "Audio class based on pyAudio and Wave"
    def __init__(self, filename="temp_audio.wav", rate=44100, fpb=1024, channels=2):
        self.open = True
        self.rate = rate
        self.frames_per_buffer = fpb
        self.channels = channels
        self.format = pyaudio.paInt16
        self.audio_filename = filename
        self.audio = pyaudio.PyAudio()
        self.stream = self.audio.open(format=self.format,
                                      channels=self.channels,
                                      rate=self.rate,
                                      input=True,
                                      frames_per_buffer = self.frames_per_buffer)
        self.audio_frames = []

    def record(self):
        "Audio starts being recorded"
        self.stream.start_stream()
        while self.open:
            data = self.stream.read(self.frames_per_buffer)
            self.audio_frames.append(data)
            if not self.open:
                break

    def stop(self):
        "Finishes the audio recording therefore the thread too"
        if self.open:
            self.open = False
            self.stream.stop_stream()
            self.stream.close()
            self.audio.terminate()
            waveFile = wave.open(self.audio_filename, 'wb')
            waveFile.setnchannels(self.channels)
            waveFile.setsampwidth(self.audio.get_sample_size(self.format))
            waveFile.setframerate(self.rate)
            waveFile.writeframes(b''.join(self.audio_frames))
            waveFile.close()

    def start(self):
        "Launches the audio recording function using a thread"
        audio_thread = threading.Thread(target=self.record)
        audio_thread.start()

def start_AVrecording(filename="test"):
    global video_thread
    global audio_thread
    video_thread = VideoRecorder()
    audio_thread = AudioRecorder()
    audio_thread.start()
    video_thread.start()
    return filename

def start_video_recording(filename="test"):
    global video_thread
    video_thread = VideoRecorder()
    video_thread.start()
    return filename

def start_audio_recording(filename="test"):
    global audio_thread
    audio_thread = AudioRecorder()
    audio_thread.start()
    return filename

def stop_AVrecording(filename="test"):
    audio_thread.stop()
    frame_counts = video_thread.frame_counts
    elapsed_time = time.time() - video_thread.start_time
    recorded_fps = frame_counts / elapsed_time
    print("total frames " + str(frame_counts))
    print("elapsed time " + str(elapsed_time))
    print("recorded fps " + str(recorded_fps))
    video_thread.stop()

    # Makes sure the threads have finished
    while threading.active_count() > 1:
        time.sleep(1)

    video_stream = ffmpeg.input(video_thread.video_filename)
    audio_stream = ffmpeg.input(audio_thread.audio_filename)
    ffmpeg.output(audio_stream, video_stream, 'out.mp4').run(overwrite_output=True)

    # # Merging audio and video signal
    # if abs(recorded_fps - 6) >= 0.01:    # If the fps rate was higher/lower than expected, re-encode it to the expected
    #     print("Re-encoding")
    #     cmd = "ffmpeg -r " + str(recorded_fps) + " -i temp_video.avi -pix_fmt yuv420p -r 6 temp_video2.avi"
    #     subprocess.call(cmd, shell=True)
    #     print("Muxing")
    #     cmd = "ffmpeg -y -ac 2 -channel_layout stereo -i temp_audio.wav -i temp_video2.avi -pix_fmt yuv420p " + filename + ".avi"
    #     subprocess.call(cmd, shell=True)
    # else:
    #     print("Normal recording\nMuxing")
    #     cmd = "ffmpeg -y -ac 2 -channel_layout stereo -i temp_audio.wav -i temp_video.avi -pix_fmt yuv420p " + filename + ".avi"
    #     subprocess.call(cmd, shell=True)
    #     print("..")

def file_manager(filename="test"):
    "Required and wanted processing of final files"
    local_path = os.getcwd()
    if os.path.exists(str(local_path) + "/temp_audio.wav"):
        os.remove(str(local_path) + "/temp_audio.wav")
    if os.path.exists(str(local_path) + "/temp_video.avi"):
        os.remove(str(local_path) + "/temp_video.avi")
    if os.path.exists(str(local_path) + "/temp_video2.avi"):
        os.remove(str(local_path) + "/temp_video2.avi")
    # if os.path.exists(str(local_path) + "/" + filename + ".avi"):
    #     os.remove(str(local_path) + "/" + filename + ".avi")

if __name__ == '__main__':
    start_AVrecording()
    # try:
    #     while True:
    #         pass
    # except KeyboardInterrupt:
    #     stop_AVrecording()
    time.sleep(10)
    stop_AVrecording()
    print("finishing recording")

    file_manager()


Upvotes: 1

dbdq
dbdq

Reputation: 355

I was randomly getting "[Errno -9999] Unanticipated host error" while using JRodrigoF's solution and found that it's due to a race condition where an audio stream can be closed just before being read for the last time inside record() of AudioRecorder class.

I modified slightly so that all the closing procedures are done after the while loop and added a function list_audio_devices() that shows the list of audio devices to select from. I also added an audio device index as a parameter to choose an audio device.

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# VideoRecorder.py

from __future__ import print_function, division
import numpy as np
import cv2
import pyaudio
import wave
import threading
import time
import subprocess
import os

class VideoRecorder():
    "Video class based on openCV"
    def __init__(self, name="temp_video.avi", fourcc="MJPG", sizex=640, sizey=480, camindex=0, fps=30):
        self.open = True
        self.device_index = camindex
        self.fps = fps                  # fps should be the minimum constant rate at which the camera can
        self.fourcc = fourcc            # capture images (with no decrease in speed over time; testing is required)
        self.frameSize = (sizex, sizey) # video formats and sizes also depend and vary according to the camera used
        self.video_filename = name
        self.video_cap = cv2.VideoCapture(self.device_index)
        self.video_writer = cv2.VideoWriter_fourcc(*self.fourcc)
        self.video_out = cv2.VideoWriter(self.video_filename, self.video_writer, self.fps, self.frameSize)
        self.frame_counts = 1
        self.start_time = time.time()

    def record(self):
        "Video starts being recorded"
        # counter = 1
        timer_start = time.time()
        timer_current = 0
        while self.open:
            ret, video_frame = self.video_cap.read()
            if ret:
                self.video_out.write(video_frame)
                # print(str(counter) + " " + str(self.frame_counts) + " frames written " + str(timer_current))
                self.frame_counts += 1
                # counter += 1
                # timer_current = time.time() - timer_start
                time.sleep(1/self.fps)
                # gray = cv2.cvtColor(video_frame, cv2.COLOR_BGR2GRAY)
                # cv2.imshow('video_frame', gray)
                # cv2.waitKey(1)
            else:
                break

    def stop(self):
        "Finishes the video recording therefore the thread too"
        if self.open:
            self.open=False
            self.video_out.release()
            self.video_cap.release()
            cv2.destroyAllWindows()

    def start(self):
        "Launches the video recording function using a thread"
        video_thread = threading.Thread(target=self.record)
        video_thread.start()

class AudioRecorder():
    "Audio class based on pyAudio and Wave"
    def __init__(self, filename="temp_audio.wav", rate=44100, fpb=2**12, channels=1, audio_index=0):
        self.open = True
        self.rate = rate
        self.frames_per_buffer = fpb
        self.channels = channels
        self.format = pyaudio.paInt16
        self.audio_filename = filename
        self.audio = pyaudio.PyAudio()
        self.stream = self.audio.open(format=self.format,
                                      channels=self.channels,
                                      rate=self.rate,
                                      input=True,
                                      input_device_index=audio_index,
                                      frames_per_buffer = self.frames_per_buffer)
        self.audio_frames = []

    def record(self):
        "Audio starts being recorded"
        self.stream.start_stream()
        t_start = time.time_ns()
        while self.open:
            try:
                data = self.stream.read(self.frames_per_buffer)
                self.audio_frames.append(data)
            except Exception as e:
                print('\n' + '*'*80)
                print('PyAudio read exception at %.1fms\n' % ((time.time_ns() - t_start)/10**6))
                print(e)
                print('*'*80 + '\n')
            time.sleep(0.01)
        self.stream.stop_stream()
        self.stream.close()
        self.audio.terminate()
        waveFile = wave.open(self.audio_filename, 'wb')
        waveFile.setnchannels(self.channels)
        waveFile.setsampwidth(self.audio.get_sample_size(self.format))
        waveFile.setframerate(self.rate)
        waveFile.writeframes(b''.join(self.audio_frames))
        waveFile.close()

    def stop(self):
        "Finishes the audio recording therefore the thread too"
        if self.open:
            self.open = False

    def start(self):
        "Launches the audio recording function using a thread"
        audio_thread = threading.Thread(target=self.record)
        audio_thread.start()

def start_AVrecording(filename="test", audio_index=0, sample_rate=44100):
    global video_thread
    global audio_thread
    video_thread = VideoRecorder()
    audio_thread = AudioRecorder(audio_index=audio_index, rate=sample_rate)
    audio_thread.start()
    video_thread.start()
    return filename

def start_video_recording(filename="test"):
    global video_thread
    video_thread = VideoRecorder()
    video_thread.start()
    return filename

def start_audio_recording(filename="test", audio_index=0, sample_rate=44100):
    global audio_thread
    audio_thread = AudioRecorder(audio_index=audio_index, rate=sample_rate)
    audio_thread.start()
    return filename

def stop_AVrecording(filename="test"):
    audio_thread.stop()
    frame_counts = video_thread.frame_counts
    elapsed_time = time.time() - video_thread.start_time
    recorded_fps = frame_counts / elapsed_time
    print("total frames " + str(frame_counts))
    print("elapsed time " + str(elapsed_time))
    print("recorded fps " + str(recorded_fps))
    video_thread.stop()

    # Makes sure the threads have finished
    while threading.active_count() > 1:
        time.sleep(1)

    # Merging audio and video signal
    if abs(recorded_fps - 6) >= 0.01:    # If the fps rate was higher/lower than expected, re-encode it to the expected
        print("Re-encoding")
        cmd = "ffmpeg -r " + str(recorded_fps) + " -i temp_video.avi -pix_fmt yuv420p -r 6 temp_video2.avi"
        subprocess.call(cmd, shell=True)
        print("Muxing")
        cmd = "ffmpeg -y -ac 2 -channel_layout stereo -i temp_audio.wav -i temp_video2.avi -pix_fmt yuv420p " + filename + ".avi"
        subprocess.call(cmd, shell=True)
    else:
        print("Normal recording\nMuxing")
        cmd = "ffmpeg -y -ac 2 -channel_layout stereo -i temp_audio.wav -i temp_video.avi -pix_fmt yuv420p " + filename + ".avi"
        subprocess.call(cmd, shell=True)
        print("..")

def file_manager(filename="test"):
    "Required and wanted processing of final files"
    local_path = os.getcwd()
    if os.path.exists(str(local_path) + "/temp_audio.wav"):
        os.remove(str(local_path) + "/temp_audio.wav")
    if os.path.exists(str(local_path) + "/temp_video.avi"):
        os.remove(str(local_path) + "/temp_video.avi")
    if os.path.exists(str(local_path) + "/temp_video2.avi"):
        os.remove(str(local_path) + "/temp_video2.avi")
    # if os.path.exists(str(local_path) + "/" + filename + ".avi"):
    #     os.remove(str(local_path) + "/" + filename + ".avi")

def list_audio_devices(name_filter=None):
    pa = pyaudio.PyAudio()
    device_index = None
    sample_rate = None
    for x in range(pa.get_device_count()):
        info = pa.get_device_info_by_index(x)
        print(pa.get_device_info_by_index(x))
        if name_filter is not None and name_filter in info['name']:
            device_index = info['index']
            sample_rate = int(info['defaultSampleRate'])
            break
    return device_index, sample_rate

if __name__ == '__main__':
    start_AVrecording()
    time.sleep(5)
    stop_AVrecording()
    file_manager()

Upvotes: 0

mjdargen
mjdargen

Reputation: 71

I used JRodrigoF's script for a while on a project. However, I noticed that sometimes the threads would hang and it would cause the program to crash. Another issue is that openCV does not capture video frames at a reliable rate and ffmpeg would distort my video when re-encoding.

I came up with a new solution that records much more reliably and with much higher quality for my application. It presently only works for Windows because it uses pywinauto and the built-in Windows Camera app. The last bit of the script does some error-checking to confirm the video successfully recorded by checking the timestamp of the name of the video.

https://gist.github.com/mjdargen/956cc968864f38bfc4e20c9798c7d670

import pywinauto
import time
import subprocess
import os
import datetime


def win_record(duration):
    subprocess.run('start microsoft.windows.camera:', shell=True)  # open camera app

    # focus window by getting handle using title and class name
    # subprocess call opens camera and gets focus, but this provides alternate way
    # t, c = 'Camera', 'ApplicationFrameWindow'
    # handle = pywinauto.findwindows.find_windows(title=t, class_name=c)[0]
    # # get app and window
    # app = pywinauto.application.Application().connect(handle=handle)
    # window = app.window(handle=handle)
    # window.set_focus()  # set focus
    time.sleep(2)  # have to sleep

    # take control of camera window to take video
    desktop = pywinauto.Desktop(backend="uia")
    cam = desktop['Camera']
    # cam.print_control_identifiers()
    # make sure in video mode
    if cam.child_window(title="Switch to Video mode", auto_id="CaptureButton_1", control_type="Button").exists():
        cam.child_window(title="Switch to Video mode", auto_id="CaptureButton_1", control_type="Button").click()
    time.sleep(1)
    # start then stop video
    cam.child_window(title="Take Video", auto_id="CaptureButton_1", control_type="Button").click()
    time.sleep(duration+2)
    cam.child_window(title="Stop taking Video", auto_id="CaptureButton_1", control_type="Button").click()

    # retrieve vids from camera roll and sort
    dir = 'C:/Users/m/Pictures/Camera Roll'
    all_contents = list(os.listdir(dir))
    vids = [f for f in all_contents if "_Pro.mp4" in f]
    vids.sort()
    vid = vids[-1]  # get last vid
    # compute time difference
    vid_time = vid.replace('WIN_', '').replace('_Pro.mp4', '')
    vid_time = datetime.datetime.strptime(vid_time, '%Y%m%d_%H_%M_%S')
    now = datetime.datetime.now()
    diff = now - vid_time
    # time different greater than 2 minutes, assume something wrong & quit
    if diff.seconds > 120:
        quit()
    
    subprocess.run('Taskkill /IM WindowsCamera.exe /F', shell=True)  # close camera app
    print('Recorded successfully!')


win_record(2)

Upvotes: 3

bunkus
bunkus

Reputation: 1025

To the questions asked above: Yes the code should also works under Python3. I adjusted it a little bit and now works for python2 and python3 (tested it on windows7 with 2.7 and 3.6, though you need to have ffmpeg installed or the executable ffmpeg.exe at least in the same directory, you can get it here: https://www.ffmpeg.org/download.html ). Of course you also need all the other libraries cv2, numpy, pyaudio, installed like herewith:

pip install opencv-python numpy pyaudio

You can now run the code directly:

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# VideoRecorder.py

from __future__ import print_function, division
import numpy as np
import cv2
import pyaudio
import wave
import threading
import time
import subprocess
import os

class VideoRecorder():  
    "Video class based on openCV"
    def __init__(self, name="temp_video.avi", fourcc="MJPG", sizex=640, sizey=480, camindex=0, fps=30):
        self.open = True
        self.device_index = camindex
        self.fps = fps                  # fps should be the minimum constant rate at which the camera can
        self.fourcc = fourcc            # capture images (with no decrease in speed over time; testing is required)
        self.frameSize = (sizex, sizey) # video formats and sizes also depend and vary according to the camera used
        self.video_filename = name
        self.video_cap = cv2.VideoCapture(self.device_index)
        self.video_writer = cv2.VideoWriter_fourcc(*self.fourcc)
        self.video_out = cv2.VideoWriter(self.video_filename, self.video_writer, self.fps, self.frameSize)
        self.frame_counts = 1
        self.start_time = time.time()

    def record(self):
        "Video starts being recorded"
        # counter = 1
        timer_start = time.time()
        timer_current = 0
        while self.open:
            ret, video_frame = self.video_cap.read()
            if ret:
                self.video_out.write(video_frame)
                # print(str(counter) + " " + str(self.frame_counts) + " frames written " + str(timer_current))
                self.frame_counts += 1
                # counter += 1
                # timer_current = time.time() - timer_start
                time.sleep(1/self.fps)
                # gray = cv2.cvtColor(video_frame, cv2.COLOR_BGR2GRAY)
                # cv2.imshow('video_frame', gray)
                # cv2.waitKey(1)
            else:
                break

    def stop(self):
        "Finishes the video recording therefore the thread too"
        if self.open:
            self.open=False
            self.video_out.release()
            self.video_cap.release()
            cv2.destroyAllWindows()

    def start(self):
        "Launches the video recording function using a thread"
        video_thread = threading.Thread(target=self.record)
        video_thread.start()

class AudioRecorder():
    "Audio class based on pyAudio and Wave"
    def __init__(self, filename="temp_audio.wav", rate=44100, fpb=1024, channels=2):
        self.open = True
        self.rate = rate
        self.frames_per_buffer = fpb
        self.channels = channels
        self.format = pyaudio.paInt16
        self.audio_filename = filename
        self.audio = pyaudio.PyAudio()
        self.stream = self.audio.open(format=self.format,
                                      channels=self.channels,
                                      rate=self.rate,
                                      input=True,
                                      frames_per_buffer = self.frames_per_buffer)
        self.audio_frames = []

    def record(self):
        "Audio starts being recorded"
        self.stream.start_stream()
        while self.open:
            data = self.stream.read(self.frames_per_buffer) 
            self.audio_frames.append(data)
            if not self.open:
                break

    def stop(self):
        "Finishes the audio recording therefore the thread too"
        if self.open:
            self.open = False
            self.stream.stop_stream()
            self.stream.close()
            self.audio.terminate()
            waveFile = wave.open(self.audio_filename, 'wb')
            waveFile.setnchannels(self.channels)
            waveFile.setsampwidth(self.audio.get_sample_size(self.format))
            waveFile.setframerate(self.rate)
            waveFile.writeframes(b''.join(self.audio_frames))
            waveFile.close()

    def start(self):
        "Launches the audio recording function using a thread"
        audio_thread = threading.Thread(target=self.record)
        audio_thread.start()

def start_AVrecording(filename="test"):
    global video_thread
    global audio_thread
    video_thread = VideoRecorder()
    audio_thread = AudioRecorder()
    audio_thread.start()
    video_thread.start()
    return filename

def start_video_recording(filename="test"):
    global video_thread
    video_thread = VideoRecorder()
    video_thread.start()
    return filename

def start_audio_recording(filename="test"):
    global audio_thread
    audio_thread = AudioRecorder()
    audio_thread.start()
    return filename

def stop_AVrecording(filename="test"):
    audio_thread.stop() 
    frame_counts = video_thread.frame_counts
    elapsed_time = time.time() - video_thread.start_time
    recorded_fps = frame_counts / elapsed_time
    print("total frames " + str(frame_counts))
    print("elapsed time " + str(elapsed_time))
    print("recorded fps " + str(recorded_fps))
    video_thread.stop() 

    # Makes sure the threads have finished
    while threading.active_count() > 1:
        time.sleep(1)

    # Merging audio and video signal
    if abs(recorded_fps - 6) >= 0.01:    # If the fps rate was higher/lower than expected, re-encode it to the expected
        print("Re-encoding")
        cmd = "ffmpeg -r " + str(recorded_fps) + " -i temp_video.avi -pix_fmt yuv420p -r 6 temp_video2.avi"
        subprocess.call(cmd, shell=True)
        print("Muxing")
        cmd = "ffmpeg -y -ac 2 -channel_layout stereo -i temp_audio.wav -i temp_video2.avi -pix_fmt yuv420p " + filename + ".avi"
        subprocess.call(cmd, shell=True)
    else:
        print("Normal recording\nMuxing")
        cmd = "ffmpeg -y -ac 2 -channel_layout stereo -i temp_audio.wav -i temp_video.avi -pix_fmt yuv420p " + filename + ".avi"
        subprocess.call(cmd, shell=True)
        print("..")

def file_manager(filename="test"):
    "Required and wanted processing of final files"
    local_path = os.getcwd()
    if os.path.exists(str(local_path) + "/temp_audio.wav"):
        os.remove(str(local_path) + "/temp_audio.wav")
    if os.path.exists(str(local_path) + "/temp_video.avi"):
        os.remove(str(local_path) + "/temp_video.avi")
    if os.path.exists(str(local_path) + "/temp_video2.avi"):
        os.remove(str(local_path) + "/temp_video2.avi")
    # if os.path.exists(str(local_path) + "/" + filename + ".avi"):
    #     os.remove(str(local_path) + "/" + filename + ".avi")

if __name__ == '__main__':
    start_AVrecording()
    time.sleep(5)
    stop_AVrecording()
    file_manager()

Upvotes: 12

JRodrigoF
JRodrigoF

Reputation: 935

Answer: No. There is no single library/solution in python to do video/audio recording simultaneously. You have to implement both separately and merge the audio and video signal in a smart way to end up with a video/audio file.

I got a solution for the problem you present. My code addresses your three issues:

  • Records video + audio from webcam and microphone simultaneously.
  • It saves the final video/audio file as .AVI
  • Un-commenting lines 76, 77 and 78 will make the video to be displayed to screen while recording.

My solution uses pyaudio for audio recording, opencv for video recording, and ffmpeg for muxing the two signals. To be able to record both simultaneously, I use multithreading. One thread records video, and a second one the audio. I have uploaded my code to github and also have included all the essential parts it here.

https://github.com/JRodrigoF/AVrecordeR

Note: opencv is not able to control the fps at which the webcamera does the recording. It is only able to specify in the encoding of the file the desired final fps, but the webcamera usually behaves differently according to specifications and light conditions (I found). So the fps have to be controlled at the level of the code.

import cv2
import pyaudio
import wave
import threading
import time
import subprocess
import os

class VideoRecorder():  

    # Video class based on openCV 
    def __init__(self):

        self.open = True
        self.device_index = 0
        self.fps = 6               # fps should be the minimum constant rate at which the camera can
        self.fourcc = "MJPG"       # capture images (with no decrease in speed over time; testing is required)
        self.frameSize = (640,480) # video formats and sizes also depend and vary according to the camera used
        self.video_filename = "temp_video.avi"
        self.video_cap = cv2.VideoCapture(self.device_index)
        self.video_writer = cv2.VideoWriter_fourcc(*self.fourcc)
        self.video_out = cv2.VideoWriter(self.video_filename, self.video_writer, self.fps, self.frameSize)
        self.frame_counts = 1
        self.start_time = time.time()


    # Video starts being recorded 
    def record(self):

#       counter = 1
        timer_start = time.time()
        timer_current = 0


        while(self.open==True):
            ret, video_frame = self.video_cap.read()
            if (ret==True):

                    self.video_out.write(video_frame)
#                   print str(counter) + " " + str(self.frame_counts) + " frames written " + str(timer_current)
                    self.frame_counts += 1
#                   counter += 1
#                   timer_current = time.time() - timer_start
                    time.sleep(0.16)
#                   gray = cv2.cvtColor(video_frame, cv2.COLOR_BGR2GRAY)
#                   cv2.imshow('video_frame', gray)
#                   cv2.waitKey(1)
            else:
                break

                # 0.16 delay -> 6 fps
                # 


    # Finishes the video recording therefore the thread too
    def stop(self):

        if self.open==True:

            self.open=False
            self.video_out.release()
            self.video_cap.release()
            cv2.destroyAllWindows()

        else: 
            pass


    # Launches the video recording function using a thread          
    def start(self):
        video_thread = threading.Thread(target=self.record)
        video_thread.start()





class AudioRecorder():


    # Audio class based on pyAudio and Wave
    def __init__(self):

        self.open = True
        self.rate = 44100
        self.frames_per_buffer = 1024
        self.channels = 2
        self.format = pyaudio.paInt16
        self.audio_filename = "temp_audio.wav"
        self.audio = pyaudio.PyAudio()
        self.stream = self.audio.open(format=self.format,
                                      channels=self.channels,
                                      rate=self.rate,
                                      input=True,
                                      frames_per_buffer = self.frames_per_buffer)
        self.audio_frames = []


    # Audio starts being recorded
    def record(self):

        self.stream.start_stream()
        while(self.open == True):
            data = self.stream.read(self.frames_per_buffer) 
            self.audio_frames.append(data)
            if self.open==False:
                break


    # Finishes the audio recording therefore the thread too    
    def stop(self):

        if self.open==True:
            self.open = False
            self.stream.stop_stream()
            self.stream.close()
            self.audio.terminate()

            waveFile = wave.open(self.audio_filename, 'wb')
            waveFile.setnchannels(self.channels)
            waveFile.setsampwidth(self.audio.get_sample_size(self.format))
            waveFile.setframerate(self.rate)
            waveFile.writeframes(b''.join(self.audio_frames))
            waveFile.close()

        pass

    # Launches the audio recording function using a thread
    def start(self):
        audio_thread = threading.Thread(target=self.record)
        audio_thread.start()





def start_AVrecording(filename):

    global video_thread
    global audio_thread

    video_thread = VideoRecorder()
    audio_thread = AudioRecorder()

    audio_thread.start()
    video_thread.start()

    return filename




def start_video_recording(filename):

    global video_thread

    video_thread = VideoRecorder()
    video_thread.start()

    return filename


def start_audio_recording(filename):

    global audio_thread

    audio_thread = AudioRecorder()
    audio_thread.start()

    return filename




def stop_AVrecording(filename):

    audio_thread.stop() 
    frame_counts = video_thread.frame_counts
    elapsed_time = time.time() - video_thread.start_time
    recorded_fps = frame_counts / elapsed_time
    print "total frames " + str(frame_counts)
    print "elapsed time " + str(elapsed_time)
    print "recorded fps " + str(recorded_fps)
    video_thread.stop() 

    # Makes sure the threads have finished
    while threading.active_count() > 1:
        time.sleep(1)


#    Merging audio and video signal

    if abs(recorded_fps - 6) >= 0.01:    # If the fps rate was higher/lower than expected, re-encode it to the expected

        print "Re-encoding"
        cmd = "ffmpeg -r " + str(recorded_fps) + " -i temp_video.avi -pix_fmt yuv420p -r 6 temp_video2.avi"
        subprocess.call(cmd, shell=True)

        print "Muxing"
        cmd = "ffmpeg -ac 2 -channel_layout stereo -i temp_audio.wav -i temp_video2.avi -pix_fmt yuv420p " + filename + ".avi"
        subprocess.call(cmd, shell=True)

    else:

        print "Normal recording\nMuxing"
        cmd = "ffmpeg -ac 2 -channel_layout stereo -i temp_audio.wav -i temp_video.avi -pix_fmt yuv420p " + filename + ".avi"
        subprocess.call(cmd, shell=True)

        print ".."




# Required and wanted processing of final files
def file_manager(filename):

    local_path = os.getcwd()

    if os.path.exists(str(local_path) + "/temp_audio.wav"):
        os.remove(str(local_path) + "/temp_audio.wav")

    if os.path.exists(str(local_path) + "/temp_video.avi"):
        os.remove(str(local_path) + "/temp_video.avi")

    if os.path.exists(str(local_path) + "/temp_video2.avi"):
        os.remove(str(local_path) + "/temp_video2.avi")

    if os.path.exists(str(local_path) + "/" + filename + ".avi"):
        os.remove(str(local_path) + "/" + filename + ".avi")

Upvotes: 48

Kevin Hill
Kevin Hill

Reputation: 493

I've been looking around for a good answer to this, and I think it is GStreamer...

The documentation for the python bindings is extremely light, and most of it seemed centered around the old 0.10 version of GStreamer instead of the new 1.X versions, but GStreamer is a extremely powerful, cross-platform multimedia framework that can stream, mux, transcode, and display just about anything.

Upvotes: 3

Tuim
Tuim

Reputation: 2511

I would recommend ffmpeg. There is a python wrapper.

http://code.google.com/p/pyffmpeg/

Upvotes: 5

Related Questions