Reputation: 729
I am using Linux. I have an external executable called "combine" and a loop of 20 iterations. Per each iteration, "combine" needs to be called with an argument that depends on the i-th iteration. Example:
arguments = " "
for i in range(1,20):
arguments += str(i) + "_image.jpg "
# begin of pseudo-code
execute: "./combine" + arguments # in parallel using all cores
# pseudo-code continues
wait_for_all_previous_process_to_terminate
execute: "./merge_resized_images" # use all cores - possible for one single command?
How do I achieve this using the multiprocessing module in Python?
Upvotes: 7
Views: 23599
Reputation: 94961
You can use subprocess.Popen
to launch the external commands asynchronously, and store each Popen
object returned in a list. Once you've launched all the processes, just iterate over them and wait for each to finish using popen_object.wait
.
from subprocess import Popen
processes = []
for i in range(1,20):
arguments += str(i) + "_image.jpg "
processes.append(subprocess.Popen(shlex.split("./combine" + arguments)))
for p in processes:
p.wait()
subprocess.call("./merge_resized_images")
However, this will launch twenty concurrent processes, which is probably going to hurt performance.
To avoid that, you can use a ThreadPool
to limit yourself to some lower number of concurrent processes (multiprocessing.cpu_count
is a good number), and then use pool.join
to wait for them all to finish.
import multiprocessing
import subprocess
import shlex
from multiprocessing.pool import ThreadPool
def call_proc(cmd):
""" This runs in a separate thread. """
#subprocess.call(shlex.split(cmd)) # This will block until cmd finishes
p = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
out, err = p.communicate()
return (out, err)
pool = ThreadPool(multiprocessing.cpu_count())
results = []
for i in range(1,20):
arguments += str(i) + "_image.jpg "
results.append(pool.apply_async(call_proc, ("./combine" + arguments,)))
# Close the pool and wait for each running task to complete
pool.close()
pool.join()
for result in results:
out, err = result.get()
print("out: {} err: {}".format(out, err))
subprocess.call("./merge_resized_images")
Each thread will release the GIL while waiting for the subprocess to complete, so they'll all run in parallel.
Upvotes: 25
Reputation: 43533
My solution to this problem is to create and manage a list of subprocesses. Pay special attention to startencoder
and manageprocs
. That is where the actual work is being started and managed.
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
#
# Author: R.F. Smith
# $Date: 2014-02-15 14:44:31 +0100 $
#
# To the extent possible under law, Roland Smith has waived all copyright and
# related or neighboring rights to vid2mkv.py. This work is published from the
# Netherlands. See http://creativecommons.org/publicdomain/zero/1.0/
"""Convert all video files given on the command line to Theora/Vorbis streams
in a Matroska container."""
from __future__ import print_function, division
__version__ = '$Revision: a42ef58 $'[11:-2]
import os
import sys
import subprocess
from multiprocessing import cpu_count
from time import sleep
def warn(s):
"""Print a warning message.
:param s: Message string
"""
s = ' '.join(['Warning:', s])
print(s, file=sys.stderr)
def checkfor(args, rv=0):
"""Make sure that a program necessary for using this script is
available.
:param args: String or list of strings of commands. A single string may
not contain spaces.
:param rv: Expected return value from evoking the command.
"""
if isinstance(args, str):
if ' ' in args:
raise ValueError('no spaces in single command allowed')
args = [args]
try:
with open(os.devnull, 'w') as bb:
rc = subprocess.call(args, stdout=bb, stderr=bb)
if rc != rv:
raise OSError
except OSError as oops:
outs = "Required program '{}' not found: {}."
print(outs.format(args[0], oops.strerror))
sys.exit(1)
def startencoder(fname):
"""Use ffmpeg to convert a video file to Theora/Vorbis
streams in a Matroska container.
:param fname: Name of the file to convert.
:returns: a 3-tuple of a Process, input path and output path
"""
basename, ext = os.path.splitext(fname)
known = ['.mp4', '.avi', '.wmv', '.flv', '.mpg', '.mpeg', '.mov', '.ogv']
if ext.lower() not in known:
warn("File {} has unknown extension, ignoring it.".format(fname))
return (None, fname, None)
ofn = basename + '.mkv'
args = ['ffmpeg', '-i', fname, '-c:v', 'libtheora', '-q:v', '6', '-c:a',
'libvorbis', '-q:a', '3', '-sn', ofn]
with open(os.devnull, 'w') as bitbucket:
try:
p = subprocess.Popen(args, stdout=bitbucket, stderr=bitbucket)
print("Conversion of {} to {} started.".format(fname, ofn))
except:
warn("Starting conversion of {} failed.".format(fname))
return (p, fname, ofn)
def manageprocs(proclist):
"""Check a list of subprocesses tuples for processes that have ended and
remove them from the list.
:param proclist: a list of (process, input filename, output filename)
tuples.
"""
print('# of conversions running: {}\r'.format(len(proclist)), end='')
sys.stdout.flush()
for p in proclist:
pr, ifn, ofn = p
if pr is None:
proclist.remove(p)
elif pr.poll() is not None:
print('Conversion of {} to {} finished.'.format(ifn, ofn))
proclist.remove(p)
sleep(0.5)
def main(argv):
"""Main program.
:param argv: command line arguments
"""
if len(argv) == 1:
binary = os.path.basename(argv[0])
print("{} version {}".format(binary, __version__), file=sys.stderr)
print("Usage: {} [file ...]".format(binary), file=sys.stderr)
sys.exit(0)
checkfor(['ffmpeg', '-version'])
avis = argv[1:]
procs = []
maxprocs = cpu_count()
for ifile in avis:
while len(procs) == maxprocs:
manageprocs(procs)
procs.append(startencoder(ifile))
while len(procs) > 0:
manageprocs(procs)
if __name__ == '__main__':
main(sys.argv)
Upvotes: 1