Commit 72006fe0 authored by Luke Campagnola

Added custom multiprocessing module:

  - Allows starting new processes and controlling them remotely from the parent process.
  - Remote processes can run their own GUI; Qt signals can be connected between processes.
    (In general this is not possible with the built-in multiprocessing module due to
    its use of fork().)
  - Control works through a system of proxy objects such that controlling a remote process
    looks almost exactly like working with local objects.
  - Uses sockets to communicate between processes (so in theory it could be made to
    work over a network), but also includes a mode that uses fork() to allow fast
    parallelization.
  - Wicked-easy inline parallelization: add only one line of code to break up work between
    processes (requires fork; sorry, windows users). See the sketch below.
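
A minimal sketch of the inline-parallelization pattern (adapted from the example
script in this diff; doWork stands in for any picklable computation):

    tasks = [1, 2, 4, 8]
    results = [None] * len(tasks)
    with Parallelize(enumerate(tasks), results=results) as tasker:  ## the one added line
        for i, x in tasker:
            tasker.results[i] = doWork(x)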
parent c7a78642
## ==== example script ====

# -*- coding: utf-8 -*-
import initExample ## Add path to library (just for examples; you do not need this)
import os, time
import numpy as np
import pyqtgraph.multiprocess as mp
from pyqtgraph.multiprocess.parallelizer import Parallelize #, Parallelizer

print "\n=================\nParallelize"
tasks = [1, 2, 4, 8]
results = [None] * len(tasks)
size = 2000000

## serial version: workers=1 forces all tasks to run in this process
start = time.time()
with Parallelize(enumerate(tasks), results=results, workers=1) as tasker:
    for i, x in tasker:
        print i, x
        tot = 0
        for j in xrange(size):
            tot += j * x
        tasker.results[i] = tot
print results
print "serial:", time.time() - start

## parallel version: one worker per CPU. Assign through tasker.results so the
## value is written to the parent process's list via proxy; a bare
## results[i] = tot would only modify the forked child's copy.
start = time.time()
with Parallelize(enumerate(tasks), results=results) as tasker:
    for i, x in tasker:
        print i, x
        tot = 0
        for j in xrange(size):
            tot += j * x
        tasker.results[i] = tot
print results
print "parallel:", time.time() - start
print "\n=================\nStart Process"
proc = mp.Process()
import os
print "parent:", os.getpid(), "child:", proc.proc.pid
print "started"
rnp = proc._import('numpy')
arr = rnp.array([1,2,3,4])
print repr(arr)
print str(arr)
print "return value:", repr(arr.mean(_returnType='value'))
print "return proxy:", repr(arr.mean(_returnType='proxy'))
print "return auto: ", repr(arr.mean(_returnType='auto'))
proc.join()
print "process finished"
print "\n=================\nStart ForkedProcess"
proc = mp.ForkedProcess()
rnp = proc._import('numpy')
arr = rnp.array([1,2,3,4])
print repr(arr)
print str(arr)
print repr(arr.mean())
proc.join()
print "process finished"
import pyqtgraph as pg
from pyqtgraph.Qt import QtCore, QtGui
app = pg.QtGui.QApplication([])
print "\n=================\nStart QtProcess"
proc = mp.QtProcess()
d1 = proc.transfer(np.random.normal(size=1000))
d2 = proc.transfer(np.random.normal(size=1000))
rpg = proc._import('pyqtgraph')
plt = rpg.plot(d1+d2)
## Start Qt event loop unless running in interactive mode or using pyside.
#import sys
#if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'):
#QtGui.QApplication.instance().exec_()
"""
Multiprocessing utility library
(parallelization done the way I like it)
Luke Campagnola
2012.06.10
This library provides:
- simple mechanism for starting a new python interpreter process that can be controlled from the original process
(this allows, for example, displaying and manipulating plots in a remote process
while the parent process is free to do other work)
- proxy system that allows objects hosted in the remote process to be used as if they were local
- Qt signal connection between processes
- very simple in-line parallelization (fork only; does not work on windows) for number-crunching
TODO:
allow remote processes to serve as rendering engines that pass pixmaps back to the parent process for display
(RemoteGraphicsView class)
"""
from processes import *


## ==== pyqtgraph.multiprocess.parallelizer ====

import os, sys, time, multiprocessing
from processes import ForkedProcess
from remoteproxy import ExitError
class Parallelize:
    """
    Class for ultra-simple inline parallelization on multi-core CPUs
    
    Example::
    
        ## Here is the serial (single-process) task:
        
        tasks = [1, 2, 4, 8]
        results = []
        for task in tasks:
            result = processTask(task)
            results.append(result)
        print results
        
        ## Here is the parallelized version:
        
        tasks = [1, 2, 4, 8]
        results = []
        with Parallelize(tasks, workers=4, results=results) as tasker:
            for task in tasker:
                result = processTask(task)
                tasker.results.append(result)
        print results
        
    The only major caveat is that *result* in the example above must be picklable.
    """
    def __init__(self, tasks, workers=None, block=True, **kwds):
        """
        Args:
            tasks   - list of objects to be processed (Parallelize will determine
                      how to distribute the tasks)
            workers - number of worker processes, or None to use the number of
                      CPUs in the system
            kwds    - objects to be shared by proxy with child processes
        """
        self.block = block
        if workers is None:
            workers = multiprocessing.cpu_count()
        if not hasattr(os, 'fork'):
            workers = 1  ## no fork (e.g. windows); fall back to serial processing
        self.workers = workers
        self.tasks = list(tasks)
        self.kwds = kwds
    def __enter__(self):
        self.proc = None
        workers = self.workers
        if workers == 1:
            return Tasker(None, self.tasks, self.kwds)
            
        self.childs = []
        
        ## break up tasks into one set per worker
        chunks = [[] for i in xrange(workers)]
        for i in range(len(self.tasks)):
            chunks[i % workers].append(self.tasks[i])
        
        ## fork and assign tasks to each worker
        for i in range(workers):
            proc = ForkedProcess(target=None, preProxy=self.kwds)
            if not proc.isParent:
                ## we are in a forked worker; hand back its share of the tasks
                self.proc = proc
                return Tasker(proc, chunks[i], proc.forkedProxies)
            else:
                self.childs.append(proc)
        
        ## parent process: service requests from workers until all have exited.
        ## (collect finished children first, then remove them, so that we never
        ## mutate the list while iterating over it)
        activeChilds = self.childs[:]
        while len(activeChilds) > 0:
            rem = []
            for ch in activeChilds:
                try:
                    ch.processRequests()
                except ExitError:
                    rem.append(ch)
            for ch in rem:
                activeChilds.remove(ch)
            time.sleep(0.1)
        
        return []  ## no tasks for the parent process.
    def __exit__(self, *exc_info):
        if exc_info[0] is not None:
            sys.excepthook(*exc_info)
        if self.proc is not None:  ## worker process: exit immediately once its tasks are done
            os._exit(0)
            
    def wait(self):
        ## wait for all child processes to finish
        pass
class Tasker:
    def __init__(self, proc, tasks, kwds):
        self.proc = proc
        self.tasks = tasks
        for k, v in kwds.iteritems():
            setattr(self, k, v)  ## shared objects appear as attributes (e.g. tasker.results)
        
    def __iter__(self):
        ## we could fix this up such that tasks are retrieved from the parent process one at a time.
        for task in self.tasks:
            yield task
        if self.proc is not None:
            self.proc.close()  ## worker is done; notify the parent
#class Parallelizer:
    #"""
    #Use::
    
        #p = Parallelizer()
        #with p(4) as i:
            #p.finish(do_work(i))
        #print p.results()
    #"""
    
    #def __init__(self):
        #pass
    
    #def __call__(self, n):
        #self.replies = []
        #self.conn = None  ## indicates this is the parent process
        #return Session(self, n)
        
    #def finish(self, data):
        #if self.conn is None:
            #self.replies.append((self.i, data))
        #else:
            ##print "send", self.i, data
            #self.conn.send((self.i, data))
            #os._exit(0)
            
    #def result(self):
        #print self.replies

#class Session:
    #def __init__(self, par, n):
        #self.par = par
        #self.n = n
        
    #def __enter__(self):
        #self.childs = []
        #for i in range(1, self.n):
            #c1, c2 = multiprocessing.Pipe()
            #pid = os.fork()
            #if pid == 0:  ## child
                #self.par.i = i
                #self.par.conn = c2
                #self.childs = None
                #c1.close()
                #return i
            #else:
                #self.childs.append(c1)
                #c2.close()
        #self.par.i = 0
        #return 0
        
    #def __exit__(self, *exc_info):
        #if exc_info[0] is not None:
            #sys.excepthook(*exc_info)
        #if self.childs is not None:
            #self.par.replies.extend([conn.recv() for conn in self.childs])
        #else:
            #self.par.finish(None)

## ==== pyqtgraph.multiprocess.processes ====

from remoteproxy import RemoteEventHandler, ExitError, NoResultError, LocalObjectProxy, ObjectProxy
import subprocess, atexit, os, sys, time, random, socket
import cPickle as pickle
import multiprocessing.connection
class Process(RemoteEventHandler):
    def __init__(self, name=None, target=None):
        if target is None:
            target = startEventLoop
        if name is None:
            name = str(self)
        
        ## random authentication key
        authkey = ''.join([chr(random.getrandbits(7)) for i in range(20)])
        
        ## Listen for connection from remote process (and find a free port number)
        port = 10000
        while True:
            try:
                l = multiprocessing.connection.Listener(('localhost', int(port)), authkey=authkey)
                break
            except socket.error as ex:
                if ex.errno != 98:  ## 98 == EADDRINUSE; try the next port
                    raise
                port += 1
        
        ## start remote process, instruct it to run target function
        self.proc = subprocess.Popen((sys.executable, __file__, 'remote'), stdin=subprocess.PIPE)
        pickle.dump((name+'_child', port, authkey, target), self.proc.stdin)
        self.proc.stdin.close()
        
        ## open connection for remote process
        conn = l.accept()
        RemoteEventHandler.__init__(self, conn, name+'_parent', pid=self.proc.pid)
        
        atexit.register(self.join)
        
    def join(self, timeout=10):
        if self.proc.poll() is None:
            self.close()
            start = time.time()
            while self.proc.poll() is None:
                if timeout is not None and time.time() - start > timeout:
                    raise Exception('Timed out waiting for remote process to end.')
                time.sleep(0.05)
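
## Bootstrap handshake used by Process (summarizing the code above and the
## __main__ block at the bottom of this file):
##   1. The parent picks a free port and listens on it with a random authkey.
##   2. The parent spawns a new interpreter running this module with the
##      'remote' argument, and pickles (name, port, authkey, target) to the
##      child's stdin.
##   3. The child unpickles those values and calls target(name, port, authkey),
##      which connects back to the parent and enters an event loop.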
def startEventLoop(name, port, authkey):
    conn = multiprocessing.connection.Client(('localhost', int(port)), authkey=authkey)
    global HANDLER
    HANDLER = RemoteEventHandler(conn, name, os.getppid())
    while True:
        try:
            HANDLER.processRequests()  ## exception raised when the loop should exit
            time.sleep(0.01)
        except ExitError:
            break
class ForkedProcess(RemoteEventHandler):
    """
    ForkedProcess is a substitute for Process that uses os.fork() to generate a new process.
    This is much faster than starting a completely new interpreter, but carries some caveats
    and limitations:
    
      - open file handles are shared with the parent process, which is potentially dangerous
      - it is not possible to have a QApplication in both parent and child processes
        (unless both QApplications are created _after_ the call to fork())
      - generally not thread-safe; also, threads are not copied by fork(). The new process
        will have only one thread that starts wherever fork() was called in the parent process.
      - forked processes are unceremoniously terminated when join() is called; they are not
        given any opportunity to clean up. (This prevents them calling any cleanup code that
        was only intended to be used by the parent process.)
    """
    def __init__(self, name=None, target=0, preProxy=None):
        """
        When initializing, an optional target may be given.
        If no target is specified, self.eventLoop will be used.
        If None is given, no target will be called (and it will be up
        to the caller to properly shut down the forked process).
        
        preProxy may be a dict of values that will appear as ObjectProxy
        in the remote process (they do not need to be sent explicitly since
        they are available immediately before the call to fork()).
        Proxies will be available as self.forkedProxies[name].
        """
        self.hasJoined = False
        if target == 0:
            target = self.eventLoop
        if name is None:
            name = str(self)
        
        conn, remoteConn = multiprocessing.Pipe()
        
        ## register preProxy objects now; the proxy IDs are inherited through fork()
        proxyIDs = {}
        if preProxy is not None:
            for k, v in preProxy.iteritems():
                proxyId = LocalObjectProxy.registerObject(v)
                proxyIDs[k] = proxyId
        
        pid = os.fork()
        if pid == 0:
            self.isParent = False
            conn.close()
            sys.stdin.close()  ## otherwise we screw with interactive prompts.
            RemoteEventHandler.__init__(self, remoteConn, name+'_child', pid=os.getppid())
            
            ## set up proxies to the preProxy objects before entering the target,
            ## so they are available even if target() never returns
            ppid = os.getppid()
            self.forkedProxies = {}
            for name, proxyId in proxyIDs.iteritems():
                self.forkedProxies[name] = ObjectProxy(ppid, proxyId=proxyId, typeStr=repr(preProxy[name]))
            
            if target is not None:
                target()
        else:
            self.isParent = True
            self.childPid = pid
            remoteConn.close()
            RemoteEventHandler.handlers = {}  ## don't want to inherit any of this from the parent.
            RemoteEventHandler.__init__(self, conn, name+'_parent', pid=pid)
            atexit.register(self.join)
    def eventLoop(self):
        while True:
            try:
                self.processRequests()  ## exception raised when the loop should exit
                time.sleep(0.01)
            except ExitError:
                sys.exit(0)
            except:
                print "Error occurred in forked event loop:"
                sys.excepthook(*sys.exc_info())
    
    def join(self, timeout=10):
        if self.hasJoined:
            return
        #os.kill(pid, 9)
        try:
            ## ask the child process to exit and require that it return a confirmation.
            self.close(callSync='sync', timeout=timeout, noCleanup=True)
        except IOError:  ## probably remote process has already quit
            pass
        self.hasJoined = True
## Special set of subclasses that implement a Qt event loop instead.

class RemoteQtEventHandler(RemoteEventHandler):
    def __init__(self, *args, **kwds):
        RemoteEventHandler.__init__(self, *args, **kwds)
        
    def startEventTimer(self):
        from pyqtgraph.Qt import QtGui, QtCore
        self.timer = QtCore.QTimer()
        self.timer.timeout.connect(self.processRequests)
        self.timer.start(10)
    
    def processRequests(self):
        try:
            RemoteEventHandler.processRequests(self)
        except ExitError:
            from pyqtgraph.Qt import QtGui, QtCore
            QtGui.QApplication.instance().quit()
            self.timer.stop()
            #raise
class QtProcess(Process):
    def __init__(self, name=None):
        Process.__init__(self, name, target=startQtEventLoop)
        self.startEventTimer()
        
    def startEventTimer(self):
        from pyqtgraph.Qt import QtGui, QtCore  ## avoid module-level import to keep bootstrap snappy.
        self.timer = QtCore.QTimer()
        app = QtGui.QApplication.instance()
        if app is None:
            raise Exception("Must create QApplication before starting QtProcess")
        self.timer.timeout.connect(self.processRequests)
        self.timer.start(10)
        
    def processRequests(self):
        try:
            Process.processRequests(self)
        except ExitError:
            self.timer.stop()
def startQtEventLoop(name, port, authkey):
    conn = multiprocessing.connection.Client(('localhost', int(port)), authkey=authkey)
    from pyqtgraph.Qt import QtGui, QtCore
    app = QtGui.QApplication.instance()
    if app is None:
        app = QtGui.QApplication([])
        app.setQuitOnLastWindowClosed(False)  ## generally we want the event loop to stay open
                                              ## until it is explicitly closed by the parent process.
    
    global HANDLER
    HANDLER = RemoteQtEventHandler(conn, name, os.getppid())
    HANDLER.startEventTimer()
    app.exec_()

if __name__ == '__main__':
    if len(sys.argv) == 2 and sys.argv[1] == 'remote':  ## module has been invoked as a script in a new python interpreter.
        name, port, authkey, target = pickle.load(sys.stdin)
        target(name, port, authkey)
        sys.exit(0)
(The diff for the remoteproxy module is collapsed and not shown.)