Reputation: 360
I have a class which opens a large raster image using the GDAL module (https://pypi.org/project/GDAL/) and extracts small images from it at a number of locations, defined by a passed list of coordinate tuples. I want to process a large list of coordinates using Pathos, as in this simplified version of my code:
import gdal
import pathos.pools as pp
class MyClass:
    """Cut fixed-size windows out of a raster opened through GDAL."""

    def __init__(self, image):
        """Open the raster at path ``image`` and keep the GDAL handle."""
        self.image_object = gdal.Open(image)

    def get_small_image(self, coord, size=100):
        """Read one window from the raster and return it.

        ``coord[0]`` and ``coord[1]`` are forwarded as the first two
        arguments of ``ReadAsArray``; ``size`` is passed for both of the
        remaining two arguments.
        """
        x_offset, y_offset = coord[0], coord[1]
        return self.image_object.ReadAsArray(x_offset, y_offset, size, size)

    def run_multi(self, coords_in):
        """Map ``get_small_image`` over ``coords_in`` on a two-process pool."""
        workers = pp.ProcessPool(2)
        # The mapped callable is a bound method, so the instance it belongs
        # to (GDAL handle included) is pickled along with it for the workers.
        return workers.map(self.get_small_image, coords_in)
if __name__ == '__main__':
    # Guard the driver code: on Windows, worker processes are started by
    # re-importing the main module, which would otherwise re-run these
    # lines (and try to start another pool) in every worker.
    image = r'C:\path\to\image.JP2'
    class_obj = MyClass(image)
    coords = [(200, 200), (400, 400), (600, 600), (800, 800)]
    results = class_obj.run_multi(coords)
    print(results)
Running this gives the following error:
Traceback (most recent call last):
File "C:/Users/lharris/PycharmProjects/road_condition_py3/error_recreate.py", line 26, in <module>
results = class_obj.run_multi(coords)
File "C:/Users/lharris/PycharmProjects/road_condition_py3/error_recreate.py", line 17, in run_multi
output = pool.map(self.get_small_image, coords_in)
File "C:\Users\lharris\PycharmProjects\road_condition_py3\venv\lib\site-packages\pathos\multiprocessing.py", line 137, in map
return _pool.map(star(f), zip(*args)) # chunksize
File "C:\Users\lharris\PycharmProjects\road_condition_py3\venv\lib\site-packages\multiprocess\pool.py", line 266, in map
return self._map_async(func, iterable, mapstar, chunksize).get()
File "C:\Users\lharris\PycharmProjects\road_condition_py3\venv\lib\site-packages\multiprocess\pool.py", line 644, in get
raise self._value
File "C:\Users\lharris\PycharmProjects\road_condition_py3\venv\lib\site-packages\multiprocess\pool.py", line 424, in _handle_tasks
put(task)
File "C:\Users\lharris\PycharmProjects\road_condition_py3\venv\lib\site-packages\multiprocess\connection.py", line 209, in send
self._send_bytes(_ForkingPickler.dumps(obj))
File "C:\Users\lharris\PycharmProjects\road_condition_py3\venv\lib\site-packages\multiprocess\reduction.py", line 54, in dumps
cls(buf, protocol).dump(obj)
File "C:\Python36\lib\pickle.py", line 409, in dump
self.save(obj)
File "C:\Python36\lib\pickle.py", line 476, in save
f(self, obj) # Call unbound method with explicit self
File "C:\Python36\lib\pickle.py", line 751, in save_tuple
save(element)
File "C:\Python36\lib\pickle.py", line 476, in save
f(self, obj) # Call unbound method with explicit self
File "C:\Python36\lib\pickle.py", line 736, in save_tuple
save(element)
File "C:\Python36\lib\pickle.py", line 476, in save
f(self, obj) # Call unbound method with explicit self
File "C:\Python36\lib\pickle.py", line 736, in save_tuple
save(element)
File "C:\Python36\lib\pickle.py", line 476, in save
f(self, obj) # Call unbound method with explicit self
File "C:\Users\lharris\PycharmProjects\road_condition_py3\venv\lib\site-packages\dill\_dill.py", line 1377, in save_function
obj.__dict__), obj=obj)
File "C:\Python36\lib\pickle.py", line 610, in save_reduce
save(args)
File "C:\Python36\lib\pickle.py", line 476, in save
f(self, obj) # Call unbound method with explicit self
File "C:\Python36\lib\pickle.py", line 751, in save_tuple
save(element)
File "C:\Python36\lib\pickle.py", line 476, in save
f(self, obj) # Call unbound method with explicit self
File "C:\Python36\lib\pickle.py", line 736, in save_tuple
save(element)
File "C:\Python36\lib\pickle.py", line 476, in save
f(self, obj) # Call unbound method with explicit self
File "C:\Users\lharris\PycharmProjects\road_condition_py3\venv\lib\site-packages\dill\_dill.py", line 1120, in save_cell
pickler.save_reduce(_create_cell, (f,), obj=obj)
File "C:\Python36\lib\pickle.py", line 610, in save_reduce
save(args)
File "C:\Python36\lib\pickle.py", line 476, in save
f(self, obj) # Call unbound method with explicit self
File "C:\Python36\lib\pickle.py", line 736, in save_tuple
save(element)
File "C:\Python36\lib\pickle.py", line 476, in save
f(self, obj) # Call unbound method with explicit self
File "C:\Users\lharris\PycharmProjects\road_condition_py3\venv\lib\site-packages\dill\_dill.py", line 1069, in save_instancemethod0
pickler.save_reduce(MethodType, (obj.__func__, obj.__self__), obj=obj)
File "C:\Python36\lib\pickle.py", line 610, in save_reduce
save(args)
File "C:\Python36\lib\pickle.py", line 476, in save
f(self, obj) # Call unbound method with explicit self
File "C:\Python36\lib\pickle.py", line 736, in save_tuple
save(element)
File "C:\Python36\lib\pickle.py", line 521, in save
self.save_reduce(obj=obj, *rv)
File "C:\Python36\lib\pickle.py", line 634, in save_reduce
save(state)
File "C:\Python36\lib\pickle.py", line 476, in save
f(self, obj) # Call unbound method with explicit self
File "C:\Users\lharris\PycharmProjects\road_condition_py3\venv\lib\site-packages\dill\_dill.py", line 893, in save_module_dict
StockPickler.save_dict(pickler, obj)
File "C:\Python36\lib\pickle.py", line 821, in save_dict
self._batch_setitems(obj.items())
File "C:\Python36\lib\pickle.py", line 852, in _batch_setitems
save(v)
File "C:\Python36\lib\pickle.py", line 521, in save
self.save_reduce(obj=obj, *rv)
File "C:\Python36\lib\pickle.py", line 634, in save_reduce
save(state)
File "C:\Python36\lib\pickle.py", line 476, in save
f(self, obj) # Call unbound method with explicit self
File "C:\Users\lharris\PycharmProjects\road_condition_py3\venv\lib\site-packages\dill\_dill.py", line 893, in save_module_dict
StockPickler.save_dict(pickler, obj)
File "C:\Python36\lib\pickle.py", line 821, in save_dict
self._batch_setitems(obj.items())
File "C:\Python36\lib\pickle.py", line 852, in _batch_setitems
save(v)
File "C:\Python36\lib\pickle.py", line 496, in save
rv = reduce(self.proto)
TypeError: can't pickle SwigPyObject objects
Looking through that, it does look as though Pathos is using Dill and Multiprocess correctly. So is there a way of dealing with SwigPyObjects in parallel, either using Pathos or otherwise?
Something similar was asked a few weeks ago, but didn't get any answers (Python pathos error can't pickle SwigPyObject objects), and I asked a related question when I was previously unable to use Pathos (How can type 'SwigPyObject be registered using copy_reg.pickle in Python?).
I encountered this error using Python 3.6.5, in Windows 7, developing in PyCharm, using a virtual environment onto which any packages I need have been pip installed from .whl files downloaded from PyPI (I'm unable to use pip install normally on my network).
Any help anyone can give me would be great!
Upvotes: 6
Views: 1218
Reputation: 16711
Because `get_small_image` is a bound method, each worker needs a copy of `class_obj`. With multiprocessing this is done by passing pickled Python objects. However, because the object contains a member of class `SwigPyObject`, `class_obj` isn't picklable and the whole thing fails.
The most straightforward fix is something like:
class MyClass:
    """Cut fixed-size windows out of a raster, opening it lazily with GDAL.

    Only the path is stored at construction; the GDAL handle is created on
    the first read. A freshly built instance therefore carries no SWIG
    object and can be pickled and sent to worker processes.
    """

    def __init__(self, image):
        """Remember the raster path ``image`` without opening it yet."""
        self.image_object = None
        self.image = image

    def __getstate__(self):
        """Return the picklable state, leaving out the GDAL handle.

        Without this, an instance whose raster has already been opened in
        the current process (for example by a direct ``get_small_image``
        call) could no longer be pickled for the pool. The receiving
        process reopens the raster on its first read.
        """
        state = self.__dict__.copy()
        state['image_object'] = None
        return state

    def get_small_image(self, coord, size=100):
        """Read one window from the raster, opening the raster if needed.

        ``coord[0]`` and ``coord[1]`` are forwarded as the first two
        arguments of ``ReadAsArray``; ``size`` is passed for both of the
        remaining two arguments.
        """
        if self.image_object is None:
            # Opened here, inside whichever process performs the read.
            self.image_object = gdal.Open(self.image)
        return self.image_object.ReadAsArray(coord[0], coord[1], size, size)

    def run_multi(self, coords_in):
        """Map ``get_small_image`` over ``coords_in`` on a two-process pool."""
        pool = pp.ProcessPool(2)
        return pool.map(self.get_small_image, coords_in)
This lazy loading of the image causes the `SwigPyObject` to be created only after the object has been pickled by the parent and unpickled in the child processes, thus avoiding the issue you are encountering.
Upvotes: 2