user8400607

Reputation: 55

GridSearchCV crashes when n_jobs=... is specified. PicklingError: Could not pickle the task to send it to the workers

I am running a GridSearchCV with the KerasClassifier from tensorflow.keras.wrappers.scikit_learn. If I specify the n_jobs parameter, I get a PicklingError, even after importing the dill and pathos packages as suggested in another post. Here is my code:

    from sklearn.model_selection import GridSearchCV
    from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization, Flatten, Dense, Dropout

    import tensorflow
    import numpy
    import dill as pickle
    from pathos.multiprocessing import ProcessingPool as Pool

    # Function to create the model, required for KerasClassifier.
    # input_shape and no_classes are defined earlier in the notebook.
    def create_model_SGD(neurons=1, learning_rate=0.1, momentum=0.0):
        model = Sequential()
        model.add(Conv2D(32, kernel_size=(3, 3), activation='relu',
                         input_shape=input_shape, padding='same'))
        model.add(BatchNormalization())
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(BatchNormalization())
        model.add(Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same'))
        model.add(BatchNormalization())
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(BatchNormalization())
        model.add(Flatten())
        model.add(Dense(neurons, activation='relu'))
        model.add(Dropout(rate=0.2))
        model.add(BatchNormalization())
        model.add(Dense(no_classes, activation='softmax'))

        # compilation of the model
        model.compile(loss=tensorflow.keras.losses.sparse_categorical_crossentropy,
                      optimizer=tensorflow.keras.optimizers.SGD(learning_rate=learning_rate,
                                                                momentum=momentum),
                      metrics=['accuracy'])
        return model

    # fix random seed for reproducibility
    seed = 7
    tensorflow.random.set_seed(seed)

    # create model
    model = KerasClassifier(build_fn=create_model_SGD, verbose=0)

    # define the grid search parameters
    learn_rate = [0.001, 0.01, 0.1]
    momentum = [0.0, 0.5, 0.9]
    neurons = [256, 512, 1024]
    batch_size = [100, 250, 350]
    epochs = [10, 25, 50]

    param_grid = dict(neurons=neurons, learning_rate=learn_rate, momentum=momentum,
                      batch_size=batch_size, epochs=epochs)

    grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3, verbose=1)

    grid_result = grid.fit(input_train, target_train)

This is the output I get when I execute the grid.fit command:

Fitting 3 folds for each of 243 candidates, totalling 729 fits
---------------------------------------------------------------------------
_RemoteTraceback                          Traceback (most recent call last)
_RemoteTraceback: 
"""
Traceback (most recent call last):
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/site-packages/joblib/externals/loky/backend/queues.py", line 153, in _feed
    obj_ = dumps(obj, reducers=reducers)
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/site-packages/joblib/externals/loky/backend/reduction.py", line 271, in dumps
    dump(obj, buf, reducers=reducers, protocol=protocol)
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/site-packages/joblib/externals/loky/backend/reduction.py", line 264, in dump
    _LokyPickler(file, reducers=reducers, protocol=protocol).dump(obj)
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/site-packages/joblib/externals/cloudpickle/cloudpickle_fast.py", line 602, in dump
    return Pickler.dump(self, obj)
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 437, in dump
    self.save(obj)
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 549, in save
    self.save_reduce(obj=obj, *rv)
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 662, in save_reduce
    save(state)
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 859, in save_dict
    self._batch_setitems(obj.items())
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 885, in _batch_setitems
    save(v)
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 549, in save
    self.save_reduce(obj=obj, *rv)
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 662, in save_reduce
    save(state)
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 859, in save_dict
    self._batch_setitems(obj.items())
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 890, in _batch_setitems
    save(v)
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 549, in save
    self.save_reduce(obj=obj, *rv)
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 638, in save_reduce
    save(args)
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 789, in save_tuple
    save(element)
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 819, in save_list
    self._batch_appends(obj)
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 846, in _batch_appends
    save(tmp[0])
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 774, in save_tuple
    save(element)
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 774, in save_tuple
    save(element)
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 549, in save
    self.save_reduce(obj=obj, *rv)
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 662, in save_reduce
    save(state)
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 859, in save_dict
    self._batch_setitems(obj.items())
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 885, in _batch_setitems
    save(v)
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/site-packages/joblib/externals/cloudpickle/cloudpickle_fast.py", line 784, in save_function
    *self._dynamic_function_reduce(obj), obj=obj
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/site-packages/joblib/externals/cloudpickle/cloudpickle_fast.py", line 726, in _save_reduce_pickle5
    save(state)
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 774, in save_tuple
    save(element)
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 859, in save_dict
    self._batch_setitems(obj.items())
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 885, in _batch_setitems
    save(v)
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 504, in save
    f(self, obj) # Call unbound method with explicit self
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 819, in save_list
    self._batch_appends(obj)
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 843, in _batch_appends
    save(x)
  File "/Users/gpc/opt/anaconda3/envs/tensor/lib/python3.7/pickle.py", line 524, in save
    rv = reduce(self.proto)
TypeError: can't pickle _LazyLoader objects
"""

The above exception was the direct cause of the following exception:

PicklingError                             Traceback (most recent call last)
/var/folders/js/0000d_gn7_q1nxvx4_72y48c0000gn/T/ipykernel_1331/3195828411.py in <module>
----> 1 grid_result = grid.fit(input_train, target_train)

~/opt/anaconda3/envs/tensor/lib/python3.7/site-packages/sklearn/model_selection/_search.py in fit(self, X, y, groups, **fit_params)
    889                 return results
    890 
--> 891             self._run_search(evaluate_candidates)
    892 
    893             # multimetric is determined here because in the case of a callable

~/opt/anaconda3/envs/tensor/lib/python3.7/site-packages/sklearn/model_selection/_search.py in _run_search(self, evaluate_candidates)
   1390     def _run_search(self, evaluate_candidates):
   1391         """Search all candidates in param_grid"""
-> 1392         evaluate_candidates(ParameterGrid(self.param_grid))
   1393 
   1394 

~/opt/anaconda3/envs/tensor/lib/python3.7/site-packages/sklearn/model_selection/_search.py in evaluate_candidates(candidate_params, cv, more_results)
    849                     )
    850                     for (cand_idx, parameters), (split_idx, (train, test)) in product(
--> 851                         enumerate(candidate_params), enumerate(cv.split(X, y, groups))
    852                     )
    853                 )

~/opt/anaconda3/envs/tensor/lib/python3.7/site-packages/joblib/parallel.py in __call__(self, iterable)
   1054 
   1055             with self._backend.retrieval_context():
-> 1056                 self.retrieve()
   1057             # Make sure that we get a last message telling us we are done
   1058             elapsed_time = time.time() - self._start_time

~/opt/anaconda3/envs/tensor/lib/python3.7/site-packages/joblib/parallel.py in retrieve(self)
    933             try:
    934                 if getattr(self._backend, 'supports_timeout', False):
--> 935                     self._output.extend(job.get(timeout=self.timeout))
    936                 else:
    937                     self._output.extend(job.get())

~/opt/anaconda3/envs/tensor/lib/python3.7/site-packages/joblib/_parallel_backends.py in wrap_future_result(future, timeout)
    540         AsyncResults.get from multiprocessing."""
    541         try:
--> 542             return future.result(timeout=timeout)
    543         except CfTimeoutError as e:
    544             raise TimeoutError from e

~/opt/anaconda3/envs/tensor/lib/python3.7/concurrent/futures/_base.py in result(self, timeout)
    426                 raise CancelledError()
    427             elif self._state == FINISHED:
--> 428                 return self.__get_result()
    429 
    430             self._condition.wait(timeout)

~/opt/anaconda3/envs/tensor/lib/python3.7/concurrent/futures/_base.py in __get_result(self)
    382     def __get_result(self):
    383         if self._exception:
--> 384             raise self._exception
    385         else:
    386             return self._result

PicklingError: Could not pickle the task to send it to the workers.

The command executes if I remove the n_jobs parameter, but then everything runs in a single process. Is there a way to parallelise the execution? I am using Anaconda and Jupyter Notebook on an M1 Mac.

Upvotes: 1

Views: 768

Answers (1)

Paloha

Reputation: 690

You may want to use the scikeras package and follow this tutorial. The tensorflow.keras.wrappers.scikit_learn wrapper you are using is deprecated, and its estimators cannot be pickled, yet joblib must pickle each task to send it to the workers; the scikeras wrappers are designed to be picklable. Simplified example:

    from scikeras.wrappers import KerasClassifier
    ...
    model = KerasClassifier(model=create_model, epochs=100, batch_size=10, verbose=0)
    ...
    grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3)
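
For reference, here is a rough sketch of how the question's setup could look after migrating (untested here; scikeras routes arguments of the model-building function via the model__ prefix, so the param_grid keys change accordingly):

    from sklearn.model_selection import GridSearchCV
    from scikeras.wrappers import KerasClassifier

    # Wrap the existing build function; scikeras passes routed
    # "model__*" parameters through to create_model_SGD.
    model = KerasClassifier(model=create_model_SGD, verbose=0)

    param_grid = dict(
        model__neurons=[256, 512, 1024],          # routed to create_model_SGD
        model__learning_rate=[0.001, 0.01, 0.1],
        model__momentum=[0.0, 0.5, 0.9],
        batch_size=[100, 250, 350],               # consumed by the wrapper itself
        epochs=[10, 25, 50],
    )

    grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=3, verbose=1)
    grid_result = grid.fit(input_train, target_train)

Since the scikeras estimator can be pickled, joblib can dispatch the fits to worker processes. Keep in mind that each worker loads its own copy of TensorFlow, so memory usage grows with n_jobs.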

Upvotes: 2
