Reputation: 474
I currently have this piece of code
def predict(features):
    """Classify one sample using the module-level classifier and weights.

    Reads the globals ``_classifier`` and ``_weights`` instead of taking them
    as parameters, so the function keeps a one-argument signature that can be
    handed to ``Pool.imap``.

    Returns:
        A ``(label, margin)`` tuple. ``label`` is the index of the largest
        probability; ``margin`` is that probability minus the sum of all the
        other probabilities.
    """
    probabilities = _classifier.predict_proba(features * _weights).ravel()
    label = np.argmax(probabilities)
    # 2 * p[label] - sum(p) == p[label] - (sum of the remaining probabilities)
    margin = 2 * probabilities[label] - np.sum(probabilities)
    return label, margin
def run_estimator(weights,
                  X_train=X_train,
                  y_train=y_train,
                  X_test=X_test,
                  y_test=y_test,
                  optimization=True):
    """Build a classifier for ``weights`` and predict every test sample in a pool.

    The classifier and the weights are published through the module globals
    ``_classifier`` and ``_weights`` so that the module-level ``predict`` can
    read them. ``y_test`` and ``optimization`` are not used in the part of the
    function shown here.
    """
    classifier = get_classifier(weights, X_train=X_train, y_train=y_train)

    # Hand the state to ``predict`` via globals; it only accepts ``features``.
    global _classifier
    global _weights
    _classifier = classifier
    _weights = weights

    with Pool(processes=8) as pool:
        # imap yields one (label, margin) pair per sample; zip(*) transposes
        # them into a tuple of labels and a tuple of margins.
        labels, margins = zip(*pool.imap(predict, X_test))
I want to pass the `classifier` and `weights` instances to the `predict` function directly, without using global variables and without creating one big iterable to pass to `imap`. How can I achieve that in the cleanest way?
Note: creating a function inside the `run_estimator` function will not work, since the callable passed to `imap` must be picklable, and only functions declared at module level are.
Upvotes: 0
Views: 426
Reputation: 27575
Maybe like this: define a partial function `predictor` for each pair of `_classifier` and `_weights` instances:
def predict(features, _classifier, _weights):
    """Classify one sample with an explicitly supplied classifier and weights.

    Args:
        features: The sample to classify; it is multiplied by ``_weights``
            before being passed to ``_classifier.predict_proba``.
        _classifier: Object exposing ``predict_proba``.
        _weights: Weights multiplied with ``features``.

    Returns:
        A ``(label, margin)`` tuple. ``label`` is the index of the largest
        probability; ``margin`` is that probability minus the sum of all the
        other probabilities.
    """
    probabilities = _classifier.predict_proba(features * _weights).ravel()
    label = np.argmax(probabilities)
    # 2 * p[label] - sum(p) == p[label] - (sum of the remaining probabilities)
    margin = 2 * probabilities[label] - np.sum(probabilities)
    return label, margin
from functools import partial
def run_estimator(weights,
                  X_train=X_train,
                  y_train=y_train,
                  X_test=X_test,
                  y_test=y_test,
                  optimization=True,
                  partial=partial,
                  predict=predict):
    """Build a classifier for ``weights`` and predict every test sample in a pool.

    The classifier and the weights are bound to ``predict`` with
    ``functools.partial``, so no module globals are needed and the callable
    given to ``Pool.imap`` still takes a single ``features`` argument.

    ``partial`` and ``predict`` are exposed as defaulted parameters, which
    binds them as local names inside the function. ``y_test`` and
    ``optimization`` are not used in the part of the function shown here.
    """
    classifier = get_classifier(weights,
                                X_train=X_train,
                                y_train=y_train)
    # Freeze the per-run state as keyword arguments; imap supplies `features`.
    predictor = partial(predict,
                        _classifier=classifier,
                        _weights=weights)
    with Pool(processes=8) as pool:
        # imap yields one (label, margin) pair per sample; zip(*) transposes
        # them into a tuple of labels and a tuple of margins.
        labels, margins = zip(*pool.imap(predictor, X_test))
.
Tested on a simplified example, the following solution takes 22% less time than the preceding one:
def predict(features, _classifier=None, _weights=None):
    """Classify one sample with the given (or pre-bound default) classifier and weights.

    The ``None`` defaults are placeholders only: calling the function without
    a real classifier and weights fails on ``predict_proba``. Callers either
    pass both explicitly or rebind the function's defaults beforehand.

    Args:
        features: The sample to classify; it is multiplied by ``_weights``
            before being passed to ``_classifier.predict_proba``.
        _classifier: Object exposing ``predict_proba``.
        _weights: Weights multiplied with ``features``.

    Returns:
        A ``(label, margin)`` tuple. ``label`` is the index of the largest
        probability; ``margin`` is that probability minus the sum of all the
        other probabilities.
    """
    probabilities = _classifier.predict_proba(features * _weights).ravel()
    label = np.argmax(probabilities)
    # 2 * p[label] - sum(p) == p[label] - (sum of the remaining probabilities)
    margin = 2 * probabilities[label] - np.sum(probabilities)
    return label, margin
def run_estimator(weights,
                  X_train=X_train,
                  y_train=y_train,
                  X_test=X_test,
                  y_test=y_test,
                  optimization=True,
                  predict=predict):
    """Build a classifier for ``weights`` and predict every test sample in a pool.

    Instead of wrapping ``predict``, this variant overwrites the default
    values of its ``_classifier`` and ``_weights`` parameters, so the plain
    module-level function can be given to ``Pool.imap`` with only
    ``features`` supplied per call. ``y_test`` and ``optimization`` are not
    used in the part of the function shown here.
    """
    classifier = get_classifier(weights,
                                X_train=X_train,
                                y_train=y_train)
    # ``__defaults__`` is the attribute that holds a function's default values
    # (``func_defaults`` was its Python 2 spelling; on Python 3 assigning to
    # that name only creates an unrelated attribute and the defaults stay None).
    # NOTE(review): this mutates the function object in the parent process.
    # Workers presumably only see the new defaults when they inherit the
    # parent's memory (fork start method) -- confirm on spawn-based platforms.
    predict.__defaults__ = (classifier, weights)
    with Pool(processes=8) as pool:
        # imap yields one (label, margin) pair per sample; zip(*) transposes
        # them into a tuple of labels and a tuple of margins.
        labels, margins = zip(*pool.imap(predict, X_test))
Upvotes: 2
Reputation: 9953
Wouldn't this work:
def predict(fcw):
    """Classify one sample packed together with its classifier and weights.

    Args:
        fcw: A ``(features, classifier, weights)`` triple. Packing the three
            values into one argument keeps the signature compatible with
            ``Pool.imap``, which passes a single item per call.

    Returns:
        A ``(label, margin)`` tuple. ``label`` is the index of the largest
        probability; ``margin`` is that probability minus the sum of all the
        other probabilities.
    """
    features, classifier, weights = fcw
    probabilities = classifier.predict_proba(features * weights).ravel()
    label = np.argmax(probabilities)
    # 2 * p[label] - sum(p) == p[label] - (sum of the remaining probabilities)
    margin = 2 * probabilities[label] - np.sum(probabilities)
    return label, margin
def run_estimator(weights,
                  X_train=X_train,
                  y_train=y_train,
                  X_test=X_test,
                  y_test=y_test,
                  optimization=True):
    """Build a classifier for ``weights`` and predict every test sample in a pool.

    Each test sample is paired with the classifier and the weights in a
    ``(features, classifier, weights)`` triple, which is the single argument
    the module-level ``predict`` expects. ``y_test`` and ``optimization`` are
    not used in the part of the function shown here.
    """
    classifier = get_classifier(weights, X_train=X_train, y_train=y_train)
    # A generator builds the triples lazily, one per sample, rather than
    # materialising a full list up front. ``predict`` is passed directly:
    # a lambda wrapper cannot be pickled for the worker processes.
    tasks = ((features, classifier, weights) for features in X_test)
    with Pool(processes=8) as pool:
        # imap yields one (label, margin) pair per sample; zip(*) transposes
        # them into a tuple of labels and a tuple of margins.
        labels, margins = zip(*pool.imap(predict, tasks))
Upvotes: 1