Reputation: 561
I'm working on a dataset from Kaggle (the Human Resources one). I am trying to implement some Bayesian statistics (logistic regression), but I don't understand how to change the priors on this model: I want the intercept to have an uninformative Gaussian prior and all the other predictors to have a Laplace prior.
# First attempt (reproduces the TypeError quoted below).
import pymc3 as pm
# NOTE: pm.Normal('alpha', ...) and pm.Laplace('beta', ...) instantiate named
# variables here, before any `with pm.Model()` block is open; the error
# message says distributions need a model on the context stack (or `.dist`).
priors = {"Intercept": pm.Normal('alpha', mu=0, sd=100),
"Regressor": pm.Laplace('beta', mu=0, b=np.sqrt(2))
}
# The priors dict is passed to the GLM through the `priors` keyword.
with pm.Model() as logistic_model:
pm.glm.GLM.from_formula('left ~ satisfaction_level + last_evaluation \
+ number_project + average_montly_hours \
+ time_spend_company + Work_accident + promotion_last_5years\
+ sales + salary', df, family=pm.glm.families.Binomial(), priors = priors)
trace_logistic_model = pm.sample(4000)
And the error message:
TypeError: No model on context stack, which is needed to instantiate distributions. Add variable inside a 'with model:' block, or use the '.dist' syntax for a standalone distribution.
I've tried to use the `.dist` syntax, but it doesn't work.
Thanks for your help
Update:
I have changed my code to the following:
# Second attempt: the priors are now created inside the model context.
with pm.Model() as logistic_model:
# NOTE(review): these are still named variables (`alpha`, `beta`), not
# `.dist()` distribution objects; the traceback below fails while GLM is
# building a variable from the supplied prior -- presumably it expects a
# distribution object here.
priors = {"Intercept": pm.Normal('alpha', mu=0, sd=100),
"Regressor": pm.Laplace('beta', mu=0, b=np.sqrt(2))
}
pm.glm.GLM.from_formula('left ~ satisfaction_level + last_evaluation \
+ number_project + average_montly_hours \
+ time_spend_company + Work_accident + promotion_last_5years\
+ sales + salary', df, family=pm.glm.families.Binomial(), priors = priors)
trace_logistic_model = pm.sample(4000)
Now I have this error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-16-e3566f815877> in <module>()
5 }
6
----> 7 pm.glm.GLM.from_formula('left ~ satisfaction_level + last_evaluation + number_project + average_montly_hours + time_spend_company + Work_accident + promotion_last_5years + sales + salary', df, family=pm.glm.families.Binomial(), priors = priors)
8 trace_logistic_model = pm.sample(4000)
/home/glouis/anaconda/lib/python3.5/site-packages/pymc3/glm/linear.py in from_formula(cls, formula, data, priors, vars, family, name, model)
134 labels = x.design_info.column_names
135 return cls(np.asarray(x), np.asarray(y)[:, 0], intercept=False, labels=labels,
--> 136 priors=priors, vars=vars, family=family, name=name, model=model)
137
138 glm = GLM
/home/glouis/anaconda/lib/python3.5/site-packages/pymc3/model.py in __call__(cls, *args, **kwargs)
238 instance = cls.__new__(cls, *args, **kwargs)
239 with instance: # appends context
--> 240 instance.__init__(*args, **kwargs)
241 return instance
242
/home/glouis/anaconda/lib/python3.5/site-packages/pymc3/glm/linear.py in __init__(self, x, y, intercept, labels, priors, vars, family, name, model)
111 super(GLM, self).__init__(
112 x, y, intercept=intercept, labels=labels,
--> 113 priors=priors, vars=vars, name=name, model=model
114 )
115
/home/glouis/anaconda/lib/python3.5/site-packages/pymc3/glm/linear.py in __init__(self, x, y, intercept, labels, priors, vars, name, model)
57 dist=priors.get(
58 name,
---> 59 self.default_intercept_prior
60 )
61 )
/home/glouis/anaconda/lib/python3.5/site-packages/pymc3/model.py in Var(self, name, dist, data, total_size)
750 with self:
751 var = FreeRV(name=name, distribution=dist,
--> 752 total_size=total_size, model=self)
753 self.free_RVs.append(var)
754 else:
/home/glouis/anaconda/lib/python3.5/site-packages/pymc3/model.py in __init__(self, type, owner, index, name, distribution, total_size, model)
1117 if distribution is not None:
1118 self.dshape = tuple(distribution.shape)
-> 1119 self.dsize = int(np.prod(distribution.shape))
1120 self.distribution = distribution
1121 self.tag.test_value = np.ones(
/home/glouis/anaconda/lib/python3.5/site-packages/numpy/core/fromnumeric.py in prod(a, axis, dtype, out, keepdims)
2513 pass
2514 else:
-> 2515 return prod(axis=axis, dtype=dtype, out=out, **kwargs)
2516
2517 return _methods._prod(a, axis=axis, dtype=dtype,
TypeError: prod() got an unexpected keyword argument 'out'
Upvotes: 4
Views: 5141
Reputation: 301
You should try this code:
import pymc3 as pm
import numpy as np

df = ...  # placeholder: the DataFrame holding the Kaggle HR dataset

# pm.Normal('alpha', mu=0, sd=100) creates a *random variable* named `alpha`
# and therefore needs an active model context.
# pm.Normal.dist(...) creates a standalone *distribution object*, which is
# what GLM uses to create its own random variables.
#
# From https://docs.pymc.io/api/glm.html
#
#   "Intercept" key: prior for the intercept,
#       defaults to Flat.dist()
#   "Regressor" key: default prior for all regressors,
#       defaults to Normal.dist(mu=0, tau=1.0E-6)
priors = {
    "Intercept": pm.Normal.dist(mu=0, sd=100),
    "Regressor": pm.Laplace.dist(mu=0, b=np.sqrt(2)),
}

with pm.Model() as logistic_model:
    # The formula is split with implicit string concatenation so that the
    # indentation of the continuation lines does not end up inside the string.
    pm.glm.GLM.from_formula(
        'left ~ satisfaction_level + last_evaluation '
        '+ number_project + average_montly_hours '
        '+ time_spend_company + Work_accident + promotion_last_5years '
        '+ sales + salary',
        df,
        family=pm.glm.families.Binomial(),
        priors=priors,
    )
    # Sampling must happen inside the `with` block so that pm.sample can
    # find the model on the context stack.
    trace_logistic_model = pm.sample(4000)
Upvotes: 1