glouis
glouis

Reputation: 561

Change Prior Pymc3 with from_formula

I'm working on a dataset from Kaggle (the one on Human Resources). I am trying to implement some Bayesian statistics (logistic regression), but I don't understand how to change the priors on this model: I want the intercept to have an uninformative Gaussian prior and all the other predictors to have a Laplace prior.

import pymc3 as pm

# First attempt, kept exactly as posted: it reproduces the TypeError quoted below.
# NOTE(review): both priors are instantiated here, before any `with pm.Model()`
# block has been entered. The error message that follows ("No model on context
# stack") asks for variables to be added inside a `with model:` block or to be
# built with the `.dist` syntax.
# NOTE(review): `np` is used below but no numpy import is visible in this snippet.
priors = {"Intercept": pm.Normal('alpha', mu=0, sd=100),
      "Regressor": pm.Laplace('beta', mu=0, b=np.sqrt(2))
          }

# NOTE(review): the `from_formula` line is indented one space deeper than the
# `pm.sample` line -- looks like a paste artifact; confirm against the original.
with pm.Model() as logistic_model:
     pm.glm.GLM.from_formula('left ~ satisfaction_level + last_evaluation \
                        + number_project + average_montly_hours \
                        + time_spend_company + Work_accident + promotion_last_5years\
                        + sales + salary', df, family=pm.glm.families.Binomial(), priors = priors)
    trace_logistic_model = pm.sample(4000)

And the error message:

TypeError: No model on context stack, which is needed to instantiate distributions. Add variable inside a 'with model:' block, or use the '.dist' syntax for a standalone distribution.

I've also tried using the `.dist` syntax suggested by the error message, but it doesn't work either.

Thanks for your help

Update:

I have changed my code to the following:

# Second attempt, kept exactly as posted: the priors are now created inside the
# model context, and a different TypeError (traceback below) is reported.
with pm.Model() as logistic_model:

    # NOTE(review): both priors are still built with a name ('alpha', 'beta').
    # Per the answer further down, that form yields random variables rather
    # than the distribution objects produced by `.dist(...)`. The traceback
    # below shows the prior being forwarded as `dist=` and the failure
    # occurring at `np.prod(distribution.shape)`.
    priors = {"Intercept": pm.Normal('alpha', mu=0, sd=100),
      "Regressor": pm.Laplace('beta', mu=0, b=np.sqrt(2))
          }

    # Logistic regression of `left` on the predictors, using the custom priors.
    pm.glm.GLM.from_formula('left ~ satisfaction_level + last_evaluation \
                        + number_project + average_montly_hours \
                        + time_spend_company + Work_accident + promotion_last_5years\
                        + sales + salary', df, family=pm.glm.families.Binomial(), priors = priors)
    trace_logistic_model = pm.sample(4000)

Now I get this error:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-16-e3566f815877> in <module>()
      5               }
      6 
----> 7     pm.glm.GLM.from_formula('left ~ satisfaction_level + last_evaluation                             + number_project + average_montly_hours                             + time_spend_company + Work_accident + promotion_last_5years                            + sales + salary', df, family=pm.glm.families.Binomial(), priors = priors)
      8     trace_logistic_model = pm.sample(4000)

/home/glouis/anaconda/lib/python3.5/site-packages/pymc3/glm/linear.py in from_formula(cls, formula, data, priors, vars, family, name, model)
    134         labels = x.design_info.column_names
    135         return cls(np.asarray(x), np.asarray(y)[:, 0], intercept=False, labels=labels,
--> 136                    priors=priors, vars=vars, family=family, name=name, model=model)
    137 
    138 glm = GLM

/home/glouis/anaconda/lib/python3.5/site-packages/pymc3/model.py in __call__(cls, *args, **kwargs)
    238         instance = cls.__new__(cls, *args, **kwargs)
    239         with instance:  # appends context
--> 240             instance.__init__(*args, **kwargs)
    241         return instance
    242 

/home/glouis/anaconda/lib/python3.5/site-packages/pymc3/glm/linear.py in __init__(self, x, y, intercept, labels, priors, vars, family, name, model)
    111         super(GLM, self).__init__(
    112             x, y, intercept=intercept, labels=labels,
--> 113             priors=priors, vars=vars, name=name, model=model
    114         )
    115 

/home/glouis/anaconda/lib/python3.5/site-packages/pymc3/glm/linear.py in __init__(self, x, y, intercept, labels, priors, vars, name, model)
     57                         dist=priors.get(
     58                             name,
---> 59                             self.default_intercept_prior
     60                         )
     61                     )

/home/glouis/anaconda/lib/python3.5/site-packages/pymc3/model.py in Var(self, name, dist, data, total_size)
    750                 with self:
    751                     var = FreeRV(name=name, distribution=dist,
--> 752                                  total_size=total_size, model=self)
    753                 self.free_RVs.append(var)
    754             else:

/home/glouis/anaconda/lib/python3.5/site-packages/pymc3/model.py in __init__(self, type, owner, index, name, distribution, total_size, model)
   1117         if distribution is not None:
   1118             self.dshape = tuple(distribution.shape)
-> 1119             self.dsize = int(np.prod(distribution.shape))
   1120             self.distribution = distribution
   1121             self.tag.test_value = np.ones(

/home/glouis/anaconda/lib/python3.5/site-packages/numpy/core/fromnumeric.py in prod(a, axis, dtype, out, keepdims)
   2513             pass
   2514         else:
-> 2515             return prod(axis=axis, dtype=dtype, out=out, **kwargs)
   2516 
   2517     return _methods._prod(a, axis=axis, dtype=dtype,

TypeError: prod() got an unexpected keyword argument 'out'

Upvotes: 4

Views: 5141

Answers (1)

Maxim Kochurov
Maxim Kochurov

Reputation: 301

You should try this code:

import pymc3 as pm
import numpy as np

df = ...  # placeholder for the DataFrame used in the question

# Random variable vs. distribution object:
#   * pm.Normal('alpha', mu=0, sd=100) is a random variable named `alpha`.
#   * pm.Normal.dist(...) is a distribution object, the thing that is used
#     to create random variables.
#
# Keys understood by `priors` (from https://docs.pymc.io/api/glm.html):
#   * "Intercept" -- prior for the intercept;
#     defaults to Flat.dist()
#   * "Regressor" -- default prior for all regressors;
#     defaults to Normal.dist(mu=0, tau=1.0E-6)
priors = dict(
    Intercept=pm.Normal.dist(mu=0, sd=100),
    Regressor=pm.Laplace.dist(mu=0, b=np.sqrt(2)),
)

with pm.Model() as logistic_model:
    # Build the logistic GLM from the formula, handing over distribution
    # objects (not named random variables) as the priors.
    pm.glm.GLM.from_formula(
        'left ~ satisfaction_level + last_evaluation \
                        + number_project + average_montly_hours \
                        + time_spend_company + Work_accident +     promotion_last_5years\
                        + sales + salary',
        df,
        family=pm.glm.families.Binomial(),
        priors=priors,
    )
    # Draw 4000 samples from the model that is active in this context.
    trace_logistic_model = pm.sample(4000)

Upvotes: 1

Related Questions