Reputation: 399
I am trying to estimate the following model
where I provide uniform priors and I code the likelihood
. The latter comes from this paper and goes as follows:
In the theano/pymc3 implementation I am computing the first and second term on the rhs in first_term
and second_term
. Finally logp
sum over the entire sample.
Theano, on his own is producing some output yet when I integrate it in a pymc3 model the following error appears:
TypeError: ('Bad input argument to theano function with name "<ipython-input-90-a5304bf41c50>:27" at index 0(0-based)', 'Expected an array-like object, but found a Variable: maybe you are trying to call a function on a (possibly shared) variable instead of a numeric array?')
I think the problem is how to supply pymc3 variables to theano.
from pymc3 import Model, Uniform, DensityDist
import theano.tensor as T
import theano
import numpy as np
p_test, theta_test = .1, .1
X = np.asarray([[1,2,3],[1,2,3]])
### theano test
theano.config.compute_test_value = 'off'
obss = T.matrix('obss')
p, theta = T.scalar('p'), T.scalar('theta')
def first_term(obs, p, theta):
x, tx, n = obs[0], obs[1], obs[2]
first_comp = p ** x * (1 - p) ** (n - x) * (1 - theta) ** n
return(first_comp)
def second_term(obs, p, theta):
x, tx, n = obs[0], obs[1], obs[2]
components, updates = theano.scan(
lambda t, p, theta, x, tx: p ** x * (1 - theta) ** (tx-x+t) * theta * (1 - theta) ** (tx + t),
sequences=theano.tensor.arange(n), non_sequences = [p, theta, x, tx]
)
return(components)
def logp(X, p_hat, theta_hat):
contributions, updates = theano.scan(lambda obs, p, theta: first_term(obs, p, theta) + T.sum( second_term(obs, p, theta) ) ,
sequences = obss, non_sequences = [p, theta]
)
ll = contributions.sum()
get_ll = theano.function(inputs = [obss, p, theta], outputs = ll)
return(get_ll(X, p_hat , theta_hat))
print( logp( X, p_test, theta_test ) ) # It works!
### pymc3 implementation
with Model() as bg_model:
p = Uniform('p', lower = 0, upper = 1)
theta = Uniform('theta', lower = 0, upper = .2)
def first_term(obs, p, theta):
x, tx, n = obs[0], obs[1], obs[2]
first_comp = p ** x * (1 - p) ** (n - x) * (1 - theta) ** n
return(first_comp)
def second_term(obs, p, theta):
x, tx, n = obs[0], obs[1], obs[2]
components, updates = theano.scan(
lambda t, p, theta, x, tx: p ** x * (1 - theta) ** (tx-x+t) * theta * (1 - theta) ** (tx + t),
sequences=theano.tensor.arange(n), non_sequences = [p, theta, x, tx]
)
return(components)
def logp(X):
contributions, updates = theano.scan(lambda obs, p, theta: first_term(obs, p, theta) + T.sum( second_term(obs, p, theta) ) ,
sequences = obss, non_sequences = [p, theta]
)
ll = contributions.sum()
get_ll = theano.function(inputs = [obss, p, theta], outputs = ll)
return(get_ll(X, p, theta))
y = pymc3.DensityDist('y', logp, observed = X) # Nx4 y = f(f,x,tx,n | p, theta)
My first guess was modifying logp
with return(get_ll(X, p.eval(), theta.eval()))
but then theano complains about some mysterious p_interval
missing from the graph. Any clue?
Upvotes: 2
Views: 508
Reputation: 399
I figured it out by: i) simplifying things ii) avoiding using theano operators when coding the likelihood and iii) using the build-in wrapper Deterministic for deterministic transformations of variables (my lifetime). To speed up computation I vectorised the likelihood by writing the second term on the rhs as solution of a geometric series. Here is the code in case someone want to test it on his own lifetime application.
from pymc3 import Model, Uniform, Deterministic
import pymc3
from scipy import optimize
import theano.tensor as T
X = array([[ 5, 64, 8, 13],[ 4, 71, 23, 41],[ 7, 70, 4, 19])
#f, n, x, tx where f stands for the frequency of the triple (n, x, tx)
class CustomDist(pymc3.distributions.Discrete):
def __init__(self, p, theta, *args, **kwargs):
super(CustomDist, self).__init__(*args, **kwargs)
self.p = p
self.theta = theta
def logp(self, X):
p = self.theta
theta = self.theta
f, n, x, tx = X[0],(X[1] + 1),X[2],(X[3] + 1) #time indexed at 0, x > n
ll = f * T.log( p ** x * (1 - p) ** (n - x) * (1 - theta) ** n +
[(1 - p) ** (tx - x) * (1 - theta) ** tx - (1 - p) ** (n - x) * (1 - theta) ** n] / (1 - (1 - p) * (1 - theta)) )
# eliminated -1 since would result in negatice ll
return(T.sum( ll ))
with Model() as bg_model:
p = Uniform('p', lower = 0, upper = 1)
theta = Uniform('theta', lower = 0, upper = 1)
like = CustomDist('like', p = p, theta = theta, observed=X.T) #likelihood
lt = pymc3.Deterministic('lt', p / theta)
# start = {'p':.5, 'theta':.1}
start = pymc3.find_MAP(fmin=optimize.fmin_powell)
step = pymc3.Slice([p, theta, lt])
trace = pymc3.sample(5000, start = start, step = [step])
pymc3.traceplot(trace[2000:])
Upvotes: 4