Reputation: 3291
I am trying to implement the Metropolis-Hastings algorithm for a simple linear regression in C (without use of other libraries (boost, Eigen etc.) and without two-dimensional arrays)*. For better testing of the code/evaluation of the trace plots, I have rewritten the code for R (see below) by keeping as much of the C-code as possible.
Unfortunately, the chains don't converge. I am wondering if
Assuming the latter, I am thinking about how to find good parameters of proposal distributions (currently I have picked arbitrary values) so that the algorithm works. Even with three parameters as in this case, it is quite hard to find suitable parameters. How does one normally handle this problem if say Gibbs sampling is not an alternative?
*I want to use this code for Cuda
#### posterior distribution
logPostDensity <- function(x, y, a, b, s2, N)
{
sumSqError = 0.0
for(i in 1:N)
{
sumSqError = sumSqError + (y[i] - (a + b*x[i]))^2
}
return(((-(N/2)+1) * log(s2)) + ((-0.5/s2) * sumSqError))
}
# x = x values
# y = actual datapoints
# N = sample size
# m = length of chain
# sigmaProp = uniform proposal for sigma squared
# paramAProp = uniform proposal for intercept
# paramBProp = uniform proposal for slope
mcmcSampling <- function(x,y,N,m,sigmaProp,paramAProp,paramBProp)
{
paramsA = vector("numeric",length=m) # intercept
paramsB = vector("numeric",length=m) # slope
s2 = vector("numeric",length=m) # sigma squared
paramsA[1] = 0
paramsB[1] = 0
s2[1] = 1
for(i in 2:m)
{
paramsA[i] = paramsA[i-1] + runif(1,-paramAProp,paramAProp)
if((logPostDensity(x,y,paramsA[i],paramsB[i],s2[i-1],N)
- logPostDensity(x,y,paramsA[i-1],paramsB[i-1],s2[i-1],N))
< log(runif(1)))
{
paramsA[i] = paramsA[i-1]
}
paramsB[i] = paramsB[i-1] + runif(1,-paramBProp,paramBProp)
if((logPostDensity(x,y,paramsA[i],paramsB[i],s2[i-1],N)
- logPostDensity(x,y,paramsA[i-1],paramsB[i-1],s2[i-1],N))
< log(runif(1)))
{
paramsB[i] = paramsB[i-1]
}
s2[i] = s2[i-1] + runif(1,-sigmaProp,sigmaProp)
if((s2[i] < 0) || (logPostDensity(x,y,paramsA[i],paramsB[i],s2[i],N)
- logPostDensity(x,y,paramsA[i],paramsB[i],s2[i-1],N))
< log(runif(1)))
{
s2[i] = s2[i-1]
}
}
res = data.frame(paramsA,paramsB,s2)
return(res)
}
#########################################
set.seed(321)
x <- runif(100)
y <- 2 + 5*x + rnorm(100)
summary(lm(y~x))
df <- mcmcSampling(x,y,10,5000,0.05,0.05,0.05)
par(mfrow=c(3,1))
plot(df$paramsA[3000:5000],type="l",main="intercept")
plot(df$paramsB[3000:5000],type="l",main="slope")
plot(df$s2[3000:5000],type="l",main="sigma")
Upvotes: 1
Views: 1514
Reputation: 3291
There was one mistake in the intercept section (paramsA). Everything else was fine. I've implemented what Alexey suggested in his comments. Here's the solution:
pow <- function(x,y)
{
return(x^y)
}
#### posterior distribution
posteriorDistribution <- function(x, y, a, b,s2,N)
{
sumSqError <- 0.0
for(i in 1:N)
{
sumSqError <- sumSqError + pow(y[i] - (a + b*x[i]),2)
}
return((-((N/2)+1) * log(s2)) + ((-0.5/s2) * sumSqError))
}
# x <- x values
# y <- actual datapoints
# N <- sample size
# m <- length of chain
# sigmaProposalWidth <- width of uniform proposal dist for sigma squared
# paramAProposalWidth <- width of uniform proposal dist for intercept
# paramBProposalWidth <- width of uniform proposal dist for slope
mcmcSampling <- function(x,y,N,m,sigmaProposalWidth,paramAProposalWidth,paramBProposalWidth)
{
desiredAcc <- 0.44
paramsA <- vector("numeric",length=m) # intercept
paramsB <- vector("numeric",length=m) # slope
s2 <- vector("numeric",length=m) # sigma squared
paramsA[1] <- 0
paramsB[1] <- 0
s2[1] <- 1
accATot <- 0
accBTot <- 0
accS2Tot <- 0
for(i in 2:m)
{
paramsA[i] <- paramsA[i-1] + runif(1,-paramAProposalWidth,paramAProposalWidth)
accA <- 1
if((posteriorDistribution(x,y,paramsA[i],paramsB[i-1],s2[i-1],N) -
posteriorDistribution(x,y,paramsA[i-1],paramsB[i-1],s2[i-1],N)) < log(runif(1)))
{
paramsA[i] <- paramsA[i-1]
accA <- 0
}
accATot <- accATot + accA
paramsB[i] <- paramsB[i-1] + runif(1,-paramBProposalWidth,paramBProposalWidth)
accB <- 1
if((posteriorDistribution(x,y,paramsA[i],paramsB[i],s2[i-1],N) -
posteriorDistribution(x,y,paramsA[i-1],paramsB[i-1],s2[i-1],N)) < log(runif(1)))
{
paramsB[i] <- paramsB[i-1]
accB <- 0
}
accBTot <- accBTot + accB
s2[i] <- s2[i-1] + runif(1,-sigmaProposalWidth,sigmaProposalWidth)
accS2 <- 1
if((s2[i] < 0) || (posteriorDistribution(x,y,paramsA[i],paramsB[i],s2[i],N) -
posteriorDistribution(x,y,paramsA[i],paramsB[i],s2[i-1],N)) < log(runif(1)))
{
s2[i] <- s2[i-1]
accS2 <- 0
}
accS2Tot <- accS2Tot + accS2
if(i%%100==0)
{
paramAProposalWidth <- paramAProposalWidth * ((accATot/100)/desiredAcc)
paramBProposalWidth <- paramBProposalWidth * ((accBTot/100)/desiredAcc)
sigmaProposalWidth <- sigmaProposalWidth * ((accS2Tot/100)/desiredAcc)
accATot <- 0
accBTot <- 0
accS2Tot <- 0
}
}
res <- data.frame(paramsA,paramsB,s2)
return(res)
}
Upvotes: 0