beginneR
beginneR

Reputation: 3291

Metropolis Hastings for linear regression model

I am trying to implement the Metropolis-Hastings algorithm for a simple linear regression in C (without use of other libraries (boost, Eigen etc.) and without two-dimensional arrays)*. For better testing of the code/evaluation of the trace plots, I have rewritten the code for R (see below) by keeping as much of the C-code as possible.

Unfortunately, the chains don't converge. I am wondering if

  1. there is a mistake in the implementation itself?
  2. "just" a bad choice of proposal distributions?

Assuming the latter, I am thinking about how to find good parameters of proposal distributions (currently I have picked arbitrary values) so that the algorithm works. Even with three parameters as in this case, it is quite hard to find suitable parameters. How does one normally handle this problem if say Gibbs sampling is not an alternative?

*I want to use this code for Cuda

#### posterior distribution
logPostDensity <- function(x, y, a, b, s2, N)
{
sumSqError = 0.0
for(i in 1:N)
{
  sumSqError = sumSqError + (y[i] - (a + b*x[i]))^2
}
return(((-(N/2)+1) * log(s2)) + ((-0.5/s2) * sumSqError))

}

# x = x values
# y = actual datapoints
# N = sample size
# m = length of chain
# sigmaProp = uniform proposal for sigma squared
# paramAProp = uniform proposal for intercept
# paramBProp = uniform proposal for slope

mcmcSampling <- function(x,y,N,m,sigmaProp,paramAProp,paramBProp)
{

  paramsA = vector("numeric",length=m) # intercept
  paramsB = vector("numeric",length=m) # slope
  s2 = vector("numeric",length=m) # sigma squared

  paramsA[1] = 0
  paramsB[1] = 0
  s2[1] = 1

  for(i in 2:m)
  {

    paramsA[i] = paramsA[i-1] + runif(1,-paramAProp,paramAProp)

    if((logPostDensity(x,y,paramsA[i],paramsB[i],s2[i-1],N)
        - logPostDensity(x,y,paramsA[i-1],paramsB[i-1],s2[i-1],N))
       < log(runif(1)))
    {
      paramsA[i] = paramsA[i-1]
    }

    paramsB[i] = paramsB[i-1] + runif(1,-paramBProp,paramBProp)

    if((logPostDensity(x,y,paramsA[i],paramsB[i],s2[i-1],N)
        - logPostDensity(x,y,paramsA[i-1],paramsB[i-1],s2[i-1],N))
       < log(runif(1)))
    {
      paramsB[i] = paramsB[i-1]
    }

    s2[i] = s2[i-1] + runif(1,-sigmaProp,sigmaProp)

    if((s2[i] < 0) || (logPostDensity(x,y,paramsA[i],paramsB[i],s2[i],N)
                       - logPostDensity(x,y,paramsA[i],paramsB[i],s2[i-1],N))
       < log(runif(1)))
    {
      s2[i] = s2[i-1]
    }


  }

  res = data.frame(paramsA,paramsB,s2)
  return(res)
}


#########################################

set.seed(321)
x <- runif(100)
y <- 2 + 5*x + rnorm(100)

summary(lm(y~x))


df <- mcmcSampling(x,y,10,5000,0.05,0.05,0.05)


par(mfrow=c(3,1))
plot(df$paramsA[3000:5000],type="l",main="intercept")
plot(df$paramsB[3000:5000],type="l",main="slope")
plot(df$s2[3000:5000],type="l",main="sigma")

Upvotes: 1

Views: 1514

Answers (1)

beginneR
beginneR

Reputation: 3291

There was one mistake in the intercept section (paramsA). Everything else was fine. I've implemented what Alexey suggested in his comments. Here's the solution:

pow <- function(x,y)
{
  return(x^y)
}


#### posterior distribution
posteriorDistribution <- function(x, y, a, b,s2,N)
{
sumSqError <- 0.0
for(i in 1:N)
{
  sumSqError <- sumSqError + pow(y[i] - (a + b*x[i]),2)
}
return((-((N/2)+1) * log(s2)) + ((-0.5/s2) * sumSqError))

}

# x <- x values
# y <- actual datapoints
# N <- sample size
# m <- length of chain
# sigmaProposalWidth <- width of uniform proposal dist for sigma squared
# paramAProposalWidth <- width of uniform proposal dist for intercept
# paramBProposalWidth <- width of uniform proposal dist for slope

mcmcSampling <- function(x,y,N,m,sigmaProposalWidth,paramAProposalWidth,paramBProposalWidth)
{

  desiredAcc <- 0.44

  paramsA <- vector("numeric",length=m) # intercept
  paramsB <- vector("numeric",length=m) # slope
  s2 <- vector("numeric",length=m) # sigma squared

  paramsA[1] <- 0
  paramsB[1] <- 0
  s2[1] <- 1

  accATot <- 0
  accBTot <- 0
  accS2Tot <- 0

  for(i in 2:m)
  {
    paramsA[i] <- paramsA[i-1] + runif(1,-paramAProposalWidth,paramAProposalWidth)
    accA <- 1
    if((posteriorDistribution(x,y,paramsA[i],paramsB[i-1],s2[i-1],N) - 
        posteriorDistribution(x,y,paramsA[i-1],paramsB[i-1],s2[i-1],N)) < log(runif(1)))
    {
      paramsA[i] <- paramsA[i-1]
      accA <- 0
    }


    accATot <- accATot + accA

    paramsB[i] <- paramsB[i-1] + runif(1,-paramBProposalWidth,paramBProposalWidth)
    accB <- 1
    if((posteriorDistribution(x,y,paramsA[i],paramsB[i],s2[i-1],N) - 
        posteriorDistribution(x,y,paramsA[i-1],paramsB[i-1],s2[i-1],N)) < log(runif(1)))
    {
      paramsB[i] <- paramsB[i-1]
      accB <- 0
    }

    accBTot <- accBTot + accB

    s2[i] <- s2[i-1] + runif(1,-sigmaProposalWidth,sigmaProposalWidth)
    accS2 <- 1

    if((s2[i] < 0) || (posteriorDistribution(x,y,paramsA[i],paramsB[i],s2[i],N) - 
                       posteriorDistribution(x,y,paramsA[i],paramsB[i],s2[i-1],N)) < log(runif(1)))
    {
      s2[i] <- s2[i-1]
      accS2 <- 0
    }

    accS2Tot <- accS2Tot + accS2

    if(i%%100==0)
    {

      paramAProposalWidth <- paramAProposalWidth * ((accATot/100)/desiredAcc)
      paramBProposalWidth <- paramBProposalWidth * ((accBTot/100)/desiredAcc)
      sigmaProposalWidth <- sigmaProposalWidth * ((accS2Tot/100)/desiredAcc)

      accATot <-  0
      accBTot <-  0 
      accS2Tot <-  0

    }


  }
    res <- data.frame(paramsA,paramsB,s2)
    return(res)

}

Upvotes: 0

Related Questions