setwd("C:/Users/Lisa/Dropbox/CSUMB stats class")
library("BRugs") 

#------------------------------------------------------------------
# Creating simulated data.
# Linear regression with normal variance.
Beta0 <- 0   # Intercept
Beta1 <- 1500   # Slope
X <- 1:40      # X values
Err <- rnorm(n = 40, mean = 0, sd = 5000)  # variability

Y <- Beta0 + (Beta1*X) + Err   # Y values

plot(X,Y)

#------------------------------------------------------------------
# Putting data in to OpenBugs format.
data.in <- list(X = X, Y = Y, N = 40)
bugsData(data = data.in, fileName = "linregdata.txt")


#------------------------------------------------------------------
# Setting initial values.
# I'm giving it 5 initial values to set up 5 chains.
# Here is a way to do it that will give you a more practical set of
#      starting values. This is one of the few times I use frequentist
#      statistics.
#    a) run the linear model and look at results.
freq_out <- lm(Y~X)
summary(freq_out)
#    b) my simulated data estimated these results, and I use them to create
#       starting values for the Bayesian analysis.
#       Note that the intercept is NOT significantly different from zero,
#        which is a good thing since it was simulated as zero.
#Coefficients:
#            Estimate Std. Error t value Pr(>|t|)    
#(Intercept) -1163.65    1948.76  -0.597    0.554    
#X            1518.62      82.83  18.334   <2e-16 ***
#---
#Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

#Residual standard error: 6047

#    c) create a function to sample each parameter from a normal.
inits.func <- function ()
  list (beta0 = rnorm(1,-1163.65,1948.76),
        beta1 = rnorm(1,1518.62,82.83), 
        tau = 1/(rnorm(1,6047,1000)^2))

#    d) Putting initial values in OpenBugs format.
# It creates separate initial values files for each chain.
# I recommend using exponential format since it handles extreme number values
#  (very close to zero and very far from zero) better.
# If you are using uniform priors, check the files to make sure the starting values are within
#  the bounds of the uniform set up in the model file.

bugsInits(inits = inits.func, numChains = 5, fileName = c("lri1.txt",
                                                        "lri2.txt",
                                                        "lri3.txt",
                                                        "lri4.txt",
                                                        "lri5.txt"),
          format="E")

#------------------------------------------------------------------
# Running the model.
## some usual steps (like clicking in WinBUGS):
modelCheck("LinRegmodel.txt") # check model file
modelData("linregdata.txt") # read data file
modelCompile(numChains=5) # compile model with 4 chains
modelInits(c("lri1.txt",
             "lri2.txt",
             "lri3.txt",
             "lri4.txt",
             "lri5.txt")) # read init data file
modelUpdate(15000) # burn in. This burn in time was increased after at least one run of the model.
samplesSet(c("beta0", "beta1", "tau")) # parameters to be monitored
modelUpdate(15000,  thin = 20) # 15000 more iterations, thinning determined after at least one run of the model.
samplesStats("*") # the summarized results

## some plots
# The important thing here is to make sure the autocorrelations are below 0.1.
samplesHistory("*") # plot the chain,
samplesDensity("*") # plot the densities,
samplesAutoC("*", 1) # plot autocorrelations of 1st chain
samplesAutoC("*", 2)... # plot autocorrelations of 2nd chain

#------------------------------------------------------------------
## Pulling out posterior samples of the parameters.
# List #    parameters
# ----------------------
#    1      beta0
#    2      beta1
#    3      tau (1/variance)
Para_OutLinReg <- samplesHistory("*", plot = FALSE)

#------------------------------------------------------------------
# Look at the posterior linear relationship with respect to the data.
# This is a function you can use for any posterior results of a linear regression with
#   one independent variable and normal error.

# Function variable description:
# postslope: posterior slope
# postint: posterior intercept
# postvar: posterior variance
# min_x: minimum sample of independent variable
# max_x: maximum sample of independent variable
# seedstart: setting the random number generator to fixed values

# Returns a matrix of seven columns: 
# 1. X sample value
# 2. Y distribution characteristics at X
#   a. lower 2.5% bound
#   b. upper 97.5% bound
#   c. mean
#   d. median
#   e. standard deviation
#   f. mode (if your sample size is large enough)

normlinout <- function(postslope,postint,postvar,min_x,max_x,seedstart){
  set.seed(seedstart)
  tlout <- matrix(nrow=(max_x-min_x+1),ncol=7)
  colnames(tlout) <- c("x","low025","high975","ave","med50","sd","mode")
  tmperror <- rnorm(n = length(postslope), mean = 0, sd = sqrt(postvar))       
  for(x in min_x:max_x){
    tlsamp <- (postslope*x) + postint
    tlsamp <- tlsamp + tmperror
    tmp <- hist(tlsamp,breaks=100,plot=FALSE)
    tlquant <- quantile(tlsamp,probs=c(0.025,0.5,0.975),na.rm=TRUE)
    tlout[(x-min_x+1),1] <- x
    tlout[(x-min_x+1),2] <- tlquant[1]
    tlout[(x-min_x+1),3] <- tlquant[3]
    tlout[(x-min_x+1),4] <- mean(tlsamp,na.rm=TRUE)
    tlout[(x-min_x+1),5] <- tlquant[2]
    tlout[(x-min_x+1),6] <- sd(tlsamp)
    tlout[(x-min_x+1),7] <- mean(tmp$mids[tmp$density==max(tmp$density)])
  }
  tlout
}

# Setting the range for which to sample the dependent variable.
# This will depend on the range of your data.
minX = min(X)
maxX = max(X)

# Estimate Y. Just use the first chain results.
LinRegPosterior <- normlinout(postslope=Para_OutLinReg$beta1[,1],
                             postint=Para_OutLinReg$beta0[,1],
                             postvar=(1/Para_OutLinReg$tau[,1]),
                             min_x=minX,
                             max_x=maxX,
                             seedstart=7619)

# Plot the results with the data.

par(mfcol = c(nrow=1,ncol=1))
par(mar = c(4.0,4.0,0.5,0.5))
par(ps = 14)
par(oma=c(1.5,1.5,1,1))
plot(X,Y, xlab = "Independent variable", ylab = "Dependent variable")
lines(x = LinRegPosterior[,1], y = LinRegPosterior [,4],col = "blue",lwd=3)
lines(x = LinRegPosterior[,1], y = LinRegPosterior [,2],col = "blue",lwd=3,lty="dashed")
lines(x = LinRegPosterior[,1], y = LinRegPosterior [,3],col = "blue",lwd=3,lty="dashed")


#------------------------------------------------------------------
# Another way to run the model that will give you DIC, but you can't
# play with posterior samples.

BRugsFit(modelFile = "LinRegmodel.txt",
         data = "linregdata.txt",
         inits = c("lri1.txt",
             "lri2.txt",
             "lri3.txt",
             "lri4.txt",
             "lri5.txt"),
         numChains = 5,
         parametersToSave = c("beta0", "beta1", "tau"),
         nBurnin = 15000,
         nIter = 15000,
         nThin = 20,
         coda = FALSE,
         DIC = TRUE,
         working.directory = NULL,
         digits = 5,
         seed=NULL,
         BRugsVerbose = getOption("BRugsVerbose"))