setwd("C:/Users/Lisa/Dropbox/CSUMB stats class") library("BRugs") #------------------------------------------------------------------ # Creating simulated data. # Linear regression with normal variance. Beta0 <- 0 # Intercept Beta1 <- 1500 # Slope X <- 1:40 # X values Err <- rnorm(n = 40, mean = 0, sd = 5000) # variability Y <- Beta0 + (Beta1*X) + Err # Y values plot(X,Y) #------------------------------------------------------------------ # Putting data in to OpenBugs format. data.in <- list(X = X, Y = Y, N = 40) bugsData(data = data.in, fileName = "linregdata.txt") #------------------------------------------------------------------ # Setting initial values. # I'm giving it 5 initial values to set up 5 chains. # Here is a way to do it that will give you a more practical set of # starting values. This is one of the few times I use frequentist # statistics. # a) run the linear model and look at results. freq_out <- lm(Y~X) summary(freq_out) # b) my simulated data estimated these results, and I use them to create # starting values for the Bayesian analysis. # Note that the intercept is NOT significantly different from zero, # which is a good thing since it was simulated as zero. #Coefficients: # Estimate Std. Error t value Pr(>|t|) #(Intercept) -1163.65 1948.76 -0.597 0.554 #X 1518.62 82.83 18.334 <2e-16 *** #--- #Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 #Residual standard error: 6047 # c) create a function to sample each parameter from a normal. inits.func <- function () list (beta0 = rnorm(1,-1163.65,1948.76), beta1 = rnorm(1,1518.62,82.83), tau = 1/(rnorm(1,6047,1000)^2)) # d) Putting initial values in OpenBugs format. # It creates separate initial values files for each chain. # I recommend using exponential format since it handles extreme number values # (very close to zero and very far from zero) better. # If you are using uniform priors, check the files to make sure the starting values are within # the bounds of the uniform set up in the model file. bugsInits(inits = inits.func, numChains = 5, fileName = c("lri1.txt", "lri2.txt", "lri3.txt", "lri4.txt", "lri5.txt"), format="E") #------------------------------------------------------------------ # Running the model. ## some usual steps (like clicking in WinBUGS): modelCheck("LinRegmodel.txt") # check model file modelData("linregdata.txt") # read data file modelCompile(numChains=5) # compile model with 4 chains modelInits(c("lri1.txt", "lri2.txt", "lri3.txt", "lri4.txt", "lri5.txt")) # read init data file modelUpdate(15000) # burn in. This burn in time was increased after at least one run of the model. samplesSet(c("beta0", "beta1", "tau")) # parameters to be monitored modelUpdate(15000, thin = 20) # 15000 more iterations, thinning determined after at least one run of the model. samplesStats("*") # the summarized results ## some plots # The important thing here is to make sure the autocorrelations are below 0.1. samplesHistory("*") # plot the chain, samplesDensity("*") # plot the densities, samplesAutoC("*", 1) # plot autocorrelations of 1st chain samplesAutoC("*", 2)... # plot autocorrelations of 2nd chain #------------------------------------------------------------------ ## Pulling out posterior samples of the parameters. # List # parameters # ---------------------- # 1 beta0 # 2 beta1 # 3 tau (1/variance) Para_OutLinReg <- samplesHistory("*", plot = FALSE) #------------------------------------------------------------------ # Look at the posterior linear relationship with respect to the data. # This is a function you can use for any posterior results of a linear regression with # one independent variable and normal error. # Function variable description: # postslope: posterior slope # postint: posterior intercept # postvar: posterior variance # min_x: minimum sample of independent variable # max_x: maximum sample of independent variable # seedstart: setting the random number generator to fixed values # Returns a matrix of seven columns: # 1. X sample value # 2. Y distribution characteristics at X # a. lower 2.5% bound # b. upper 97.5% bound # c. mean # d. median # e. standard deviation # f. mode (if your sample size is large enough) normlinout <- function(postslope,postint,postvar,min_x,max_x,seedstart){ set.seed(seedstart) tlout <- matrix(nrow=(max_x-min_x+1),ncol=7) colnames(tlout) <- c("x","low025","high975","ave","med50","sd","mode") tmperror <- rnorm(n = length(postslope), mean = 0, sd = sqrt(postvar)) for(x in min_x:max_x){ tlsamp <- (postslope*x) + postint tlsamp <- tlsamp + tmperror tmp <- hist(tlsamp,breaks=100,plot=FALSE) tlquant <- quantile(tlsamp,probs=c(0.025,0.5,0.975),na.rm=TRUE) tlout[(x-min_x+1),1] <- x tlout[(x-min_x+1),2] <- tlquant[1] tlout[(x-min_x+1),3] <- tlquant[3] tlout[(x-min_x+1),4] <- mean(tlsamp,na.rm=TRUE) tlout[(x-min_x+1),5] <- tlquant[2] tlout[(x-min_x+1),6] <- sd(tlsamp) tlout[(x-min_x+1),7] <- mean(tmp$mids[tmp$density==max(tmp$density)]) } tlout } # Setting the range for which to sample the dependent variable. # This will depend on the range of your data. minX = min(X) maxX = max(X) # Estimate Y. Just use the first chain results. LinRegPosterior <- normlinout(postslope=Para_OutLinReg$beta1[,1], postint=Para_OutLinReg$beta0[,1], postvar=(1/Para_OutLinReg$tau[,1]), min_x=minX, max_x=maxX, seedstart=7619) # Plot the results with the data. par(mfcol = c(nrow=1,ncol=1)) par(mar = c(4.0,4.0,0.5,0.5)) par(ps = 14) par(oma=c(1.5,1.5,1,1)) plot(X,Y, xlab = "Independent variable", ylab = "Dependent variable") lines(x = LinRegPosterior[,1], y = LinRegPosterior [,4],col = "blue",lwd=3) lines(x = LinRegPosterior[,1], y = LinRegPosterior [,2],col = "blue",lwd=3,lty="dashed") lines(x = LinRegPosterior[,1], y = LinRegPosterior [,3],col = "blue",lwd=3,lty="dashed") #------------------------------------------------------------------ # Another way to run the model that will give you DIC, but you can't # play with posterior samples. BRugsFit(modelFile = "LinRegmodel.txt", data = "linregdata.txt", inits = c("lri1.txt", "lri2.txt", "lri3.txt", "lri4.txt", "lri5.txt"), numChains = 5, parametersToSave = c("beta0", "beta1", "tau"), nBurnin = 15000, nIter = 15000, nThin = 20, coda = FALSE, DIC = TRUE, working.directory = NULL, digits = 5, seed=NULL, BRugsVerbose = getOption("BRugsVerbose"))