Kruschke's code. - antivax-attitudes - Reanalyses of data from Horne, Powell, Hummel & Holyoak (2015)

commit f40553d9f7b77eaa5bb5fe3172d2d14f94700f57
parent 5ed4b0b3fadd21e24bbcf6e42af9dc3098195be8
Author: eamoncaddigan <eamon.caddigan@gmail.com>
Date:   Fri, 21 Aug 2015 13:49:33 -0400

Kruschke's code.

Diffstat:
A Jags-Yord-Xnom1grp-Mnormal.R  | 266 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

1 file changed, 266 insertions(+), 0 deletions(-)
diff --git a/Jags-Yord-Xnom1grp-Mnormal.R b/Jags-Yord-Xnom1grp-Mnormal.R
@@ -0,0 +1,266 @@
+# Jags-Yord-Xnom1grp-Mnormal.R 
+# Accompanies the book:
+#   Kruschke, J. K. (2015). Doing Bayesian Data Analysis, Second Edition: 
+#   A Tutorial with R, JAGS, and Stan. Academic Press / Elsevier.
+
+source("DBDA2E-utilities.R")
+
+#===============================================================================
+
+genMCMC = function( datFrm, yName , numSavedSteps=50000 , thinSteps = 1,
+                    saveName=NULL ,
+                    runjagsMethod=runjagsMethodDefault , 
+                    nChains=nChainsDefault ) { 
+  #-----------------------------------------------------------------------------
+  # THE DATA.
+  y = as.numeric(datFrm[,yName])
+  # Do some checking that data make sense:
+  if ( any( y!=round(y) ) ) { stop("All y values must be integers (whole numbers).") }
+  if ( any( y < 1 ) ) { stop("All y values must be 1 or larger.") }
+  # COMPRESS OUT ANY EMPTY VALUES OF Y:
+  yOrig=y
+  y=as.numeric(factor(y,levels=names(table(y))))
+  if ( any(y != yOrig) ) { 
+    warning("*** WARNING: Y RE-CODED TO REMOVE EMPTY LEVELS ***")
+  }
+  Ntotal = length(y)
+  # Threshold 1 and nYlevels-1 are fixed; other thresholds are estimated.
+  # This allows all parameters to be interpretable on the response scale.
+  nYlevels = max(y)  
+  thresh = rep(NA,nYlevels-1)
+  thresh[1] = 1 + 0.5
+  thresh[nYlevels-1] = nYlevels-1 + 0.5
+  # Specify the data in a list, for later shipment to JAGS:
+  dataList = list(
+    y = y ,
+    nYlevels = nYlevels ,
+    thresh = thresh ,
+    Ntotal = Ntotal 
+  )
+  #-----------------------------------------------------------------------------
+  # THE MODEL.
+  modelString = "
+  model {
+    for ( i in 1:Ntotal ) {
+      y[i] ~ dcat( pr[i,1:nYlevels] )
+      pr[i,1] <- pnorm( thresh[1] , mu , 1/sigma^2 )
+      for ( k in 2:(nYlevels-1) ) {
+        pr[i,k] <- max( 0 ,  pnorm( thresh[ k ] , mu , 1/sigma^2 )
+                           - pnorm( thresh[k-1] , mu , 1/sigma^2 ) )
+      }
+      pr[i,nYlevels] <- 1 - pnorm( thresh[nYlevels-1] , mu , 1/sigma^2 )
+    }
+    mu ~ dnorm( (1+nYlevels)/2 , 1/(nYlevels)^2 )
+    sigma ~ dunif( nYlevels/1000 , nYlevels*10 )
+    for ( k in 2:(nYlevels-2) ) {  # 1 and nYlevels-1 are fixed, not stochastic
+      thresh[k] ~ dnorm( k+0.5 , 1/2^2 )
+    }
+  }
+  " # close quote for modelString
+  # Write out modelString to a text file
+  writeLines( modelString , con="TEMPmodel.txt" )
+  #-----------------------------------------------------------------------------
+  # INTIALIZE THE CHAINS.
+  #   # Initial values of MCMC chains based on data:
+  #   muInit = c( mean(y[x==1]) , mean(y[x==2]) )
+  #   sigmaInit = c( sd(y[x==1]) , sd(y[x==2]) )
+  #   threshInit = 1:(nYlevels-1)+0.5
+  #   threshInit[1] = NA
+  #   threshInit[nYlevels-1] = NA
+  #   # Regarding initial values in next line: (1) sigma will tend to be too big if 
+  #   # the data have outliers, and (2) nu starts at 5 as a moderate value. These
+  #   # initial values keep the burn-in period moderate.
+  #   initsList = list( mu=muInit, sigma=sigmaInit, nuMinusOne=4, thresh=threshInit )
+  initsList = NULL
+  #-----------------------------------------------------------------------------
+  # RUN THE CHAINS
+  parameters = c( "mu" , "sigma" , "thresh" )
+  adaptSteps = 500               # Number of steps to "tune" the samplers
+  burnInSteps = 1000
+  runJagsOut <- run.jags( method=runjagsMethod ,
+                          model="TEMPmodel.txt" , 
+                          monitor=parameters , 
+                          data=dataList ,  
+                          #inits=initsList , 
+                          n.chains=nChains ,
+                          adapt=adaptSteps ,
+                          burnin=burnInSteps , 
+                          sample=ceiling(numSavedSteps/nChains) ,
+                          thin=thinSteps ,
+                          summarise=FALSE ,
+                          plots=FALSE )
+  codaSamples = as.mcmc.list( runJagsOut )
+  # resulting codaSamples object has these indices: 
+  #   codaSamples[[ chainIdx ]][ stepIdx , paramIdx ]
+  if ( !is.null(saveName) ) {
+    save( codaSamples , file=paste(saveName,"Mcmc.Rdata",sep="") )
+  }
+  return( codaSamples )
+} # end function
+
+#===============================================================================
+
+smryMCMC = function(  codaSamples , compVal , #RopeEff=NULL , 
+                      saveName=NULL ) {
+  summaryInfo = NULL
+  mcmcMat = as.matrix(codaSamples,chains=TRUE)
+  summaryInfo = rbind( summaryInfo , 
+                       "mu" = summarizePost( mcmcMat[,"mu"] ) )
+  summaryInfo = rbind( summaryInfo , 
+                       "sigma" = summarizePost( mcmcMat[,"sigma"] ) )
+  summaryInfo = rbind( summaryInfo , 
+                       "effSz" = summarizePost( 
+                         ( mcmcMat[,"mu"] - compVal ) / mcmcMat[,"sigma"] ,
+                         compVal=compVal , ROPE=NULL ) )
+  for ( colName in grep( "thresh" , colnames(mcmcMat) , value=TRUE ) ) {
+    summaryInfo = rbind( summaryInfo , 
+                         summarizePost( mcmcMat[,colName] ) )
+    rownames(summaryInfo)[nrow(summaryInfo)] = colName
+  }
+  if ( !is.null(saveName) ) {
+    write.csv( summaryInfo , file=paste(saveName,"SummaryInfo.csv",sep="") )
+  }
+  return( summaryInfo )
+}
+
+#===============================================================================
+
+plotMCMC = function( codaSamples , datFrm , yName , compVal , #RopeEff=NULL ,
+                     showCurve=FALSE , pairsPlot=FALSE ,
+                     saveName=NULL , saveType="jpg" ) {
+  #-----------------------------------------------------------------------------
+  mcmcMat = as.matrix(codaSamples,chains=TRUE)
+  chainLength = NROW( mcmcMat )
+  mu = mcmcMat[,"mu"]
+  sigma = mcmcMat[,"sigma"]
+  #-----------------------------------------------------------------------------
+  if ( pairsPlot ) {
+    # Plot the parameters pairwise, to see correlations:
+    openGraph(width=7,height=7)
+    nPtToPlot = 1000
+    plotIdx = floor(seq(1,length(mu),by=length(mu)/nPtToPlot))
+    panel.cor = function(x, y, digits=2, prefix="", cex.cor, ...) {
+      usr = par("usr"); on.exit(par(usr))
+      par(usr = c(0, 1, 0, 1))
+      r = (cor(x, y))
+      txt = format(c(r, 0.123456789), digits=digits)[1]
+      txt = paste(prefix, txt, sep="")
+      if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt)
+      text(0.5, 0.5, txt, cex=1.25 ) # was cex=cex.cor*r
+    }
+    pairs( cbind( mu , sigma )[plotIdx,] ,
+           labels=c( expression(mu) , expression(sigma) ) , 
+           lower.panel=panel.cor , col="skyblue" )
+    if ( !is.null(saveName) ) {
+      saveGraph( file=paste(saveName,"PostPairs",sep=""), type=saveType)
+    }
+  }
+  #-----------------------------------------------------------------------------
+  # Plot thresholds:
+  threshCols = grep("thresh",colnames(mcmcMat),value=TRUE)
+  threshMean = rowMeans( mcmcMat[,threshCols] )
+  xLim = range(mcmcMat[,threshCols])
+  nPtToPlot = 1000
+  plotIdx = floor(seq(1,nrow(mcmcMat),length=nPtToPlot))
+  openGraph(width=7.0,height=5.0)
+  par( mar=c(3.5,3.5,1,1) , mgp=c(2.25,0.7,0) )
+  plot( mcmcMat[plotIdx,threshCols[1]] , threshMean[plotIdx] , col="skyblue" ,
+        xlim=xLim , xlab="Threshold" , ylab="Mean Threshold" )
+  abline(v=mean(mcmcMat[plotIdx,threshCols[1]]),lty="dashed",col="skyblue")
+  for ( i in 2:length(threshCols) ) {
+    points( mcmcMat[plotIdx,threshCols[i]] , threshMean[plotIdx] , col="skyblue" )
+    abline(v=mean(mcmcMat[plotIdx,threshCols[i]]),lty="dashed",col="skyblue")
+  }
+  if ( !is.null(saveName) ) {
+    saveGraph( file=paste0(saveName,"Thresh"), type=saveType)
+  }
+  #-----------------------------------------------------------------------------
+  # Set up window and layout:
+  openGraph(width=6.0,height=5.0)
+  layout( matrix( c(1,2,3,4,5,6) , nrow=3, byrow=TRUE ) )
+  par( mar=c(3.5,3.5,2.5,0.5) , mgp=c(2.25,0.7,0) )
+  # Compute limits for plots of data with posterior pred. distributions
+  y = as.numeric(datFrm[,yName])
+  # COMPRESS OUT ANY EMPTY VALUES OF Y:
+  yOrig=y
+  y=as.numeric(factor(y,levels=names(table(y))))
+  if ( any(y != yOrig) ) { 
+    warning("*** WARNING: Y RE-CODED TO REMOVE EMPTY LEVELS ***")
+  }
+  xLim = c( min(y)-0.5 , max(y)+0.5 )
+  xBreaks = seq( xLim[1] , xLim[2] , 1 )  
+  histInfo = hist(y,breaks=xBreaks,plot=FALSE)
+  yMax = 1.2 * max( c( histInfo$density ) )
+  xVec = seq( xLim[1] , xLim[2] , length=501 )
+  #-----------------------------------------------------------------------------
+  # 1. mu
+  xlim = range( c( mu ) )
+  histInfo = plotPost( mu , cex.lab = 1.75 ,
+                       showCurve=showCurve , compVal=compVal ,
+                       xlab=bquote(mu) , main=paste("Mean") , 
+                       col="skyblue" )
+  #-----------------------------------------------------------------------------
+  # 2. Plot data y and smattering of posterior predictive curves:
+  # Data histogram:
+  histInfo = hist( y , prob=TRUE , xlim=xLim , ylim=c(0,yMax) , breaks=xBreaks,
+                   col="pink" , border="white" , xlab="y" , ylab="" , 
+                   yaxt="n" , cex.lab=1.5 , 
+                   main=paste("Data w. Post. Pred.") )
+  # Posterior predictive probabilities of outcomes:
+  outProb=matrix(0,nrow=chainLength,ncol=max(y))
+  for ( stepIdx in 1:chainLength ) {
+    threshCumProb = ( pnorm( ( mcmcMat[ stepIdx , 
+                                        paste("thresh[",1:(max(y)-1),"]",sep="") ]
+                               - mu[stepIdx] )
+                             / sigma[stepIdx] ) )
+    outProb[stepIdx,] = c(threshCumProb,1) - c(0,threshCumProb)
+  }
+  outHdi = apply( outProb , 2 , HDIofMCMC )
+  outMean = apply( outProb , 2 , median , na.rm=TRUE )
+  show(outMean)
+  points( x=1:max(y) , y=outMean  , pch=19 , cex=2 , col="skyblue" )
+  segments( x0=1:max(y) , y0=outHdi[1,] , 
+            x1=1:max(y) , y1=outHdi[2,] , lwd=4 , col="skyblue" )
+  # annotate N:
+  text( max(xVec) , yMax , bquote(N==.(length(y))) , adj=c(1.1,1.1) )
+  #-----------------------------------------------------------------------------
+  # 3. sigma
+  xlim=range( c( sigma ) )
+  histInfo = plotPost( sigma ,  xlim=xlim , cex.lab = 1.75 ,
+                       showCurve=showCurve ,
+                       xlab=bquote(sigma) , 
+                       main=paste("Std. Dev.") , 
+                       col="skyblue"  )
+  #-----------------------------------------------------------------------------
+  # 4. effect size. 
+  effectSize = ( mu - compVal ) / sigma
+  histInfo = plotPost( effectSize , compVal=0.0 , # ROPE=RopeEff ,
+                       showCurve=showCurve ,
+                       xlab=bquote( ( mu - .(compVal) ) / sigma ) ,
+                       cex.lab=1.75 , main="Effect Size" ,
+                       col="skyblue" )
+  #-----------------------------------------------------------------------------  
+  # 5. Thresholds:
+  threshCols = grep("thresh",colnames(mcmcMat),value=TRUE)
+  threshMean = rowMeans( mcmcMat[,threshCols] )
+  xLim = range(mcmcMat[,threshCols])
+  nPtToPlot = 500
+  plotIdx = floor(seq(1,nrow(mcmcMat),length=nPtToPlot))
+  plot( mcmcMat[plotIdx,threshCols[1]] , threshMean[plotIdx] , col="skyblue" ,
+        xlim=xLim , xlab="Threshold" , ylab="Mean Threshold" )
+  abline(v=mean(mcmcMat[plotIdx,threshCols[1]]),lty="dashed",col="skyblue")
+  for ( i in 2:length(threshCols) ) {
+    points( mcmcMat[plotIdx,threshCols[i]] , threshMean[plotIdx] , col="skyblue" )
+    abline(v=mean(mcmcMat[plotIdx,threshCols[i]]),lty="dashed",col="skyblue")
+  }
+  #   # Blank plot (was occupied by nu parameter in robust version)
+  #   plot(1, ann=FALSE, axes=FALSE, xlim=c(0,1) , ylim=c(0,1) , 
+  #        type="n" , xaxs="i" , yaxs="i" )  
+  #   text(.5,.5,"[assumes y~normal]",adj=c(.5,.5))
+  #-----------------------------------------------------------------------------
+  if ( !is.null(saveName) ) {
+    saveGraph( file=paste(saveName,"Post",sep=""), type=saveType)
+  }
+}
+
+#===============================================================================

	antivax-attitudes Reanalyses of data from Horne, Powell, Hummel & Holyoak (2015)
	git clone https://git.eamoncaddigan.net/antivax-attitudes.git
	Log \| Files \| Refs \| README \| LICENSE