commit 5ed4b0b3fadd21e24bbcf6e42af9dc3098195be8
parent c386979f91fbfc5e5309c2384b3f6ebd4f66bc42
Author: eamoncaddigan <eamon.caddigan@gmail.com>
Date: Fri, 21 Aug 2015 13:33:05 -0400
Pulled from Kruschke.
Diffstat:
A | DBDA2E-utilities.R | | | 555 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
1 file changed, 555 insertions(+), 0 deletions(-)
diff --git a/DBDA2E-utilities.R b/DBDA2E-utilities.R
@@ -0,0 +1,555 @@
+# Utility programs for use with the book,
+# Kruschke, J. K. (2015). Doing Bayesian Data Analysis, Second Edition:
+# A Tutorial with R, JAGS, and Stan. Academic Press / Elsevier.
+# This file contains several functions that are called by other programs
+# or can be called directly by the user. To load all the functions into
+# R's working memory, at R's command line type:
+# source("DBDA2E-utilities.R")
+
+#------------------------------------------------------------------------------
+
+bookInfo = "Kruschke, J. K. (2015). Doing Bayesian Data Analysis, Second Edition:\nA Tutorial with R, JAGS, and Stan. Academic Press / Elsevier."
+bannerBreak = "\n*********************************************************************\n"
+cat(paste0(bannerBreak,bookInfo,bannerBreak,"\n"))
+
+#------------------------------------------------------------------------------
+# Check that required packages are installed:
+want = c("parallel","rjags","runjags")
+have = want %in% rownames(installed.packages())
+if ( any(!have) ) { install.packages( want[!have] ) }
+
+# load rjags:
+library(rjags)
+# load runjags and set some options in runjags:
+library(runjags)
+runjags.options( inits.warning=FALSE , rng.warning=FALSE )
+
+# set default number of chains and parallelness for MCMC:
+library(parallel) # for detectCores()
+nCores = detectCores()
+if ( !is.finite(nCores) ) { nCores = 1 }
+if ( nCores > 4 ) {
+ nChainsDefault = 4 # because JAGS has only 4 rng's.
+ runjagsMethodDefault = "parallel"
+}
+if ( nCores == 4 ) {
+ nChainsDefault = 3 # save 1 core for other processes.
+ runjagsMethodDefault = "parallel"
+}
+if ( nCores < 4 ) {
+ nChainsDefault = 3
+ runjagsMethodDefault = "rjags" # NOT parallel
+}
+
+#------------------------------------------------------------------------------
+# Functions for opening and saving graphics that operate the same for
+# Windows and Macintosh and Linux operating systems. At least, that's the hope!
+
+openGraph = function( width=7 , height=7 , mag=1.0 , ... ) {
+ if ( .Platform$OS.type != "windows" ) { # Mac OS, Linux
+ tryInfo = try( X11( width=width*mag , height=height*mag , type="cairo" ,
+ ... ) )
+ if ( class(tryInfo)=="try-error" ) {
+ lineInput = readline("WARNING: Previous graphics windows will be closed because of too many open windows.\nTO CONTINUE, PRESS <ENTER> IN R CONSOLE.\n")
+ graphics.off()
+ X11( width=width*mag , height=height*mag , type="cairo" , ... )
+ }
+ } else { # Windows OS
+ tryInfo = try( windows( width=width*mag , height=height*mag , ... ) )
+ if ( class(tryInfo)=="try-error" ) {
+ lineInput = readline("WARNING: Previous graphics windows will be closed because of too many open windows.\nTO CONTINUE, PRESS <ENTER> IN R CONSOLE.\n")
+ graphics.off()
+ windows( width=width*mag , height=height*mag , ... )
+ }
+ }
+}
+
+saveGraph = function( file="saveGraphOutput" , type="pdf" , ... ) {
+ if ( .Platform$OS.type != "windows" ) { # Mac OS, Linux
+ if ( any( type == c("png","jpeg","jpg","tiff","bmp")) ) {
+ sptype = type
+ if ( type == "jpg" ) { sptype = "jpeg" }
+ savePlot( file=paste0(file,".",type) , type=sptype , ... )
+ }
+ if ( type == "pdf" ) {
+ dev.copy2pdf(file=paste0(file,".",type) , ... )
+ }
+ if ( type == "eps" ) {
+ dev.copy2eps(file=paste0(file,".",type) , ... )
+ }
+ } else { # Windows OS
+ file=paste0(file,".",type)
+ savePlot( file=file , type=type , ... )
+ }
+}
+
+#------------------------------------------------------------------------------
+# Functions for computing limits of HDI's:
+
+HDIofMCMC = function( sampleVec , credMass=0.95 ) {
+ # Computes highest density interval from a sample of representative values,
+ # estimated as shortest credible interval.
+ # Arguments:
+ # sampleVec
+ # is a vector of representative values from a probability distribution.
+ # credMass
+ # is a scalar between 0 and 1, indicating the mass within the credible
+ # interval that is to be estimated.
+ # Value:
+ # HDIlim is a vector containing the limits of the HDI
+ sortedPts = sort( sampleVec )
+ ciIdxInc = ceiling( credMass * length( sortedPts ) )
+ nCIs = length( sortedPts ) - ciIdxInc
+ ciWidth = rep( 0 , nCIs )
+ for ( i in 1:nCIs ) {
+ ciWidth[ i ] = sortedPts[ i + ciIdxInc ] - sortedPts[ i ]
+ }
+ HDImin = sortedPts[ which.min( ciWidth ) ]
+ HDImax = sortedPts[ which.min( ciWidth ) + ciIdxInc ]
+ HDIlim = c( HDImin , HDImax )
+ return( HDIlim )
+}
+
+HDIofICDF = function( ICDFname , credMass=0.95 , tol=1e-8 , ... ) {
+ # Arguments:
+ # ICDFname is R's name for the inverse cumulative density function
+ # of the distribution.
+ # credMass is the desired mass of the HDI region.
+ # tol is passed to R's optimize function.
+ # Return value:
+ # Highest density iterval (HDI) limits in a vector.
+ # Example of use: For determining HDI of a beta(30,12) distribution, type
+ # HDIofICDF( qbeta , shape1 = 30 , shape2 = 12 )
+ # Notice that the parameters of the ICDFname must be explicitly named;
+ # e.g., HDIofICDF( qbeta , 30 , 12 ) does not work.
+ # Adapted and corrected from Greg Snow's TeachingDemos package.
+ incredMass = 1.0 - credMass
+ intervalWidth = function( lowTailPr , ICDFname , credMass , ... ) {
+ ICDFname( credMass + lowTailPr , ... ) - ICDFname( lowTailPr , ... )
+ }
+ optInfo = optimize( intervalWidth , c( 0 , incredMass ) , ICDFname=ICDFname ,
+ credMass=credMass , tol=tol , ... )
+ HDIlowTailPr = optInfo$minimum
+ return( c( ICDFname( HDIlowTailPr , ... ) ,
+ ICDFname( credMass + HDIlowTailPr , ... ) ) )
+}
+
+HDIofGrid = function( probMassVec , credMass=0.95 ) {
+ # Arguments:
+ # probMassVec is a vector of probability masses at each grid point.
+ # credMass is the desired mass of the HDI region.
+ # Return value:
+ # A list with components:
+ # indices is a vector of indices that are in the HDI
+ # mass is the total mass of the included indices
+ # height is the smallest component probability mass in the HDI
+ # Example of use: For determining HDI of a beta(30,12) distribution
+ # approximated on a grid:
+ # > probDensityVec = dbeta( seq(0,1,length=201) , 30 , 12 )
+ # > probMassVec = probDensityVec / sum( probDensityVec )
+ # > HDIinfo = HDIofGrid( probMassVec )
+ # > show( HDIinfo )
+ sortedProbMass = sort( probMassVec , decreasing=TRUE )
+ HDIheightIdx = min( which( cumsum( sortedProbMass ) >= credMass ) )
+ HDIheight = sortedProbMass[ HDIheightIdx ]
+ HDImass = sum( probMassVec[ probMassVec >= HDIheight ] )
+ return( list( indices = which( probMassVec >= HDIheight ) ,
+ mass = HDImass , height = HDIheight ) )
+}
+
+#------------------------------------------------------------------------------
+# Function(s) for plotting properties of mcmc coda objects.
+
+DbdaAcfPlot = function( codaObject , parName=varnames(codaObject)[1] , plColors=NULL ) {
+ if ( all( parName != varnames(codaObject) ) ) {
+ stop("parName must be a column name of coda object")
+ }
+ nChain = length(codaObject)
+ if ( is.null(plColors) ) plColors=1:nChain
+ xMat = NULL
+ yMat = NULL
+ for ( cIdx in 1:nChain ) {
+ acfInfo = acf(codaObject[,c(parName)][[cIdx]],plot=FALSE)
+ xMat = cbind(xMat,acfInfo$lag)
+ yMat = cbind(yMat,acfInfo$acf)
+ }
+ matplot( xMat , yMat , type="o" , pch=20 , col=plColors , ylim=c(0,1) ,
+ main="" , xlab="Lag" , ylab="Autocorrelation" )
+ abline(h=0,lty="dashed")
+ EffChnLngth = effectiveSize(codaObject[,c(parName)])
+ text( x=max(xMat) , y=max(yMat) , adj=c(1.0,1.0) , cex=1.25 ,
+ labels=paste("ESS =",round(EffChnLngth,1)) )
+}
+
+DbdaDensPlot = function( codaObject , parName=varnames(codaObject)[1] , plColors=NULL ) {
+ if ( all( parName != varnames(codaObject) ) ) {
+ stop("parName must be a column name of coda object")
+ }
+ nChain = length(codaObject) # or nchain(codaObject)
+ if ( is.null(plColors) ) plColors=1:nChain
+ xMat = NULL
+ yMat = NULL
+ hdiLims = NULL
+ for ( cIdx in 1:nChain ) {
+ densInfo = density(codaObject[,c(parName)][[cIdx]])
+ xMat = cbind(xMat,densInfo$x)
+ yMat = cbind(yMat,densInfo$y)
+ hdiLims = cbind(hdiLims,HDIofMCMC(codaObject[,c(parName)][[cIdx]]))
+ }
+ matplot( xMat , yMat , type="l" , col=plColors ,
+ main="" , xlab="Param. Value" , ylab="Density" )
+ abline(h=0)
+ points( hdiLims[1,] , rep(0,nChain) , col=plColors , pch="|" )
+ points( hdiLims[2,] , rep(0,nChain) , col=plColors , pch="|" )
+ text( mean(hdiLims) , 0 , "95% HDI" , adj=c(0.5,-0.2) )
+ EffChnLngth = effectiveSize(codaObject[,c(parName)])
+ MCSE = sd(as.matrix(codaObject[,c(parName)]))/sqrt(EffChnLngth)
+ text( max(xMat) , max(yMat) , adj=c(1.0,1.0) , cex=1.25 ,
+ paste("MCSE =\n",signif(MCSE,3)) )
+}
+
+diagMCMC = function( codaObject , parName=varnames(codaObject)[1] ,
+ saveName=NULL , saveType="jpg" ) {
+ DBDAplColors = c("skyblue","black","royalblue","steelblue")
+ openGraph(height=5,width=7)
+ par( mar=0.5+c(3,4,1,0) , oma=0.1+c(0,0,2,0) , mgp=c(2.25,0.7,0) ,
+ cex.lab=1.5 )
+ layout(matrix(1:4,nrow=2))
+ # traceplot and gelman.plot are from CODA package:
+ require(coda)
+ coda::traceplot( codaObject[,c(parName)] , main="" , ylab="Param. Value" ,
+ col=DBDAplColors )
+ tryVal = try(
+ coda::gelman.plot( codaObject[,c(parName)] , main="" , auto.layout=FALSE ,
+ col=DBDAplColors )
+ )
+ # if it runs, gelman.plot returns a list with finite shrink values:
+ if ( class(tryVal)=="try-error" ) {
+ plot.new()
+ print(paste0("Warning: coda::gelman.plot fails for ",parName))
+ } else {
+ if ( class(tryVal)=="list" & !is.finite(tryVal$shrink[1]) ) {
+ plot.new()
+ print(paste0("Warning: coda::gelman.plot fails for ",parName))
+ }
+ }
+ DbdaAcfPlot(codaObject,parName,plColors=DBDAplColors)
+ DbdaDensPlot(codaObject,parName,plColors=DBDAplColors)
+ mtext( text=parName , outer=TRUE , adj=c(0.5,0.5) , cex=2.0 )
+ if ( !is.null(saveName) ) {
+ saveGraph( file=paste0(saveName,"Diag",parName), type=saveType)
+ }
+}
+
+diagStanFit = function( stanFit , parName ,
+ saveName=NULL , saveType="jpg" ) {
+ codaFit = mcmc.list( lapply( 1:ncol(stanFit) ,
+ function(x) { mcmc(as.array(stanFit)[,x,]) } ) )
+ DBDAplColors = c("skyblue","black","royalblue","steelblue")
+ openGraph(height=5,width=7)
+ par( mar=0.5+c(3,4,1,0) , oma=0.1+c(0,0,2,0) , mgp=c(2.25,0.7,0) , cex.lab=1.5 )
+ layout(matrix(1:4,nrow=2))
+ # traceplot is from rstan package
+ require(rstan)
+ traceplot(stanFit,pars=parName,nrow=1,ncol=1)#,main="",ylab="Param. Value",col=DBDAplColors)
+ # gelman.plot are from CODA package:
+ require(coda)
+ tryVal = try(
+ coda::gelman.plot( codaObject[,c(parName)] , main="" , auto.layout=FALSE ,
+ col=DBDAplColors )
+ )
+ # if it runs, gelman.plot returns a list with finite shrink values:
+ if ( class(tryVal)=="try-error" ) {
+ plot.new()
+ print(paste0("Warning: coda::gelman.plot fails for ",parName))
+ } else {
+ if ( class(tryVal)=="list" & !is.finite(tryVal$shrink[1]) ) {
+ plot.new()
+ print(paste0("Warning: coda::gelman.plot fails for ",parName))
+ }
+ }
+ DbdaAcfPlot(codaFit,parName,plColors=DBDAplColors)
+ DbdaDensPlot(codaFit,parName,plColors=DBDAplColors)
+ mtext( text=parName , outer=TRUE , adj=c(0.5,0.5) , cex=2.0 )
+ if ( !is.null(saveName) ) {
+ saveGraph( file=paste0(saveName,"Diag",parName), type=saveType)
+ }
+}
+
+#------------------------------------------------------------------------------
+# Functions for summarizing and plotting distribution of a large sample;
+# typically applied to MCMC posterior.
+
+normalize = function( v ){ return( v / sum(v) ) }
+
+require(coda) # loaded by rjags, but redundancy doesn't hurt
+
+summarizePost = function( paramSampleVec ,
+ compVal=NULL , ROPE=NULL , credMass=0.95 ) {
+ meanParam = mean( paramSampleVec )
+ medianParam = median( paramSampleVec )
+ dres = density( paramSampleVec )
+ modeParam = dres$x[which.max(dres$y)]
+ mcmcEffSz = round( effectiveSize( paramSampleVec ) , 1 )
+ names(mcmcEffSz) = NULL
+ hdiLim = HDIofMCMC( paramSampleVec , credMass=credMass )
+ if ( !is.null(compVal) ) {
+ pcgtCompVal = ( 100 * sum( paramSampleVec > compVal )
+ / length( paramSampleVec ) )
+ } else {
+ compVal=NA
+ pcgtCompVal=NA
+ }
+ if ( !is.null(ROPE) ) {
+ pcltRope = ( 100 * sum( paramSampleVec < ROPE[1] )
+ / length( paramSampleVec ) )
+ pcgtRope = ( 100 * sum( paramSampleVec > ROPE[2] )
+ / length( paramSampleVec ) )
+ pcinRope = 100-(pcltRope+pcgtRope)
+ } else {
+ ROPE = c(NA,NA)
+ pcltRope=NA
+ pcgtRope=NA
+ pcinRope=NA
+ }
+ return( c( Mean=meanParam , Median=medianParam , Mode=modeParam ,
+ ESS=mcmcEffSz ,
+ HDImass=credMass , HDIlow=hdiLim[1] , HDIhigh=hdiLim[2] ,
+ CompVal=compVal , PcntGtCompVal=pcgtCompVal ,
+ ROPElow=ROPE[1] , ROPEhigh=ROPE[2] ,
+ PcntLtROPE=pcltRope , PcntInROPE=pcinRope , PcntGtROPE=pcgtRope ) )
+}
+
+plotPost = function( paramSampleVec , cenTend=c("mode","median","mean")[1] ,
+ compVal=NULL, ROPE=NULL, credMass=0.95, HDItextPlace=0.7,
+ xlab=NULL , xlim=NULL , yaxt=NULL , ylab=NULL ,
+ main=NULL , cex=NULL , cex.lab=NULL ,
+ col=NULL , border=NULL , showCurve=FALSE , breaks=NULL ,
+ ... ) {
+ # Override defaults of hist function, if not specified by user:
+ # (additional arguments "..." are passed to the hist function)
+ if ( is.null(xlab) ) xlab="Param. Val."
+ if ( is.null(cex.lab) ) cex.lab=1.5
+ if ( is.null(cex) ) cex=1.4
+ if ( is.null(xlim) ) xlim=range( c( compVal , ROPE , paramSampleVec ) )
+ if ( is.null(main) ) main=""
+ if ( is.null(yaxt) ) yaxt="n"
+ if ( is.null(ylab) ) ylab=""
+ if ( is.null(col) ) col="skyblue"
+ if ( is.null(border) ) border="white"
+
+ # convert coda object to matrix:
+ if ( class(paramSampleVec) == "mcmc.list" ) {
+ paramSampleVec = as.matrix(paramSampleVec)
+ }
+
+ summaryColNames = c("ESS","mean","median","mode",
+ "hdiMass","hdiLow","hdiHigh",
+ "compVal","pGtCompVal",
+ "ROPElow","ROPEhigh","pLtROPE","pInROPE","pGtROPE")
+ postSummary = matrix( NA , nrow=1 , ncol=length(summaryColNames) ,
+ dimnames=list( c( xlab ) , summaryColNames ) )
+
+ # require(coda) # for effectiveSize function
+ postSummary[,"ESS"] = effectiveSize(paramSampleVec)
+
+ postSummary[,"mean"] = mean(paramSampleVec)
+ postSummary[,"median"] = median(paramSampleVec)
+ mcmcDensity = density(paramSampleVec)
+ postSummary[,"mode"] = mcmcDensity$x[which.max(mcmcDensity$y)]
+
+ HDI = HDIofMCMC( paramSampleVec , credMass )
+ postSummary[,"hdiMass"]=credMass
+ postSummary[,"hdiLow"]=HDI[1]
+ postSummary[,"hdiHigh"]=HDI[2]
+
+ # Plot histogram.
+ cvCol = "darkgreen"
+ ropeCol = "darkred"
+ if ( is.null(breaks) ) {
+ if ( max(paramSampleVec) > min(paramSampleVec) ) {
+ breaks = c( seq( from=min(paramSampleVec) , to=max(paramSampleVec) ,
+ by=(HDI[2]-HDI[1])/18 ) , max(paramSampleVec) )
+ } else {
+ breaks=c(min(paramSampleVec)-1.0E-6,max(paramSampleVec)+1.0E-6)
+ border="skyblue"
+ }
+ }
+ if ( !showCurve ) {
+ par(xpd=NA)
+ histinfo = hist( paramSampleVec , xlab=xlab , yaxt=yaxt , ylab=ylab ,
+ freq=F , border=border , col=col ,
+ xlim=xlim , main=main , cex=cex , cex.lab=cex.lab ,
+ breaks=breaks , ... )
+ }
+ if ( showCurve ) {
+ par(xpd=NA)
+ histinfo = hist( paramSampleVec , plot=F )
+ densCurve = density( paramSampleVec , adjust=2 )
+ plot( densCurve$x , densCurve$y , type="l" , lwd=5 , col=col , bty="n" ,
+ xlim=xlim , xlab=xlab , yaxt=yaxt , ylab=ylab ,
+ main=main , cex=cex , cex.lab=cex.lab , ... )
+ }
+ cenTendHt = 0.9*max(histinfo$density)
+ cvHt = 0.7*max(histinfo$density)
+ ROPEtextHt = 0.55*max(histinfo$density)
+ # Display central tendency:
+ mn = mean(paramSampleVec)
+ med = median(paramSampleVec)
+ mcmcDensity = density(paramSampleVec)
+ mo = mcmcDensity$x[which.max(mcmcDensity$y)]
+ if ( cenTend=="mode" ){
+ text( mo , cenTendHt ,
+ bquote(mode==.(signif(mo,3))) , adj=c(.5,0) , cex=cex )
+ }
+ if ( cenTend=="median" ){
+ text( med , cenTendHt ,
+ bquote(median==.(signif(med,3))) , adj=c(.5,0) , cex=cex , col=cvCol )
+ }
+ if ( cenTend=="mean" ){
+ text( mn , cenTendHt ,
+ bquote(mean==.(signif(mn,3))) , adj=c(.5,0) , cex=cex )
+ }
+ # Display the comparison value.
+ if ( !is.null( compVal ) ) {
+ pGtCompVal = sum( paramSampleVec > compVal ) / length( paramSampleVec )
+ pLtCompVal = 1 - pGtCompVal
+ lines( c(compVal,compVal) , c(0.96*cvHt,0) ,
+ lty="dotted" , lwd=2 , col=cvCol )
+ text( compVal , cvHt ,
+ bquote( .(round(100*pLtCompVal,1)) * "% < " *
+ .(signif(compVal,3)) * " < " *
+ .(round(100*pGtCompVal,1)) * "%" ) ,
+ adj=c(pLtCompVal,0) , cex=0.8*cex , col=cvCol )
+ postSummary[,"compVal"] = compVal
+ postSummary[,"pGtCompVal"] = pGtCompVal
+ }
+ # Display the ROPE.
+ if ( !is.null( ROPE ) ) {
+ pInROPE = ( sum( paramSampleVec > ROPE[1] & paramSampleVec < ROPE[2] )
+ / length( paramSampleVec ) )
+ pGtROPE = ( sum( paramSampleVec >= ROPE[2] ) / length( paramSampleVec ) )
+ pLtROPE = ( sum( paramSampleVec <= ROPE[1] ) / length( paramSampleVec ) )
+ lines( c(ROPE[1],ROPE[1]) , c(0.96*ROPEtextHt,0) , lty="dotted" , lwd=2 ,
+ col=ropeCol )
+ lines( c(ROPE[2],ROPE[2]) , c(0.96*ROPEtextHt,0) , lty="dotted" , lwd=2 ,
+ col=ropeCol)
+ text( mean(ROPE) , ROPEtextHt ,
+ bquote( .(round(100*pLtROPE,1)) * "% < " * .(ROPE[1]) * " < " *
+ .(round(100*pInROPE,1)) * "% < " * .(ROPE[2]) * " < " *
+ .(round(100*pGtROPE,1)) * "%" ) ,
+ adj=c(pLtROPE+.5*pInROPE,0) , cex=1 , col=ropeCol )
+
+ postSummary[,"ROPElow"]=ROPE[1]
+ postSummary[,"ROPEhigh"]=ROPE[2]
+ postSummary[,"pLtROPE"]=pLtROPE
+ postSummary[,"pInROPE"]=pInROPE
+ postSummary[,"pGtROPE"]=pGtROPE
+ }
+ # Display the HDI.
+ lines( HDI , c(0,0) , lwd=4 , lend=1 )
+ text( mean(HDI) , 0 , bquote(.(100*credMass) * "% HDI" ) ,
+ adj=c(.5,-1.7) , cex=cex )
+ text( HDI[1] , 0 , bquote(.(signif(HDI[1],3))) ,
+ adj=c(HDItextPlace,-0.5) , cex=cex )
+ text( HDI[2] , 0 , bquote(.(signif(HDI[2],3))) ,
+ adj=c(1.0-HDItextPlace,-0.5) , cex=cex )
+ par(xpd=F)
+ #
+ return( postSummary )
+}
+
+#------------------------------------------------------------------------------
+
+# Shape parameters from central tendency and scale:
+
+betaABfromMeanKappa = function( mean , kappa ) {
+ if ( mean <=0 | mean >= 1) stop("must have 0 < mean < 1")
+ if ( kappa <=0 ) stop("kappa must be > 0")
+ a = mean * kappa
+ b = ( 1.0 - mean ) * kappa
+ return( list( a=a , b=b ) )
+}
+
+betaABfromModeKappa = function( mode , kappa ) {
+ if ( mode <=0 | mode >= 1) stop("must have 0 < mode < 1")
+ if ( kappa <=2 ) stop("kappa must be > 2 for mode parameterization")
+ a = mode * ( kappa - 2 ) + 1
+ b = ( 1.0 - mode ) * ( kappa - 2 ) + 1
+ return( list( a=a , b=b ) )
+}
+
+betaABfromMeanSD = function( mean , sd ) {
+ if ( mean <=0 | mean >= 1) stop("must have 0 < mean < 1")
+ if ( sd <= 0 ) stop("sd must be > 0")
+ kappa = mean*(1-mean)/sd^2 - 1
+ if ( kappa <= 0 ) stop("invalid combination of mean and sd")
+ a = mean * kappa
+ b = ( 1.0 - mean ) * kappa
+ return( list( a=a , b=b ) )
+}
+
+gammaShRaFromMeanSD = function( mean , sd ) {
+ if ( mean <=0 ) stop("mean must be > 0")
+ if ( sd <=0 ) stop("sd must be > 0")
+ shape = mean^2/sd^2
+ rate = mean/sd^2
+ return( list( shape=shape , rate=rate ) )
+}
+
+gammaShRaFromModeSD = function( mode , sd ) {
+ if ( mode <=0 ) stop("mode must be > 0")
+ if ( sd <=0 ) stop("sd must be > 0")
+ rate = ( mode + sqrt( mode^2 + 4 * sd^2 ) ) / ( 2 * sd^2 )
+ shape = 1 + mode * rate
+ return( list( shape=shape , rate=rate ) )
+}
+
+#------------------------------------------------------------------------------
+
+# Make some data files for examples...
+createDataFiles=FALSE
+if ( createDataFiles ) {
+
+ source("HtWtDataGenerator.R")
+ N=300
+ m = HtWtDataGenerator( N , rndsd=47405 )
+ write.csv( file=paste0("HtWtData",N,".csv") , row.names=FALSE , m )
+
+
+ # Function for generating normal data with normal outliers:
+ genYwithOut = function( N , pcntOut=15 , sdOut=3.0 ) {
+ inl = rnorm( N-ceiling(pcntOut/100*N) )
+ out = rnorm( ceiling(pcntOut/100*N) )
+ inl = (inl-mean(inl))/sd(inl)
+ out = (out-mean(out))/sd(out) * sdOut
+ return(c(inl,out))
+ }
+
+ # Two-group IQ scores with outliers
+ set.seed(47405)
+ y1 = round(pmax(50,genYwithOut(63,20,3.5)*17.5+106))
+ y2 = round(pmax(50,genYwithOut(57,20,3.5)*10+100))
+ write.csv( file="TwoGroupIQ.csv" , row.names=FALSE ,
+ data.frame( Score=c(y1,y2) ,
+ Group=c(rep("Smart Drug",length(y1)),
+ rep("Placebo",length(y2))) ) )
+
+ # One-group log-normal
+ set.seed(47405)
+ z = rnorm(123)
+ logY = (z-mean(z))/sd(z) * 0.5 + 5.5 # logY has mean 5.5 and sd 0.5
+ y = round( exp(logY) , 2 )
+ write.csv( file="OneGroupLogNormal.csv" , row.names=FALSE ,
+ cbind(y) )
+
+ # One-group gamma
+ desiredMode = 250
+ desiredSD = 100
+ desiredRate = (desiredMode+sqrt(desiredMode^2+4*desiredSD^2))/(2*desiredSD^2)
+ desiredShape = 1+desiredMode*desiredRate
+ set.seed(47405)
+ y = round( rgamma( 153 , shape=desiredShape , rate=desiredRate ) , 2 )
+ write.csv( file="OneGroupGamma.csv" , row.names=FALSE , cbind(y) )
+
+} # end if createDataFiles