DBDA2E-utilities.R (21789B)
1 # Utility programs for use with the book, 2 # Kruschke, J. K. (2015). Doing Bayesian Data Analysis, Second Edition: 3 # A Tutorial with R, JAGS, and Stan. Academic Press / Elsevier. 4 # This file contains several functions that are called by other programs 5 # or can be called directly by the user. To load all the functions into 6 # R's working memory, at R's command line type: 7 # source("DBDA2E-utilities.R") 8 9 #------------------------------------------------------------------------------ 10 11 bookInfo = "Kruschke, J. K. (2015). Doing Bayesian Data Analysis, Second Edition:\nA Tutorial with R, JAGS, and Stan. Academic Press / Elsevier." 12 bannerBreak = "\n*********************************************************************\n" 13 cat(paste0(bannerBreak,bookInfo,bannerBreak,"\n")) 14 15 #------------------------------------------------------------------------------ 16 # Check that required packages are installed: 17 want = c("parallel","rjags","runjags") 18 have = want %in% rownames(installed.packages()) 19 if ( any(!have) ) { install.packages( want[!have] ) } 20 21 # load rjags: 22 library(rjags) 23 # load runjags and set some options in runjags: 24 library(runjags) 25 runjags.options( inits.warning=FALSE , rng.warning=FALSE ) 26 27 # set default number of chains and parallelness for MCMC: 28 library(parallel) # for detectCores() 29 nCores = detectCores() 30 if ( !is.finite(nCores) ) { nCores = 1 } 31 if ( nCores > 4 ) { 32 nChainsDefault = 4 # because JAGS has only 4 rng's. 33 runjagsMethodDefault = "parallel" 34 } 35 if ( nCores == 4 ) { 36 nChainsDefault = 3 # save 1 core for other processes. 37 runjagsMethodDefault = "parallel" 38 } 39 if ( nCores < 4 ) { 40 nChainsDefault = 3 41 runjagsMethodDefault = "rjags" # NOT parallel 42 } 43 44 #------------------------------------------------------------------------------ 45 # Functions for opening and saving graphics that operate the same for 46 # Windows and Macintosh and Linux operating systems. At least, that's the hope! 47 48 openGraph = function( width=7 , height=7 , mag=1.0 , ... ) { 49 if ( .Platform$OS.type != "windows" ) { # Mac OS, Linux 50 tryInfo = try( X11( width=width*mag , height=height*mag , type="cairo" , 51 ... ) ) 52 if ( class(tryInfo)=="try-error" ) { 53 lineInput = readline("WARNING: Previous graphics windows will be closed because of too many open windows.\nTO CONTINUE, PRESS <ENTER> IN R CONSOLE.\n") 54 graphics.off() 55 X11( width=width*mag , height=height*mag , type="cairo" , ... ) 56 } 57 } else { # Windows OS 58 tryInfo = try( windows( width=width*mag , height=height*mag , ... ) ) 59 if ( class(tryInfo)=="try-error" ) { 60 lineInput = readline("WARNING: Previous graphics windows will be closed because of too many open windows.\nTO CONTINUE, PRESS <ENTER> IN R CONSOLE.\n") 61 graphics.off() 62 windows( width=width*mag , height=height*mag , ... ) 63 } 64 } 65 } 66 67 saveGraph = function( file="saveGraphOutput" , type="pdf" , ... ) { 68 if ( .Platform$OS.type != "windows" ) { # Mac OS, Linux 69 if ( any( type == c("png","jpeg","jpg","tiff","bmp")) ) { 70 sptype = type 71 if ( type == "jpg" ) { sptype = "jpeg" } 72 savePlot( file=paste0(file,".",type) , type=sptype , ... ) 73 } 74 if ( type == "pdf" ) { 75 dev.copy2pdf(file=paste0(file,".",type) , ... ) 76 } 77 if ( type == "eps" ) { 78 dev.copy2eps(file=paste0(file,".",type) , ... ) 79 } 80 } else { # Windows OS 81 file=paste0(file,".",type) 82 savePlot( file=file , type=type , ... ) 83 } 84 } 85 86 #------------------------------------------------------------------------------ 87 # Functions for computing limits of HDI's: 88 89 HDIofMCMC = function( sampleVec , credMass=0.95 ) { 90 # Computes highest density interval from a sample of representative values, 91 # estimated as shortest credible interval. 92 # Arguments: 93 # sampleVec 94 # is a vector of representative values from a probability distribution. 95 # credMass 96 # is a scalar between 0 and 1, indicating the mass within the credible 97 # interval that is to be estimated. 98 # Value: 99 # HDIlim is a vector containing the limits of the HDI 100 sortedPts = sort( sampleVec ) 101 ciIdxInc = ceiling( credMass * length( sortedPts ) ) 102 nCIs = length( sortedPts ) - ciIdxInc 103 ciWidth = rep( 0 , nCIs ) 104 for ( i in 1:nCIs ) { 105 ciWidth[ i ] = sortedPts[ i + ciIdxInc ] - sortedPts[ i ] 106 } 107 HDImin = sortedPts[ which.min( ciWidth ) ] 108 HDImax = sortedPts[ which.min( ciWidth ) + ciIdxInc ] 109 HDIlim = c( HDImin , HDImax ) 110 return( HDIlim ) 111 } 112 113 HDIofICDF = function( ICDFname , credMass=0.95 , tol=1e-8 , ... ) { 114 # Arguments: 115 # ICDFname is R's name for the inverse cumulative density function 116 # of the distribution. 117 # credMass is the desired mass of the HDI region. 118 # tol is passed to R's optimize function. 119 # Return value: 120 # Highest density iterval (HDI) limits in a vector. 121 # Example of use: For determining HDI of a beta(30,12) distribution, type 122 # HDIofICDF( qbeta , shape1 = 30 , shape2 = 12 ) 123 # Notice that the parameters of the ICDFname must be explicitly named; 124 # e.g., HDIofICDF( qbeta , 30 , 12 ) does not work. 125 # Adapted and corrected from Greg Snow's TeachingDemos package. 126 incredMass = 1.0 - credMass 127 intervalWidth = function( lowTailPr , ICDFname , credMass , ... ) { 128 ICDFname( credMass + lowTailPr , ... ) - ICDFname( lowTailPr , ... ) 129 } 130 optInfo = optimize( intervalWidth , c( 0 , incredMass ) , ICDFname=ICDFname , 131 credMass=credMass , tol=tol , ... ) 132 HDIlowTailPr = optInfo$minimum 133 return( c( ICDFname( HDIlowTailPr , ... ) , 134 ICDFname( credMass + HDIlowTailPr , ... ) ) ) 135 } 136 137 HDIofGrid = function( probMassVec , credMass=0.95 ) { 138 # Arguments: 139 # probMassVec is a vector of probability masses at each grid point. 140 # credMass is the desired mass of the HDI region. 141 # Return value: 142 # A list with components: 143 # indices is a vector of indices that are in the HDI 144 # mass is the total mass of the included indices 145 # height is the smallest component probability mass in the HDI 146 # Example of use: For determining HDI of a beta(30,12) distribution 147 # approximated on a grid: 148 # > probDensityVec = dbeta( seq(0,1,length=201) , 30 , 12 ) 149 # > probMassVec = probDensityVec / sum( probDensityVec ) 150 # > HDIinfo = HDIofGrid( probMassVec ) 151 # > show( HDIinfo ) 152 sortedProbMass = sort( probMassVec , decreasing=TRUE ) 153 HDIheightIdx = min( which( cumsum( sortedProbMass ) >= credMass ) ) 154 HDIheight = sortedProbMass[ HDIheightIdx ] 155 HDImass = sum( probMassVec[ probMassVec >= HDIheight ] ) 156 return( list( indices = which( probMassVec >= HDIheight ) , 157 mass = HDImass , height = HDIheight ) ) 158 } 159 160 #------------------------------------------------------------------------------ 161 # Function(s) for plotting properties of mcmc coda objects. 162 163 DbdaAcfPlot = function( codaObject , parName=varnames(codaObject)[1] , plColors=NULL ) { 164 if ( all( parName != varnames(codaObject) ) ) { 165 stop("parName must be a column name of coda object") 166 } 167 nChain = length(codaObject) 168 if ( is.null(plColors) ) plColors=1:nChain 169 xMat = NULL 170 yMat = NULL 171 for ( cIdx in 1:nChain ) { 172 acfInfo = acf(codaObject[,c(parName)][[cIdx]],plot=FALSE) 173 xMat = cbind(xMat,acfInfo$lag) 174 yMat = cbind(yMat,acfInfo$acf) 175 } 176 matplot( xMat , yMat , type="o" , pch=20 , col=plColors , ylim=c(0,1) , 177 main="" , xlab="Lag" , ylab="Autocorrelation" ) 178 abline(h=0,lty="dashed") 179 EffChnLngth = effectiveSize(codaObject[,c(parName)]) 180 text( x=max(xMat) , y=max(yMat) , adj=c(1.0,1.0) , cex=1.25 , 181 labels=paste("ESS =",round(EffChnLngth,1)) ) 182 } 183 184 DbdaDensPlot = function( codaObject , parName=varnames(codaObject)[1] , plColors=NULL ) { 185 if ( all( parName != varnames(codaObject) ) ) { 186 stop("parName must be a column name of coda object") 187 } 188 nChain = length(codaObject) # or nchain(codaObject) 189 if ( is.null(plColors) ) plColors=1:nChain 190 xMat = NULL 191 yMat = NULL 192 hdiLims = NULL 193 for ( cIdx in 1:nChain ) { 194 densInfo = density(codaObject[,c(parName)][[cIdx]]) 195 xMat = cbind(xMat,densInfo$x) 196 yMat = cbind(yMat,densInfo$y) 197 hdiLims = cbind(hdiLims,HDIofMCMC(codaObject[,c(parName)][[cIdx]])) 198 } 199 matplot( xMat , yMat , type="l" , col=plColors , 200 main="" , xlab="Param. Value" , ylab="Density" ) 201 abline(h=0) 202 points( hdiLims[1,] , rep(0,nChain) , col=plColors , pch="|" ) 203 points( hdiLims[2,] , rep(0,nChain) , col=plColors , pch="|" ) 204 text( mean(hdiLims) , 0 , "95% HDI" , adj=c(0.5,-0.2) ) 205 EffChnLngth = effectiveSize(codaObject[,c(parName)]) 206 MCSE = sd(as.matrix(codaObject[,c(parName)]))/sqrt(EffChnLngth) 207 text( max(xMat) , max(yMat) , adj=c(1.0,1.0) , cex=1.25 , 208 paste("MCSE =\n",signif(MCSE,3)) ) 209 } 210 211 diagMCMC = function( codaObject , parName=varnames(codaObject)[1] , 212 saveName=NULL , saveType="jpg" ) { 213 DBDAplColors = c("skyblue","black","royalblue","steelblue") 214 #openGraph(height=5,width=7) 215 par( mar=0.5+c(3,4,1,0) , oma=0.1+c(0,0,2,0) , mgp=c(2.25,0.7,0) , 216 cex.lab=1.5 ) 217 layout(matrix(1:4,nrow=2)) 218 # traceplot and gelman.plot are from CODA package: 219 require(coda) 220 coda::traceplot( codaObject[,c(parName)] , main="" , ylab="Param. Value" , 221 col=DBDAplColors ) 222 tryVal = try( 223 coda::gelman.plot( codaObject[,c(parName)] , main="" , auto.layout=FALSE , 224 col=DBDAplColors ) 225 ) 226 # if it runs, gelman.plot returns a list with finite shrink values: 227 if ( class(tryVal)=="try-error" ) { 228 plot.new() 229 print(paste0("Warning: coda::gelman.plot fails for ",parName)) 230 } else { 231 if ( class(tryVal)=="list" & !is.finite(tryVal$shrink[1]) ) { 232 plot.new() 233 print(paste0("Warning: coda::gelman.plot fails for ",parName)) 234 } 235 } 236 DbdaAcfPlot(codaObject,parName,plColors=DBDAplColors) 237 DbdaDensPlot(codaObject,parName,plColors=DBDAplColors) 238 mtext( text=parName , outer=TRUE , adj=c(0.5,0.5) , cex=2.0 ) 239 if ( !is.null(saveName) ) { 240 saveGraph( file=paste0(saveName,"Diag",parName), type=saveType) 241 } 242 } 243 244 diagStanFit = function( stanFit , parName , 245 saveName=NULL , saveType="jpg" ) { 246 codaFit = mcmc.list( lapply( 1:ncol(stanFit) , 247 function(x) { mcmc(as.array(stanFit)[,x,]) } ) ) 248 DBDAplColors = c("skyblue","black","royalblue","steelblue") 249 openGraph(height=5,width=7) 250 par( mar=0.5+c(3,4,1,0) , oma=0.1+c(0,0,2,0) , mgp=c(2.25,0.7,0) , cex.lab=1.5 ) 251 layout(matrix(1:4,nrow=2)) 252 # traceplot is from rstan package 253 require(rstan) 254 traceplot(stanFit,pars=parName,nrow=1,ncol=1)#,main="",ylab="Param. Value",col=DBDAplColors) 255 # gelman.plot are from CODA package: 256 require(coda) 257 tryVal = try( 258 coda::gelman.plot( codaObject[,c(parName)] , main="" , auto.layout=FALSE , 259 col=DBDAplColors ) 260 ) 261 # if it runs, gelman.plot returns a list with finite shrink values: 262 if ( class(tryVal)=="try-error" ) { 263 plot.new() 264 print(paste0("Warning: coda::gelman.plot fails for ",parName)) 265 } else { 266 if ( class(tryVal)=="list" & !is.finite(tryVal$shrink[1]) ) { 267 plot.new() 268 print(paste0("Warning: coda::gelman.plot fails for ",parName)) 269 } 270 } 271 DbdaAcfPlot(codaFit,parName,plColors=DBDAplColors) 272 DbdaDensPlot(codaFit,parName,plColors=DBDAplColors) 273 mtext( text=parName , outer=TRUE , adj=c(0.5,0.5) , cex=2.0 ) 274 if ( !is.null(saveName) ) { 275 saveGraph( file=paste0(saveName,"Diag",parName), type=saveType) 276 } 277 } 278 279 #------------------------------------------------------------------------------ 280 # Functions for summarizing and plotting distribution of a large sample; 281 # typically applied to MCMC posterior. 282 283 normalize = function( v ){ return( v / sum(v) ) } 284 285 require(coda) # loaded by rjags, but redundancy doesn't hurt 286 287 summarizePost = function( paramSampleVec , 288 compVal=NULL , ROPE=NULL , credMass=0.95 ) { 289 meanParam = mean( paramSampleVec ) 290 medianParam = median( paramSampleVec ) 291 dres = density( paramSampleVec ) 292 modeParam = dres$x[which.max(dres$y)] 293 mcmcEffSz = round( effectiveSize( paramSampleVec ) , 1 ) 294 names(mcmcEffSz) = NULL 295 hdiLim = HDIofMCMC( paramSampleVec , credMass=credMass ) 296 if ( !is.null(compVal) ) { 297 pcgtCompVal = ( 100 * sum( paramSampleVec > compVal ) 298 / length( paramSampleVec ) ) 299 } else { 300 compVal=NA 301 pcgtCompVal=NA 302 } 303 if ( !is.null(ROPE) ) { 304 pcltRope = ( 100 * sum( paramSampleVec < ROPE[1] ) 305 / length( paramSampleVec ) ) 306 pcgtRope = ( 100 * sum( paramSampleVec > ROPE[2] ) 307 / length( paramSampleVec ) ) 308 pcinRope = 100-(pcltRope+pcgtRope) 309 } else { 310 ROPE = c(NA,NA) 311 pcltRope=NA 312 pcgtRope=NA 313 pcinRope=NA 314 } 315 return( c( Mean=meanParam , Median=medianParam , Mode=modeParam , 316 ESS=mcmcEffSz , 317 HDImass=credMass , HDIlow=hdiLim[1] , HDIhigh=hdiLim[2] , 318 CompVal=compVal , PcntGtCompVal=pcgtCompVal , 319 ROPElow=ROPE[1] , ROPEhigh=ROPE[2] , 320 PcntLtROPE=pcltRope , PcntInROPE=pcinRope , PcntGtROPE=pcgtRope ) ) 321 } 322 323 plotPost = function( paramSampleVec , cenTend=c("mode","median","mean")[1] , 324 compVal=NULL, ROPE=NULL, credMass=0.95, HDItextPlace=0.7, 325 xlab=NULL , xlim=NULL , yaxt=NULL , ylab=NULL , 326 main=NULL , cex=NULL , cex.lab=NULL , 327 col=NULL , border=NULL , showCurve=FALSE , breaks=NULL , 328 ... ) { 329 # Override defaults of hist function, if not specified by user: 330 # (additional arguments "..." are passed to the hist function) 331 if ( is.null(xlab) ) xlab="Param. Val." 332 if ( is.null(cex.lab) ) cex.lab=1.5 333 if ( is.null(cex) ) cex=1.4 334 if ( is.null(xlim) ) xlim=range( c( compVal , ROPE , paramSampleVec ) ) 335 if ( is.null(main) ) main="" 336 if ( is.null(yaxt) ) yaxt="n" 337 if ( is.null(ylab) ) ylab="" 338 if ( is.null(col) ) col="skyblue" 339 if ( is.null(border) ) border="white" 340 341 # convert coda object to matrix: 342 if ( class(paramSampleVec) == "mcmc.list" ) { 343 paramSampleVec = as.matrix(paramSampleVec) 344 } 345 346 summaryColNames = c("ESS","mean","median","mode", 347 "hdiMass","hdiLow","hdiHigh", 348 "compVal","pGtCompVal", 349 "ROPElow","ROPEhigh","pLtROPE","pInROPE","pGtROPE") 350 postSummary = matrix( NA , nrow=1 , ncol=length(summaryColNames) , 351 dimnames=list( c( xlab ) , summaryColNames ) ) 352 353 # require(coda) # for effectiveSize function 354 postSummary[,"ESS"] = effectiveSize(paramSampleVec) 355 356 postSummary[,"mean"] = mean(paramSampleVec) 357 postSummary[,"median"] = median(paramSampleVec) 358 mcmcDensity = density(paramSampleVec) 359 postSummary[,"mode"] = mcmcDensity$x[which.max(mcmcDensity$y)] 360 361 HDI = HDIofMCMC( paramSampleVec , credMass ) 362 postSummary[,"hdiMass"]=credMass 363 postSummary[,"hdiLow"]=HDI[1] 364 postSummary[,"hdiHigh"]=HDI[2] 365 366 # Plot histogram. 367 cvCol = "darkgreen" 368 ropeCol = "darkred" 369 if ( is.null(breaks) ) { 370 if ( max(paramSampleVec) > min(paramSampleVec) ) { 371 breaks = c( seq( from=min(paramSampleVec) , to=max(paramSampleVec) , 372 by=(HDI[2]-HDI[1])/18 ) , max(paramSampleVec) ) 373 } else { 374 breaks=c(min(paramSampleVec)-1.0E-6,max(paramSampleVec)+1.0E-6) 375 border="skyblue" 376 } 377 } 378 if ( !showCurve ) { 379 par(xpd=NA) 380 histinfo = hist( paramSampleVec , xlab=xlab , yaxt=yaxt , ylab=ylab , 381 freq=F , border=border , col=col , 382 xlim=xlim , main=main , cex=cex , cex.lab=cex.lab , 383 breaks=breaks , ... ) 384 } 385 if ( showCurve ) { 386 par(xpd=NA) 387 histinfo = hist( paramSampleVec , plot=F ) 388 densCurve = density( paramSampleVec , adjust=2 ) 389 plot( densCurve$x , densCurve$y , type="l" , lwd=5 , col=col , bty="n" , 390 xlim=xlim , xlab=xlab , yaxt=yaxt , ylab=ylab , 391 main=main , cex=cex , cex.lab=cex.lab , ... ) 392 } 393 cenTendHt = 0.9*max(histinfo$density) 394 cvHt = 0.7*max(histinfo$density) 395 ROPEtextHt = 0.55*max(histinfo$density) 396 # Display central tendency: 397 mn = mean(paramSampleVec) 398 med = median(paramSampleVec) 399 mcmcDensity = density(paramSampleVec) 400 mo = mcmcDensity$x[which.max(mcmcDensity$y)] 401 if ( cenTend=="mode" ){ 402 text( mo , cenTendHt , 403 bquote(mode==.(signif(mo,3))) , adj=c(.5,0) , cex=cex ) 404 } 405 if ( cenTend=="median" ){ 406 text( med , cenTendHt , 407 bquote(median==.(signif(med,3))) , adj=c(.5,0) , cex=cex , col=cvCol ) 408 } 409 if ( cenTend=="mean" ){ 410 text( mn , cenTendHt , 411 bquote(mean==.(signif(mn,3))) , adj=c(.5,0) , cex=cex ) 412 } 413 # Display the comparison value. 414 if ( !is.null( compVal ) ) { 415 pGtCompVal = sum( paramSampleVec > compVal ) / length( paramSampleVec ) 416 pLtCompVal = 1 - pGtCompVal 417 lines( c(compVal,compVal) , c(0.96*cvHt,0) , 418 lty="dotted" , lwd=2 , col=cvCol ) 419 text( compVal , cvHt , 420 bquote( .(round(100*pLtCompVal,1)) * "% < " * 421 .(signif(compVal,3)) * " < " * 422 .(round(100*pGtCompVal,1)) * "%" ) , 423 adj=c(pLtCompVal,0) , cex=0.8*cex , col=cvCol ) 424 postSummary[,"compVal"] = compVal 425 postSummary[,"pGtCompVal"] = pGtCompVal 426 } 427 # Display the ROPE. 428 if ( !is.null( ROPE ) ) { 429 pInROPE = ( sum( paramSampleVec > ROPE[1] & paramSampleVec < ROPE[2] ) 430 / length( paramSampleVec ) ) 431 pGtROPE = ( sum( paramSampleVec >= ROPE[2] ) / length( paramSampleVec ) ) 432 pLtROPE = ( sum( paramSampleVec <= ROPE[1] ) / length( paramSampleVec ) ) 433 lines( c(ROPE[1],ROPE[1]) , c(0.96*ROPEtextHt,0) , lty="dotted" , lwd=2 , 434 col=ropeCol ) 435 lines( c(ROPE[2],ROPE[2]) , c(0.96*ROPEtextHt,0) , lty="dotted" , lwd=2 , 436 col=ropeCol) 437 text( mean(ROPE) , ROPEtextHt , 438 bquote( .(round(100*pLtROPE,1)) * "% < " * .(ROPE[1]) * " < " * 439 .(round(100*pInROPE,1)) * "% < " * .(ROPE[2]) * " < " * 440 .(round(100*pGtROPE,1)) * "%" ) , 441 adj=c(pLtROPE+.5*pInROPE,0) , cex=1 , col=ropeCol ) 442 443 postSummary[,"ROPElow"]=ROPE[1] 444 postSummary[,"ROPEhigh"]=ROPE[2] 445 postSummary[,"pLtROPE"]=pLtROPE 446 postSummary[,"pInROPE"]=pInROPE 447 postSummary[,"pGtROPE"]=pGtROPE 448 } 449 # Display the HDI. 450 lines( HDI , c(0,0) , lwd=4 , lend=1 ) 451 text( mean(HDI) , 0 , bquote(.(100*credMass) * "% HDI" ) , 452 adj=c(.5,-1.7) , cex=cex ) 453 text( HDI[1] , 0 , bquote(.(signif(HDI[1],3))) , 454 adj=c(HDItextPlace,-0.5) , cex=cex ) 455 text( HDI[2] , 0 , bquote(.(signif(HDI[2],3))) , 456 adj=c(1.0-HDItextPlace,-0.5) , cex=cex ) 457 par(xpd=F) 458 # 459 return( postSummary ) 460 } 461 462 #------------------------------------------------------------------------------ 463 464 # Shape parameters from central tendency and scale: 465 466 betaABfromMeanKappa = function( mean , kappa ) { 467 if ( mean <=0 | mean >= 1) stop("must have 0 < mean < 1") 468 if ( kappa <=0 ) stop("kappa must be > 0") 469 a = mean * kappa 470 b = ( 1.0 - mean ) * kappa 471 return( list( a=a , b=b ) ) 472 } 473 474 betaABfromModeKappa = function( mode , kappa ) { 475 if ( mode <=0 | mode >= 1) stop("must have 0 < mode < 1") 476 if ( kappa <=2 ) stop("kappa must be > 2 for mode parameterization") 477 a = mode * ( kappa - 2 ) + 1 478 b = ( 1.0 - mode ) * ( kappa - 2 ) + 1 479 return( list( a=a , b=b ) ) 480 } 481 482 betaABfromMeanSD = function( mean , sd ) { 483 if ( mean <=0 | mean >= 1) stop("must have 0 < mean < 1") 484 if ( sd <= 0 ) stop("sd must be > 0") 485 kappa = mean*(1-mean)/sd^2 - 1 486 if ( kappa <= 0 ) stop("invalid combination of mean and sd") 487 a = mean * kappa 488 b = ( 1.0 - mean ) * kappa 489 return( list( a=a , b=b ) ) 490 } 491 492 gammaShRaFromMeanSD = function( mean , sd ) { 493 if ( mean <=0 ) stop("mean must be > 0") 494 if ( sd <=0 ) stop("sd must be > 0") 495 shape = mean^2/sd^2 496 rate = mean/sd^2 497 return( list( shape=shape , rate=rate ) ) 498 } 499 500 gammaShRaFromModeSD = function( mode , sd ) { 501 if ( mode <=0 ) stop("mode must be > 0") 502 if ( sd <=0 ) stop("sd must be > 0") 503 rate = ( mode + sqrt( mode^2 + 4 * sd^2 ) ) / ( 2 * sd^2 ) 504 shape = 1 + mode * rate 505 return( list( shape=shape , rate=rate ) ) 506 } 507 508 #------------------------------------------------------------------------------ 509 510 # Make some data files for examples... 511 createDataFiles=FALSE 512 if ( createDataFiles ) { 513 514 source("HtWtDataGenerator.R") 515 N=300 516 m = HtWtDataGenerator( N , rndsd=47405 ) 517 write.csv( file=paste0("HtWtData",N,".csv") , row.names=FALSE , m ) 518 519 520 # Function for generating normal data with normal outliers: 521 genYwithOut = function( N , pcntOut=15 , sdOut=3.0 ) { 522 inl = rnorm( N-ceiling(pcntOut/100*N) ) 523 out = rnorm( ceiling(pcntOut/100*N) ) 524 inl = (inl-mean(inl))/sd(inl) 525 out = (out-mean(out))/sd(out) * sdOut 526 return(c(inl,out)) 527 } 528 529 # Two-group IQ scores with outliers 530 set.seed(47405) 531 y1 = round(pmax(50,genYwithOut(63,20,3.5)*17.5+106)) 532 y2 = round(pmax(50,genYwithOut(57,20,3.5)*10+100)) 533 write.csv( file="TwoGroupIQ.csv" , row.names=FALSE , 534 data.frame( Score=c(y1,y2) , 535 Group=c(rep("Smart Drug",length(y1)), 536 rep("Placebo",length(y2))) ) ) 537 538 # One-group log-normal 539 set.seed(47405) 540 z = rnorm(123) 541 logY = (z-mean(z))/sd(z) * 0.5 + 5.5 # logY has mean 5.5 and sd 0.5 542 y = round( exp(logY) , 2 ) 543 write.csv( file="OneGroupLogNormal.csv" , row.names=FALSE , 544 cbind(y) ) 545 546 # One-group gamma 547 desiredMode = 250 548 desiredSD = 100 549 desiredRate = (desiredMode+sqrt(desiredMode^2+4*desiredSD^2))/(2*desiredSD^2) 550 desiredShape = 1+desiredMode*desiredRate 551 set.seed(47405) 552 y = round( rgamma( 153 , shape=desiredShape , rate=desiredRate ) , 2 ) 553 write.csv( file="OneGroupGamma.csv" , row.names=FALSE , cbind(y) ) 554 555 } # end if createDataFiles