antivax-attitudes

Reanalyses of data from Horne, Powell, Hummel & Holyoak (2015)
git clone https://git.eamoncaddigan.net/antivax-attitudes.git
Log | Files | Refs | README | LICENSE

commit 5ed4b0b3fadd21e24bbcf6e42af9dc3098195be8
parent c386979f91fbfc5e5309c2384b3f6ebd4f66bc42
Author: eamoncaddigan <eamon.caddigan@gmail.com>
Date:   Fri, 21 Aug 2015 13:33:05 -0400

Pulled from Kruschke.

Diffstat:
ADBDA2E-utilities.R | 555+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 555 insertions(+), 0 deletions(-)

diff --git a/DBDA2E-utilities.R b/DBDA2E-utilities.R @@ -0,0 +1,555 @@ +# Utility programs for use with the book, +# Kruschke, J. K. (2015). Doing Bayesian Data Analysis, Second Edition: +# A Tutorial with R, JAGS, and Stan. Academic Press / Elsevier. +# This file contains several functions that are called by other programs +# or can be called directly by the user. To load all the functions into +# R's working memory, at R's command line type: +# source("DBDA2E-utilities.R") + +#------------------------------------------------------------------------------ + +bookInfo = "Kruschke, J. K. (2015). Doing Bayesian Data Analysis, Second Edition:\nA Tutorial with R, JAGS, and Stan. Academic Press / Elsevier." +bannerBreak = "\n*********************************************************************\n" +cat(paste0(bannerBreak,bookInfo,bannerBreak,"\n")) + +#------------------------------------------------------------------------------ +# Check that required packages are installed: +want = c("parallel","rjags","runjags") +have = want %in% rownames(installed.packages()) +if ( any(!have) ) { install.packages( want[!have] ) } + +# load rjags: +library(rjags) +# load runjags and set some options in runjags: +library(runjags) +runjags.options( inits.warning=FALSE , rng.warning=FALSE ) + +# set default number of chains and parallelness for MCMC: +library(parallel) # for detectCores() +nCores = detectCores() +if ( !is.finite(nCores) ) { nCores = 1 } +if ( nCores > 4 ) { + nChainsDefault = 4 # because JAGS has only 4 rng's. + runjagsMethodDefault = "parallel" +} +if ( nCores == 4 ) { + nChainsDefault = 3 # save 1 core for other processes. + runjagsMethodDefault = "parallel" +} +if ( nCores < 4 ) { + nChainsDefault = 3 + runjagsMethodDefault = "rjags" # NOT parallel +} + +#------------------------------------------------------------------------------ +# Functions for opening and saving graphics that operate the same for +# Windows and Macintosh and Linux operating systems. At least, that's the hope! + +openGraph = function( width=7 , height=7 , mag=1.0 , ... ) { + if ( .Platform$OS.type != "windows" ) { # Mac OS, Linux + tryInfo = try( X11( width=width*mag , height=height*mag , type="cairo" , + ... ) ) + if ( class(tryInfo)=="try-error" ) { + lineInput = readline("WARNING: Previous graphics windows will be closed because of too many open windows.\nTO CONTINUE, PRESS <ENTER> IN R CONSOLE.\n") + graphics.off() + X11( width=width*mag , height=height*mag , type="cairo" , ... ) + } + } else { # Windows OS + tryInfo = try( windows( width=width*mag , height=height*mag , ... ) ) + if ( class(tryInfo)=="try-error" ) { + lineInput = readline("WARNING: Previous graphics windows will be closed because of too many open windows.\nTO CONTINUE, PRESS <ENTER> IN R CONSOLE.\n") + graphics.off() + windows( width=width*mag , height=height*mag , ... ) + } + } +} + +saveGraph = function( file="saveGraphOutput" , type="pdf" , ... ) { + if ( .Platform$OS.type != "windows" ) { # Mac OS, Linux + if ( any( type == c("png","jpeg","jpg","tiff","bmp")) ) { + sptype = type + if ( type == "jpg" ) { sptype = "jpeg" } + savePlot( file=paste0(file,".",type) , type=sptype , ... ) + } + if ( type == "pdf" ) { + dev.copy2pdf(file=paste0(file,".",type) , ... ) + } + if ( type == "eps" ) { + dev.copy2eps(file=paste0(file,".",type) , ... ) + } + } else { # Windows OS + file=paste0(file,".",type) + savePlot( file=file , type=type , ... ) + } +} + +#------------------------------------------------------------------------------ +# Functions for computing limits of HDI's: + +HDIofMCMC = function( sampleVec , credMass=0.95 ) { + # Computes highest density interval from a sample of representative values, + # estimated as shortest credible interval. + # Arguments: + # sampleVec + # is a vector of representative values from a probability distribution. + # credMass + # is a scalar between 0 and 1, indicating the mass within the credible + # interval that is to be estimated. + # Value: + # HDIlim is a vector containing the limits of the HDI + sortedPts = sort( sampleVec ) + ciIdxInc = ceiling( credMass * length( sortedPts ) ) + nCIs = length( sortedPts ) - ciIdxInc + ciWidth = rep( 0 , nCIs ) + for ( i in 1:nCIs ) { + ciWidth[ i ] = sortedPts[ i + ciIdxInc ] - sortedPts[ i ] + } + HDImin = sortedPts[ which.min( ciWidth ) ] + HDImax = sortedPts[ which.min( ciWidth ) + ciIdxInc ] + HDIlim = c( HDImin , HDImax ) + return( HDIlim ) +} + +HDIofICDF = function( ICDFname , credMass=0.95 , tol=1e-8 , ... ) { + # Arguments: + # ICDFname is R's name for the inverse cumulative density function + # of the distribution. + # credMass is the desired mass of the HDI region. + # tol is passed to R's optimize function. + # Return value: + # Highest density iterval (HDI) limits in a vector. + # Example of use: For determining HDI of a beta(30,12) distribution, type + # HDIofICDF( qbeta , shape1 = 30 , shape2 = 12 ) + # Notice that the parameters of the ICDFname must be explicitly named; + # e.g., HDIofICDF( qbeta , 30 , 12 ) does not work. + # Adapted and corrected from Greg Snow's TeachingDemos package. + incredMass = 1.0 - credMass + intervalWidth = function( lowTailPr , ICDFname , credMass , ... ) { + ICDFname( credMass + lowTailPr , ... ) - ICDFname( lowTailPr , ... ) + } + optInfo = optimize( intervalWidth , c( 0 , incredMass ) , ICDFname=ICDFname , + credMass=credMass , tol=tol , ... ) + HDIlowTailPr = optInfo$minimum + return( c( ICDFname( HDIlowTailPr , ... ) , + ICDFname( credMass + HDIlowTailPr , ... ) ) ) +} + +HDIofGrid = function( probMassVec , credMass=0.95 ) { + # Arguments: + # probMassVec is a vector of probability masses at each grid point. + # credMass is the desired mass of the HDI region. + # Return value: + # A list with components: + # indices is a vector of indices that are in the HDI + # mass is the total mass of the included indices + # height is the smallest component probability mass in the HDI + # Example of use: For determining HDI of a beta(30,12) distribution + # approximated on a grid: + # > probDensityVec = dbeta( seq(0,1,length=201) , 30 , 12 ) + # > probMassVec = probDensityVec / sum( probDensityVec ) + # > HDIinfo = HDIofGrid( probMassVec ) + # > show( HDIinfo ) + sortedProbMass = sort( probMassVec , decreasing=TRUE ) + HDIheightIdx = min( which( cumsum( sortedProbMass ) >= credMass ) ) + HDIheight = sortedProbMass[ HDIheightIdx ] + HDImass = sum( probMassVec[ probMassVec >= HDIheight ] ) + return( list( indices = which( probMassVec >= HDIheight ) , + mass = HDImass , height = HDIheight ) ) +} + +#------------------------------------------------------------------------------ +# Function(s) for plotting properties of mcmc coda objects. + +DbdaAcfPlot = function( codaObject , parName=varnames(codaObject)[1] , plColors=NULL ) { + if ( all( parName != varnames(codaObject) ) ) { + stop("parName must be a column name of coda object") + } + nChain = length(codaObject) + if ( is.null(plColors) ) plColors=1:nChain + xMat = NULL + yMat = NULL + for ( cIdx in 1:nChain ) { + acfInfo = acf(codaObject[,c(parName)][[cIdx]],plot=FALSE) + xMat = cbind(xMat,acfInfo$lag) + yMat = cbind(yMat,acfInfo$acf) + } + matplot( xMat , yMat , type="o" , pch=20 , col=plColors , ylim=c(0,1) , + main="" , xlab="Lag" , ylab="Autocorrelation" ) + abline(h=0,lty="dashed") + EffChnLngth = effectiveSize(codaObject[,c(parName)]) + text( x=max(xMat) , y=max(yMat) , adj=c(1.0,1.0) , cex=1.25 , + labels=paste("ESS =",round(EffChnLngth,1)) ) +} + +DbdaDensPlot = function( codaObject , parName=varnames(codaObject)[1] , plColors=NULL ) { + if ( all( parName != varnames(codaObject) ) ) { + stop("parName must be a column name of coda object") + } + nChain = length(codaObject) # or nchain(codaObject) + if ( is.null(plColors) ) plColors=1:nChain + xMat = NULL + yMat = NULL + hdiLims = NULL + for ( cIdx in 1:nChain ) { + densInfo = density(codaObject[,c(parName)][[cIdx]]) + xMat = cbind(xMat,densInfo$x) + yMat = cbind(yMat,densInfo$y) + hdiLims = cbind(hdiLims,HDIofMCMC(codaObject[,c(parName)][[cIdx]])) + } + matplot( xMat , yMat , type="l" , col=plColors , + main="" , xlab="Param. Value" , ylab="Density" ) + abline(h=0) + points( hdiLims[1,] , rep(0,nChain) , col=plColors , pch="|" ) + points( hdiLims[2,] , rep(0,nChain) , col=plColors , pch="|" ) + text( mean(hdiLims) , 0 , "95% HDI" , adj=c(0.5,-0.2) ) + EffChnLngth = effectiveSize(codaObject[,c(parName)]) + MCSE = sd(as.matrix(codaObject[,c(parName)]))/sqrt(EffChnLngth) + text( max(xMat) , max(yMat) , adj=c(1.0,1.0) , cex=1.25 , + paste("MCSE =\n",signif(MCSE,3)) ) +} + +diagMCMC = function( codaObject , parName=varnames(codaObject)[1] , + saveName=NULL , saveType="jpg" ) { + DBDAplColors = c("skyblue","black","royalblue","steelblue") + openGraph(height=5,width=7) + par( mar=0.5+c(3,4,1,0) , oma=0.1+c(0,0,2,0) , mgp=c(2.25,0.7,0) , + cex.lab=1.5 ) + layout(matrix(1:4,nrow=2)) + # traceplot and gelman.plot are from CODA package: + require(coda) + coda::traceplot( codaObject[,c(parName)] , main="" , ylab="Param. Value" , + col=DBDAplColors ) + tryVal = try( + coda::gelman.plot( codaObject[,c(parName)] , main="" , auto.layout=FALSE , + col=DBDAplColors ) + ) + # if it runs, gelman.plot returns a list with finite shrink values: + if ( class(tryVal)=="try-error" ) { + plot.new() + print(paste0("Warning: coda::gelman.plot fails for ",parName)) + } else { + if ( class(tryVal)=="list" & !is.finite(tryVal$shrink[1]) ) { + plot.new() + print(paste0("Warning: coda::gelman.plot fails for ",parName)) + } + } + DbdaAcfPlot(codaObject,parName,plColors=DBDAplColors) + DbdaDensPlot(codaObject,parName,plColors=DBDAplColors) + mtext( text=parName , outer=TRUE , adj=c(0.5,0.5) , cex=2.0 ) + if ( !is.null(saveName) ) { + saveGraph( file=paste0(saveName,"Diag",parName), type=saveType) + } +} + +diagStanFit = function( stanFit , parName , + saveName=NULL , saveType="jpg" ) { + codaFit = mcmc.list( lapply( 1:ncol(stanFit) , + function(x) { mcmc(as.array(stanFit)[,x,]) } ) ) + DBDAplColors = c("skyblue","black","royalblue","steelblue") + openGraph(height=5,width=7) + par( mar=0.5+c(3,4,1,0) , oma=0.1+c(0,0,2,0) , mgp=c(2.25,0.7,0) , cex.lab=1.5 ) + layout(matrix(1:4,nrow=2)) + # traceplot is from rstan package + require(rstan) + traceplot(stanFit,pars=parName,nrow=1,ncol=1)#,main="",ylab="Param. Value",col=DBDAplColors) + # gelman.plot are from CODA package: + require(coda) + tryVal = try( + coda::gelman.plot( codaObject[,c(parName)] , main="" , auto.layout=FALSE , + col=DBDAplColors ) + ) + # if it runs, gelman.plot returns a list with finite shrink values: + if ( class(tryVal)=="try-error" ) { + plot.new() + print(paste0("Warning: coda::gelman.plot fails for ",parName)) + } else { + if ( class(tryVal)=="list" & !is.finite(tryVal$shrink[1]) ) { + plot.new() + print(paste0("Warning: coda::gelman.plot fails for ",parName)) + } + } + DbdaAcfPlot(codaFit,parName,plColors=DBDAplColors) + DbdaDensPlot(codaFit,parName,plColors=DBDAplColors) + mtext( text=parName , outer=TRUE , adj=c(0.5,0.5) , cex=2.0 ) + if ( !is.null(saveName) ) { + saveGraph( file=paste0(saveName,"Diag",parName), type=saveType) + } +} + +#------------------------------------------------------------------------------ +# Functions for summarizing and plotting distribution of a large sample; +# typically applied to MCMC posterior. + +normalize = function( v ){ return( v / sum(v) ) } + +require(coda) # loaded by rjags, but redundancy doesn't hurt + +summarizePost = function( paramSampleVec , + compVal=NULL , ROPE=NULL , credMass=0.95 ) { + meanParam = mean( paramSampleVec ) + medianParam = median( paramSampleVec ) + dres = density( paramSampleVec ) + modeParam = dres$x[which.max(dres$y)] + mcmcEffSz = round( effectiveSize( paramSampleVec ) , 1 ) + names(mcmcEffSz) = NULL + hdiLim = HDIofMCMC( paramSampleVec , credMass=credMass ) + if ( !is.null(compVal) ) { + pcgtCompVal = ( 100 * sum( paramSampleVec > compVal ) + / length( paramSampleVec ) ) + } else { + compVal=NA + pcgtCompVal=NA + } + if ( !is.null(ROPE) ) { + pcltRope = ( 100 * sum( paramSampleVec < ROPE[1] ) + / length( paramSampleVec ) ) + pcgtRope = ( 100 * sum( paramSampleVec > ROPE[2] ) + / length( paramSampleVec ) ) + pcinRope = 100-(pcltRope+pcgtRope) + } else { + ROPE = c(NA,NA) + pcltRope=NA + pcgtRope=NA + pcinRope=NA + } + return( c( Mean=meanParam , Median=medianParam , Mode=modeParam , + ESS=mcmcEffSz , + HDImass=credMass , HDIlow=hdiLim[1] , HDIhigh=hdiLim[2] , + CompVal=compVal , PcntGtCompVal=pcgtCompVal , + ROPElow=ROPE[1] , ROPEhigh=ROPE[2] , + PcntLtROPE=pcltRope , PcntInROPE=pcinRope , PcntGtROPE=pcgtRope ) ) +} + +plotPost = function( paramSampleVec , cenTend=c("mode","median","mean")[1] , + compVal=NULL, ROPE=NULL, credMass=0.95, HDItextPlace=0.7, + xlab=NULL , xlim=NULL , yaxt=NULL , ylab=NULL , + main=NULL , cex=NULL , cex.lab=NULL , + col=NULL , border=NULL , showCurve=FALSE , breaks=NULL , + ... ) { + # Override defaults of hist function, if not specified by user: + # (additional arguments "..." are passed to the hist function) + if ( is.null(xlab) ) xlab="Param. Val." + if ( is.null(cex.lab) ) cex.lab=1.5 + if ( is.null(cex) ) cex=1.4 + if ( is.null(xlim) ) xlim=range( c( compVal , ROPE , paramSampleVec ) ) + if ( is.null(main) ) main="" + if ( is.null(yaxt) ) yaxt="n" + if ( is.null(ylab) ) ylab="" + if ( is.null(col) ) col="skyblue" + if ( is.null(border) ) border="white" + + # convert coda object to matrix: + if ( class(paramSampleVec) == "mcmc.list" ) { + paramSampleVec = as.matrix(paramSampleVec) + } + + summaryColNames = c("ESS","mean","median","mode", + "hdiMass","hdiLow","hdiHigh", + "compVal","pGtCompVal", + "ROPElow","ROPEhigh","pLtROPE","pInROPE","pGtROPE") + postSummary = matrix( NA , nrow=1 , ncol=length(summaryColNames) , + dimnames=list( c( xlab ) , summaryColNames ) ) + + # require(coda) # for effectiveSize function + postSummary[,"ESS"] = effectiveSize(paramSampleVec) + + postSummary[,"mean"] = mean(paramSampleVec) + postSummary[,"median"] = median(paramSampleVec) + mcmcDensity = density(paramSampleVec) + postSummary[,"mode"] = mcmcDensity$x[which.max(mcmcDensity$y)] + + HDI = HDIofMCMC( paramSampleVec , credMass ) + postSummary[,"hdiMass"]=credMass + postSummary[,"hdiLow"]=HDI[1] + postSummary[,"hdiHigh"]=HDI[2] + + # Plot histogram. + cvCol = "darkgreen" + ropeCol = "darkred" + if ( is.null(breaks) ) { + if ( max(paramSampleVec) > min(paramSampleVec) ) { + breaks = c( seq( from=min(paramSampleVec) , to=max(paramSampleVec) , + by=(HDI[2]-HDI[1])/18 ) , max(paramSampleVec) ) + } else { + breaks=c(min(paramSampleVec)-1.0E-6,max(paramSampleVec)+1.0E-6) + border="skyblue" + } + } + if ( !showCurve ) { + par(xpd=NA) + histinfo = hist( paramSampleVec , xlab=xlab , yaxt=yaxt , ylab=ylab , + freq=F , border=border , col=col , + xlim=xlim , main=main , cex=cex , cex.lab=cex.lab , + breaks=breaks , ... ) + } + if ( showCurve ) { + par(xpd=NA) + histinfo = hist( paramSampleVec , plot=F ) + densCurve = density( paramSampleVec , adjust=2 ) + plot( densCurve$x , densCurve$y , type="l" , lwd=5 , col=col , bty="n" , + xlim=xlim , xlab=xlab , yaxt=yaxt , ylab=ylab , + main=main , cex=cex , cex.lab=cex.lab , ... ) + } + cenTendHt = 0.9*max(histinfo$density) + cvHt = 0.7*max(histinfo$density) + ROPEtextHt = 0.55*max(histinfo$density) + # Display central tendency: + mn = mean(paramSampleVec) + med = median(paramSampleVec) + mcmcDensity = density(paramSampleVec) + mo = mcmcDensity$x[which.max(mcmcDensity$y)] + if ( cenTend=="mode" ){ + text( mo , cenTendHt , + bquote(mode==.(signif(mo,3))) , adj=c(.5,0) , cex=cex ) + } + if ( cenTend=="median" ){ + text( med , cenTendHt , + bquote(median==.(signif(med,3))) , adj=c(.5,0) , cex=cex , col=cvCol ) + } + if ( cenTend=="mean" ){ + text( mn , cenTendHt , + bquote(mean==.(signif(mn,3))) , adj=c(.5,0) , cex=cex ) + } + # Display the comparison value. + if ( !is.null( compVal ) ) { + pGtCompVal = sum( paramSampleVec > compVal ) / length( paramSampleVec ) + pLtCompVal = 1 - pGtCompVal + lines( c(compVal,compVal) , c(0.96*cvHt,0) , + lty="dotted" , lwd=2 , col=cvCol ) + text( compVal , cvHt , + bquote( .(round(100*pLtCompVal,1)) * "% < " * + .(signif(compVal,3)) * " < " * + .(round(100*pGtCompVal,1)) * "%" ) , + adj=c(pLtCompVal,0) , cex=0.8*cex , col=cvCol ) + postSummary[,"compVal"] = compVal + postSummary[,"pGtCompVal"] = pGtCompVal + } + # Display the ROPE. + if ( !is.null( ROPE ) ) { + pInROPE = ( sum( paramSampleVec > ROPE[1] & paramSampleVec < ROPE[2] ) + / length( paramSampleVec ) ) + pGtROPE = ( sum( paramSampleVec >= ROPE[2] ) / length( paramSampleVec ) ) + pLtROPE = ( sum( paramSampleVec <= ROPE[1] ) / length( paramSampleVec ) ) + lines( c(ROPE[1],ROPE[1]) , c(0.96*ROPEtextHt,0) , lty="dotted" , lwd=2 , + col=ropeCol ) + lines( c(ROPE[2],ROPE[2]) , c(0.96*ROPEtextHt,0) , lty="dotted" , lwd=2 , + col=ropeCol) + text( mean(ROPE) , ROPEtextHt , + bquote( .(round(100*pLtROPE,1)) * "% < " * .(ROPE[1]) * " < " * + .(round(100*pInROPE,1)) * "% < " * .(ROPE[2]) * " < " * + .(round(100*pGtROPE,1)) * "%" ) , + adj=c(pLtROPE+.5*pInROPE,0) , cex=1 , col=ropeCol ) + + postSummary[,"ROPElow"]=ROPE[1] + postSummary[,"ROPEhigh"]=ROPE[2] + postSummary[,"pLtROPE"]=pLtROPE + postSummary[,"pInROPE"]=pInROPE + postSummary[,"pGtROPE"]=pGtROPE + } + # Display the HDI. + lines( HDI , c(0,0) , lwd=4 , lend=1 ) + text( mean(HDI) , 0 , bquote(.(100*credMass) * "% HDI" ) , + adj=c(.5,-1.7) , cex=cex ) + text( HDI[1] , 0 , bquote(.(signif(HDI[1],3))) , + adj=c(HDItextPlace,-0.5) , cex=cex ) + text( HDI[2] , 0 , bquote(.(signif(HDI[2],3))) , + adj=c(1.0-HDItextPlace,-0.5) , cex=cex ) + par(xpd=F) + # + return( postSummary ) +} + +#------------------------------------------------------------------------------ + +# Shape parameters from central tendency and scale: + +betaABfromMeanKappa = function( mean , kappa ) { + if ( mean <=0 | mean >= 1) stop("must have 0 < mean < 1") + if ( kappa <=0 ) stop("kappa must be > 0") + a = mean * kappa + b = ( 1.0 - mean ) * kappa + return( list( a=a , b=b ) ) +} + +betaABfromModeKappa = function( mode , kappa ) { + if ( mode <=0 | mode >= 1) stop("must have 0 < mode < 1") + if ( kappa <=2 ) stop("kappa must be > 2 for mode parameterization") + a = mode * ( kappa - 2 ) + 1 + b = ( 1.0 - mode ) * ( kappa - 2 ) + 1 + return( list( a=a , b=b ) ) +} + +betaABfromMeanSD = function( mean , sd ) { + if ( mean <=0 | mean >= 1) stop("must have 0 < mean < 1") + if ( sd <= 0 ) stop("sd must be > 0") + kappa = mean*(1-mean)/sd^2 - 1 + if ( kappa <= 0 ) stop("invalid combination of mean and sd") + a = mean * kappa + b = ( 1.0 - mean ) * kappa + return( list( a=a , b=b ) ) +} + +gammaShRaFromMeanSD = function( mean , sd ) { + if ( mean <=0 ) stop("mean must be > 0") + if ( sd <=0 ) stop("sd must be > 0") + shape = mean^2/sd^2 + rate = mean/sd^2 + return( list( shape=shape , rate=rate ) ) +} + +gammaShRaFromModeSD = function( mode , sd ) { + if ( mode <=0 ) stop("mode must be > 0") + if ( sd <=0 ) stop("sd must be > 0") + rate = ( mode + sqrt( mode^2 + 4 * sd^2 ) ) / ( 2 * sd^2 ) + shape = 1 + mode * rate + return( list( shape=shape , rate=rate ) ) +} + +#------------------------------------------------------------------------------ + +# Make some data files for examples... +createDataFiles=FALSE +if ( createDataFiles ) { + + source("HtWtDataGenerator.R") + N=300 + m = HtWtDataGenerator( N , rndsd=47405 ) + write.csv( file=paste0("HtWtData",N,".csv") , row.names=FALSE , m ) + + + # Function for generating normal data with normal outliers: + genYwithOut = function( N , pcntOut=15 , sdOut=3.0 ) { + inl = rnorm( N-ceiling(pcntOut/100*N) ) + out = rnorm( ceiling(pcntOut/100*N) ) + inl = (inl-mean(inl))/sd(inl) + out = (out-mean(out))/sd(out) * sdOut + return(c(inl,out)) + } + + # Two-group IQ scores with outliers + set.seed(47405) + y1 = round(pmax(50,genYwithOut(63,20,3.5)*17.5+106)) + y2 = round(pmax(50,genYwithOut(57,20,3.5)*10+100)) + write.csv( file="TwoGroupIQ.csv" , row.names=FALSE , + data.frame( Score=c(y1,y2) , + Group=c(rep("Smart Drug",length(y1)), + rep("Placebo",length(y2))) ) ) + + # One-group log-normal + set.seed(47405) + z = rnorm(123) + logY = (z-mean(z))/sd(z) * 0.5 + 5.5 # logY has mean 5.5 and sd 0.5 + y = round( exp(logY) , 2 ) + write.csv( file="OneGroupLogNormal.csv" , row.names=FALSE , + cbind(y) ) + + # One-group gamma + desiredMode = 250 + desiredSD = 100 + desiredRate = (desiredMode+sqrt(desiredMode^2+4*desiredSD^2))/(2*desiredSD^2) + desiredShape = 1+desiredMode*desiredRate + set.seed(47405) + y = round( rgamma( 153 , shape=desiredShape , rate=desiredRate ) , 2 ) + write.csv( file="OneGroupGamma.csv" , row.names=FALSE , cbind(y) ) + +} # end if createDataFiles