#
#
#
# doublenormal
#
#
# Simulate draws from a double normal
#
#
#
#
#
#
#
#
doublenormal=function(N=1e4,m=0,sd1=1,sd2=seq(0,2,by=.1))
{
n1=rnorm(N,m,sd1)
newsd=numeric()
for(i in 1:length(sd2))
{
n2=rnorm(N,n1,sd2[i])
newsd[i]=sd(n2)
}
sqrtdiff=1/sqrt(newsd-sd2)
plot(sd2,sqrtdiff)
cor=lm(sqrtdiff~sd2)
abline(cor)
return(data.frame(sd1,sd2,newsd))
}
#
#
#
#
#
#
#
# dnormprod0
#
#
# Probability density of product of two normal variates, both with mean 0, SD sx and sy
#
#
#
#
#
#
#
#
dnormprod0=function(z,sx,sy)
{
K=besselK(abs(z)/(sx*sy),nu=0)
return(K/(pi*sx*sy))
}
#
#
#
#
#
#
#
# dgammadexp
#
#
# PDF of a function formed by adding a gamma distribution to a symmetrical exponential
# distribution. This means simply adding a PDF for a gamma minus an exponential to the
# PDF for a gamma plus an exponential.
#
#
#
#
#
#
#
#
dgammadexp=function(z,mean,sd,lambda,draws=10000,div=.1,xrange=c(-10,30),graphit=F)
{
sumpart=dgammaPlusdexp(z,mean,sd,lambda)
diffpart=dgammaMinusdexp(z,mean,sd,lambda)
result=0.5*sumpart+0.5*diffpart
if(graphit)
{
r=mean/(sd^2)
a=mean*r
growth=rgamma(draws,shape=a,rate=r)
error=rsymexp(draws,center=0,rate=lambda)
obs=growth+error
minx=min(obs)-div
maxx=max(obs)+div
x=seq(minx,maxx,by=div)
hist(obs,breaks=x,xlim=xrange)
lines(z,draws*div*result)
}
return(result)
}
#
#
#
#
#
#
# dgammaMinusdexp
#
#
# The PDF of the difference between a gamma and a negative exponential distribution. The shape and rate of the gamma
# are a and r; mean and sd are the mean and sd of the gamma. Lambda is the rate of the exponential.
# This comes from the convolution of the two distributions, which is also a gamma, and the integral
# of the new gamma evaluated with pgamma. Note that lambda is the rate of the exponential.
#
#
#
#
#
#
#
#
dgammaMinusdexp=function(z,mean,sd,lambda,draws=10000,div=.01,xrange=c(0,25),xmax=4,graphit=F)
{
r=mean/(sd^2)
a=mean*r
part1=a*(log(r/(r+lambda)))+log(lambda)+lambda*z
part2=pgamma(z,shape=a,rate=r+lambda,lower.tail=FALSE,log=TRUE)
pdf.z=numeric()
pdf.z[z>0]=exp(part1+part2)[z>0]
pdf.z[z<=0]=exp(part1[z<=0])
if(graphit)
{
gamma=rgamma(draws,shape=a,rate=r)
expon=rexp(draws,rate=lambda)
net=gamma-expon
highvalue=max(max(z),max(gamma),max(expon),max(net))
lowvalue=min(min(z),min(gamma),min(expon),min(net))
x=seq(lowvalue-div,highvalue+div,by=div)
dev.set(2)
hist(gamma,breaks=x,xlim=xrange)
dev.set(3)
hist(expon,breaks=x,xlim=xrange)
dev.set(4)
hist(net,breaks=x,xlim=xrange)
}
# browser()
return(pdf.z)
}
#
#
#
#
#
#
#
# dgammaPlusdexp
#
#
# The PDF of the sum of gamma and negative exponential distribution. The shape and rate of the gamma
# are a and r; mean and sd are the mean and sd of the gamma. Lambda is the rate of the exponential.
# This is only an appoximation based on the observation that the resulting distribution is very
# close to a gamma. So I simply work out a new gamma whose mean is the sum of the means of
# the initial gamma and exponential, and likewise for the new variance.
# As long as gamma\'s rate > the exponential lambda, the distribution
# can be specified (using pgamma) as in dgammaMinusdexp. But if rate < lambda, this fails.
# The gamma approximation fails if the sd is sufficiently higher than the mean, and the mean
# is low. Then the gamma is absurdly skewed, and the shape of the sum is dominated by the exponential
# at low z, nothing like a true gamma. It appears to work reasonably well as long as the
# mean >= sd, or even if mean>=0.5*sd.
#
#
#
#
#
#
#
#
dgammaPlusdexp=function(z,mean,sd,lambda,draws=10000,div=.01,xrange=c(0,25),xmax=4,graphit=FALSE)
{
r=mean/(sd^2)
a=mean*r
pdf.z=numeric()
pdf.z[z<=0]=0
part2=numeric()
if(graphit)
{
gamma=rgamma(draws,shape=a,rate=r)
expon=rexp(draws,rate=lambda)
net=gamma+expon
highvalue=max(max(z),max(gamma),max(expon),max(net))
x=seq(0,highvalue+div,by=div)
dev.set(2)
hist(gamma,breaks=x,xlim=xrange)
dev.set(3)
hist(expon,breaks=x,xlim=xrange)
dev.set(4)
hist(net,breaks=x,xlim=xrange)
}
newmean=mean+1/lambda
newvar=sd^2+1/(lambda^2)
newshape=newmean^2/newvar
newrate=newmean/newvar
pdf.z[z>0]=dgamma(z[z>0],shape=newshape,rate=newrate)
if(graphit) lines(z,draws*div*pdf.z)
# browser()
return(pdf.z)
}
#
#
#
#
#
#
# dgamma.meansd
#
#
# Probability distribution of gamma, parameterized with mean and sd instead of shape and scale.
#
#
#
#
#
#
#
#
dgamma.meansd=function(x,mean,sd,log=FALSE)
{
k=(sd^2)/mean
s=mean/k
return(dgamma(x,shape=s,scale=k,log=log))
}
#
#
#
#
#
#
# rgamma.meansd
#
#
# Random draws of gamma, parameterized with mean and sd instead of shape and scale.
#
#
#
#
#
#
#
#
rgamma.meansd=function(n,mean,sd)
{
k=(sd^2)/mean
s=mean/k
return(rgamma(n=n,shape=s,scale=k))
}
#
#
#
#
#
#
# dgamma.mean
#
#
# A version of dgamma where the parameters are ordered so that the mean (mu = shape x scale) is the first argument.
# The second argument is x, the value at which dgammma is evaluated, and the third is scale. This is needed
# for use with mcmc1step, to do a metropolis step on the mean. Only mu can be a vector.
#
#
#
#
#
#
#
#
dgamma.mean=function(mu,x,k)
{
s=mu/k
if(k<=0 | x<=0) return(-Inf)
if(length(mu[mu<=0])>0) return(-Inf)
llike=dgamma(x,shape=s,scale=k,log=TRUE)
return(llike)
}
#
#
#
#
#
#
#
# dgamma.scale
#
#
# Like above, but with scale as the first parameter.
#
#
#
#
#
#
#
#
dgamma.scale=function(k,x,mu)
{
s=mu/k
return(dgamma(x,shape=s,scale=k))
}
#
#
#
#
#
#
#
# dpower
#
#
# A probability distribution defined by a power function. There is a dpareto in R, but quite different, with
# two parameters. In this, the exponent is the only parameter, and it must be < (-1); x must be >= 0.
#
#
#
#
#
#
#
#
dpower=function(x,beta,log=FALSE)
{
if(beta>=(-1)) return(rep(NA,length(x)))
xpos=x>=0
y=numeric()
y[xpos]=log(-beta-1)+beta*log(x[xpos]+1)
if(!log) y[xpos]=exp(y[xpos])
return(y)
}
#
#
#
#
#
#
#
# rpower
#
#
# Random draws based on the integral.
#
#
#
#
#
#
#
#
rpower=function(n,beta)
{
r=runif(n)
k=1/(beta+1)
return((1-r)^k-1)
}
#
#
#
#
#
#
# dasympower
#
#
# A bilateral power distribution, centered at center, decaying with exponent rate1 for positive x and rate2 for negative x. Both rate1 and rate2
# must be < (-1). See dpower, this is analogous to dasymexp for dpower. By R. Chisholm.
#
#
#
#
#
#
#
#
dasympower=function(x,center,rate1,rate2,log=FALSE)
{
logy=numeric()
right=x>=center
left=x
#
#
#
#
#
# rasympower
#
#
# Random draws from the bilateral power distribution, dasympower. By R. Chisholm.
#
#
#
#
#
#
#
#
rasympower <- function( n, rate1, rate2, c )
return (qasympower(runif(n),rate1,rate2,c))
#
#
#
#
# qasympower
#
#
# Quantiles from the bilateral power distribution, dasympower. By R. Chisholm.
#
#
#
#
#
#
#
#
qasympower <- function(y, rate1, rate2, c)
{
y=1-y
a=1/(1/(rate1-1)+1/(rate2-1))
return (ifelse( y < a/(rate1-1),
1+c-((rate1-1)*y/a)^(1/(1-rate1)),
((a/(rate1-1)+a/(rate2-1)-y)*(rate2-1)/a)^(1/(1-rate2))+c-1 ))
}
#
#
#
#
# dsymexp
#
#
# Probability distribution for a folded, symmetrical exponential. When x>=center,
# it's just a standard exponential. When x
#
#
#
#
#
# psymexp
#
#
# The CDF for the symmetric exponential.
#
#
#
#
#
#
#
#
psymexp=function(x,center,rate)
{
y=numeric()
right=x>=center
left=x
#
#
#
#
#
#
# rsymexp
#
#
# Drawing a random variate on the symmetric exponential, based on the cumulative
# probability, as given in psymexp. A random uniform number on (0,1) is plugged in
# the inverse of the cumulative distribution.
#
#
#
#
#
#
#
#
rsymexp=function(n,center,rate)
{
y=numeric()
r=runif(n)
left=r<0.5
right=r>=0.5
y[left]=center+log(2*r[left])/rate
y[right]=center+-log(2*(1-r[right]))/rate
return(y)
}
#
#
#
#
#
#
#
# dasymexp
#
#
# Probability distributions for a folded but asymmetrical exponential.
# When x>=center, it's a standard exponential. When x
center has integral rate2/(rate1+rate2),
# and the section x
#
#
#
# qasymexp
#
#
# Quantiles of dasymexp
#
#
# y is the vector of desired quantiles; c is the center parameter; rate1 is the rate for the right half, and rate2 the left.
#
#
#
#
#
qasymexp=function(y,rate1,rate2,c)
{
lower=y
#
#
#
#
# dasymexp
#
#
# Probability distributions for an asymmetrical Gaussian, that is with different standard deviations
# above and below the mode, or center. The mode is not the mean, though. The SD on the right is sigma1,
# and on the left, sigma2.
#
#
#
#
#
#
#
#
dasymnorm=function(x,center,sigma1,sigma2,log=FALSE)
{
y=numeric()
right=x>=center
left=x
#
#
#
#
# minum.normal
#
#
# The likelihood function for use by fitnorm.
#
#
#
#
#
#
#
#
minum.normal=function(param,x,y)
{
m=param[1]
s=param[2]
k=param[3]
SD=param[4]
if(SD<=0 | s<=0) return(-Inf)
pred=k*dnorm(x,mean=m,sd=s)
llike=dnorm(y,mean=pred,sd=SD,log=TRUE)
return(sum(llike))
}
#
#
#
#
#
#
#
# fitnorm
#
#
# Fitting a normal distribution to data
# Parameters are a mean and sd of the normal being fitted, a scaling parameter k
# and the last SD is for the likelihood of the deviations.
#
#
# y is vector data to be fitted, at points x
#
#
#
#
#
fitnorm=function(start.param=c(-1,-1,1),x,y)
{
# y=y/sum(y)
if(start.param[1]<0)
{
mstart=sum(x*y)
varstart=sum(y*(x-mstart)^2)
start.param[1]=mstart
start.param[2]=sqrt(varstart)
}
fit=optim(start.param,minum.normal,x=x,y=y,control=list(fnscale=-1))
return(fit$par)
}
#
#
#
#
#
#
#
# fit.pdf
#
#
# Fit a random variable x to any submitted probability distribution. The number of start parameters
# must match what the pdf needs.
#
#
#
#
#
#
#
#
fit.pdf=function(start.param=c(1,1),data,pdf=dnorm,xrange=c(0,1),badpar=default.badpar)
{
if(length(start.param)==1)
{
optim.pdf = function(param,x,func,bad)
{
if(bad(par)) return(-Inf)
return(sum(pdf(x,param,log=TRUE)))
}
fit=optimize(f=optim.pdf,x=data,interval=xrange,bad=badpar,maximum=TRUE)
return(fit$maximum)
}
if(length(start.param)==2)
{
optim.pdf = function(param,x,func,bad)
{
if(bad(param)) return(-Inf)
return(sum(pdf(x,param[1],param[2],log=TRUE)))
}
fit=optim(par=start.param,fn=optim.pdf,x=data,func=pdf,bad=badpar,control=list(fnscale=-1))
return(fit$par)
}
return('start.param must be length 1 or 2')
}
#
#
#
#
#
#
#
# default.badpar
#
#
# None given.
#
#
#
#
#
#
#
#
default.badpar=function(param)
return(FALSE)
#
#
#
#
#
#
#
# bad.paretopar
#
#
# Test whether parameters for the Pareto distribution are acceptable.
#
#
#
#
#
#
#
#
bad.paretopar=function(param)
{
if(param[2]<=1) return(TRUE)
return(FALSE)
}
#
#
#
#
#
#
#
# normalproduct
#
#
# A function which returns the product of 2 normal distributions, the first
# at x (a vector), the second at lag-x (lag is a scalar). The mean and SD
# of the second normal are linear functions of x, with meanint being the
# intercept, meanslope the slope, and CV the coefficient of variation.
# A convolution!
#
#
#
#
#
#
#
#
normalproduct=function(x,lag,mean1,sd1,meanint,meanslope,CV)
{
mean2=meanint+meanslope*x
sd2=CV*mean2
n1=dnorm(x,mean1,sd1)
n2=dnorm(lag-x,mean2,sd2)
return(n1*n2)
}
#
#
#
#
#
#
#
# dbeta.reparam
#
#
# This reparameterizes the beta distribution as a function of its mean and
# standard deviation. The mean must be between 0 and 1, and sd>0.
#
#
#
#
#
#
#
#
dbeta.reparam=function(x,mu,sd)
{
a=(1-mu)*mu^2/(sd^2)
b=(a/mu)-a
return(dbeta(x,shape1=a,shape2=b))
}
#
#
#
#
#
#
#
# betaproduct
#
#
# This is equivalent to the normal product above.
#
#
#
#
#
#
#
#
betaproduct=function(x,lag,mean1,sd1,meanint,meanslope,CV)
{
mean2=meanint+meanslope*x
sd2=CV*mean2
p1=dbeta.reparam(x,mean1,sd1)
p2=dbeta.reparam(lag-x,mean2,sd2)
return(p1*p2)
}
#
#
#
#
#
#
#
# beta.normalized
#
#
# Normalzing beta.total. No longer used.
#
#
#
#
#
#
#
#
beta.normalized=function(x,par1,xmin,par2,xmax)
{
intvalue=try(integrate(beta.total,lower=xmin,upper=xmax,
par1=par1,xmin=xmin,par2=par2,xmax=xmax)$value)
if(!is.null(attributes(intvalue))) intvalue=0
integral.beta=intvalue
y=rep(0,length(x))
if(integral.beta==0) return(y)
inrange=x>=xmin & x<=xmax
y[inrange]=beta.total(x[inrange],par1,xmin,par2,xmax)/integral.beta
return(y)
}
#
#
#
#
#
#
#
# beta.total
#
#
# A beta distribution on the interval xmin to xmax, instead of 0 to 1. No longer used.
#
#
#
#
#
#
#
#
beta.total=function(x,par1,xmin,par2,xmax)
{
y=(x-xmin)^par1*(xmax-x)^par2
if(length(y[is.infinite(y)])>0) return(rep(0,length(y)))
return(y)
}
#
#
#
#
#
#
#
# fit.beta.normal
#
#
# Finding a normal distribution which most closely fits a given beta distribution.
# Parameters for a beta function are submitted, and the best fit mean and SD of a
# normal distribution returned.
#
#
#
#
#
#
#
#
fit.beta.normal=function(x,par1,xmin,par2,xmax)
{
y=beta.normalized(x,par1,xmin,par2,xmax)
plot(x,y,type="l")
fit=optim(c((xmax-xmin)/2,(xmax-xmin)/2),minum.beta.normal,x=x,y=y)
pred=dnorm(x,mean=fit$par[1],sd=fit$par[2])
lines(x,pred,col="red")
return(fit$par)
}
#
#
#
#
#
#
#
# minum.beta.normal
#
#
# Function to be minimized for fitting normal to beta.
#
#
#
#
#
#
#
#
minum.beta.normal=function(param,x,y)
{
pred=dnorm(x,mean=param[1],sd=param[2])
dev=(y-pred)
return(sum(dev^2))
}
#
#
#
#
#
#
#
# dbinomrev
#
#
# A version of dbinom in which parameters are submitted in a different order.
#
#
#
#
#
#
#
#
dbinomrev=function(trial,p,success)
return(dbinom(x=success,size=trial,prob=p))
#
#
#
#
#
#
#
# dnormrev
#
#
# This reverses the order of parameters to dnorm, so that outer can be used
# with a vector of x, and two vectors for mean and sd (the latter two equal in
# length).
#
#
#
#
#
#
#
#
dnormrev=function(m,x,s,log=F) return(dnorm(x,mean=m,sd=s,log=log))
#
#
#
#
#
#
#
# dpois.rearrange
#
#
# This rearranges dpois so that it works on a single vector, with the first
# element being x and the remaining all being used as lambdas.
#
#
#
#
#
#
#
#
dpois.rearrange=function(v,log=F)
return(dpois(v[1],lambda=v[-1],log=log))
#
#
#
#
#
#
#
# logit
#
#
# Logit transformation for a probability >0 and < 1
#
#
#
#
#
#
#
#
logit=function(p)
{
result=rep(NA,length(p))
inc=(p>0 & p<1)
result[inc]=log(p[inc]/(1-p[inc]))
return(result)
}
#
#
#
#
#
#
#
# invlogit
#
#
# Inverse logit transformation, turns a logit back into a probability.
#
#
#
#
#
#
#
#
invlogit=function(x) return(exp(x)/(1+exp(x)))
#
#
#
#
#
#
#
# pweibull.3param
#
#
# CDF of three-parameter Weibull
# (http://www.itl.nist.gov/div898/handbook/apr/section1/apr162.htm)
#
#
#
#
#
#
#
#
pweibull.3param=function(x,x0,shape,scale)
{
z=(x-x0)/scale
y=1-exp(-z^shape)
y[x<=x0]=0
return(y)
}
#
#
#
#
#
#
#
# dweibull.3param
#
#
# PDF of three-parameter Weibull
#
#
#
#
#
#
#
#
dweibull.3param=function(x,x0,shape,scale)
{
z=(x-x0)/scale
y=(shape/(x-x0))*(z^shape)*exp(-z^shape)
y[x<=x0]=0
return(y)
}
#
#
#
#
#
#
#
# weibull.median.3param
#
#
# Median of three-parameter Weibull
#
#
#
#
#
#
#
#
weibull.median.3param=function(x0,shape,scale)
{
return(scale*(log(2))^(1/shape)+x0)
}
#
#
#
#
#
#
#
# weibull.mean.3param
#
#
# Mean of three-parameter Weibull
#
#
#
#
#
#
#
#
weibull.mean.3param=function(x0,shape,scale)
{
return(scale*gamma(1+1/shape)+x0)
}
#
#
#
#
#
#
#
# weibull.sd.3param
#
#
# SD of three-parameter Weibull
#
#
#
#
#
#
#
#
weibull.sd.3param=function(x0,shape,scale)
{
return(sqrt(scale*scale*gamma(1+2/shape)-(scale*gamma(1+1/shape))^2))
}
#
#
#
#
#
#
#
# dexp.sin
#
#
# Four-parameter exponential sin, as a probability distribution
#
#
#
#
#
#
#
#
dexp.sin=function(x,lo,hi,skew,tail)
{
integral=integrate(exponential.sin,lower=lo,upper=lo+hi,asymp=1,b=lo,c=hi,d=skew,e=tail)$val
return(exponential.sin(x,asymp=1,b=lo,c=hi,d=skew,e=tail)/integral)
}
#
#
#
#
#
#
#
# exponential.sin
#
#
# Five-parameter exponential sin
#
#
#
#
#
#
#
#
exponential.sin=function(x,asymp,b,c,d,e)
{
z=pi*((x-b)/c)^d
y=asymp*((sin(z))^e)
y[xb+c]=0
return(y)
}
#
#
#
#
#
#
#
# pexp.sin
#
#
# CDF of four-parameter exponential sin
#
#
#
#
#
#
#
#
pexp.sin=function(x,b,c,d,e)
{
return(cumsum(dexp.sin(x,b,c,d,e))/sum(dexp.sin(x,b,c,d,e)))
}
#
#
#
#
#
#
#
#
# mvrnormRC
#
#
# Function that takes a variance-covariance matrix and produces normal variates
# following it, but with means 0. The R function mvrnorm does this too; this was a
# test of the algorithm from Tommaso Zillio. Sigma must be square. N is the number
# to draw.
#
#
#
#
#
#
#
#
mvrnormRC=function(N,Sigma)
{
dimension=dim(Sigma)[1]
SVD=svd(Sigma)
M = SVD$u %*% diag(sqrt(SVD$d))
norm=x=matrix(nrow=N,ncol=dimension)
for(i in 1:N) norm[i,]=rnorm(dimension)
for(i in 1:N) x[i,] = t(M %*% norm[i,])
return(x)
}
#
#
#
#
#
#
#
# dmixnorm
#
#
# Mixed normal distribution. The parameter f is the probability
# of following the first, with mean1 and sd1; 1-f is the probability
# for the second normal
#
#
#
#
#
#
#
#
dmixnorm=function(x,mean1,mean2,sd1,sd2,f,log=FALSE)
{
y1=dnorm(x,mean=mean1,sd=sd1)
y2=dnorm(x,mean=mean2,sd=sd2)
result=f*y1+(1-f)*y2
if(log) return(log(result))
else return(result)
}
#
#
#
#
#
#
#
# rmixnorm
#
#
# Random draw on the mixed normal distribution.
#
#
#
#
#
#
#
#
rmixnorm=function(n,mean1,mean2,sd1,sd2,f)
{
r=runif(n)
part1=which(r>f)
part2=which(r<=f)
y1=rnorm(length(part1),mean=mean1,sd=sd1)
y2=rnorm(length(part2),mean=mean2,sd=sd2)
return(c(y1,y2))
}
#
#
#
#
#
#
#
# minum.mixnorm
#
#
# Fit a mixture of 2 normals.
#
#
#
#
#
#
#
#
minum.mixnorm=function(param,data)
{
mean1=param[1]
mean2=param[2]
sd1=param[3]
sd2=param[4]
f=param[5]
llike=dmixnorm(data,mean1=mean1,mean2=mean2,sd1=sd1,sd2=sd2,f=f,log=TRUE)
return(sum(llike))
}
#
#
#
#
#
#
#
# logistic.inter
#
#
# Logistic function with intercept parameterization (ie, first parameter is y when all x=0). The input x are all independent variables, in a matrix
# with each column one of the variables. The number of rows is the number of datapoints. Just one inter, which is the value
# at all x=0, and passed as param[1]. Slope parameters follow, one per column of x.
# This is identical to standard
#
#
#
#
#
#
#
#
logistic.inter=function(x,param,log=FALSE,...)
{
x=as.matrix(x)
nopredictor=dim(x)[2]
inter=param[1]
if(inter<=0 | inter>=1) return(rep(NA,dim(x)[1]))
slope=param[2:(nopredictor+1)]
asymp=IfElse(length(param)>nopredictor+1,param[nopredictor+2],1)
basement=IfElse(length(param)>nopredictor+2,param[nopredictor+3],0)
X=x%*%slope
d=(1-inter)/inter
y=exp(X)/(d+exp(X))
result=y*(asymp-basement)+basement
infinite.pos=which(is.infinite(exp(X)))
result[infinite.pos]=asymp
if(log) return(log(result))
return(result)
}
#
#
#
#
#
#
# logistic.standard
#
#
# This is standard logistic function, but with asymptote and basement allowed. The latter are only implemented
# if extra parameters are passed. Moved from calc.surviv.r on 25 July 2010 to provide the standard logistic.
#
#
#
#
#
#
#
#
logistic.standard=function(x,param,log=FALSE,...)
{
x=as.matrix(x)
nopredictor=dim(x)[2]
a=param[1]
b=param[2:(nopredictor+1)]
asymp=IfElse(length(param)>nopredictor+1,param[nopredictor+2],1)
basement=IfElse(length(param)>nopredictor+2,param[nopredictor+3],0)
X=x%*%b
pwr=a+X
y=invlogit(pwr)
prob=y*(asymp-basement)+basement
infinite.pos=which(is.infinite(exp(pwr)))
prob[infinite.pos]=basement+asymp
if(log) return(log(prob))
return(prob)
}
#
#
#
# #
#
# logistic.power
#
#
# This is the Gaussian logistic function, where logit is a second-order polynomial of x; with asymptote and basement allowed.
# There must be 1+2*nopredictors parameters; the asympotote and basement are only implemented
# if extra parameters are passed.
#
#
#
#
#
#
#
#
logistic.power=function(x,param,log=FALSE,...)
{
x=as.matrix(x)
nopredictor=dim(x)[2]
a=param[1]
slopeindex=1:(2*nopredictor+1)
b=param[slopeindex][!is.odd(slopeindex)]
b2=param[slopeindex][is.odd(slopeindex)][-1]
asymp=IfElse(length(param)>2*nopredictor+1,param[2*nopredictor+2],1)
basement=IfElse(length(param)>2*nopredictor+2,param[2*nopredictor+3],0)
X=x%*%b
X2=(x^2)%*%b2
pwr=a+X+X2
y=invlogit(pwr)
prob=y*(asymp-basement)+basement
infinite.pos=which(is.infinite(exp(pwr)))
# prob[infinite.pos]=basement+asymp
prob[infinite.pos]=NA
if(log) return(log(prob))
return(prob)
}
#
#
#
#
#
# logistic.power.mode
#
#
# This is the Gaussian logistic function, where logit is a second-order polynomial of x, but with third parameter the position
# of the critical point (peak or trough). Given 3 parameters for standard logistic.power, the mode is at -param[2]/2*param[3]).
# Asymptote and basement are allowed. There must be 1+2*nopredictors parameters; the asympotote and basement are only implemented
# if extra parameters are passed.
#
#
#
#
#
#
#
#
logistic.power.mode=function(x,param,log=FALSE,...)
{
x=as.matrix(x)
nopredictor=dim(x)[2]
a=param[1]
slopeindex=1:(2*nopredictor+1)
b=param[slopeindex][!is.odd(slopeindex)]
b2=param[slopeindex][is.odd(slopeindex)][-1]
b2=(-1)*b/(2*b2)
param[slopeindex][is.odd(slopeindex)][-1]=b2
prob=logistic.power(x=x,param=param,log=log)
return(prob)
}
#
#
#
#
#
# logistic.power_simple
#
#
# This is a mixture of logistic and logistic-standard models. The predictors n get a power model, the remaining a simple
# model. So if nopredictors==8, and n=c(1,7), then the first and seventh predictors use a power model, while the rest a simple model.
# There must be 1+length(n)+nopredictors parameters, plus additional 1 or 2 for asymptote and basement.
#
#
#
#
#
#
#
#
logistic.power_simple=function(x,param,log=FALSE,N=1,...)
{
x=as.matrix(x)
nopredictor=dim(x)[2]
noparam=length(param)
n=N
non=length(n)
if(non==nopredictor) return(logistic.power(x=x,param=param,log=log))
x1=as.matrix(x[,n])
x2=as.matrix(x[,-n])
a=param[1]
slopes=param[-1]
b=b2=standardparam=numeric()
counter=1
for(j in 1:nopredictor)
{
if(j %in% n)
{
b=c(b,slopes[counter])
b2=c(b2,slopes[counter+1])
counter=counter+2
}
else
{
standardparam=c(standardparam,slopes[counter])
counter=counter+1
}
}
asymp=IfElse(length(param)>non+nopredictor+1,param[noparam-1],1)
basement=IfElse(length(param)>non+nopredictor+2,param[noparam-2],0)
# browser()
X=x1%*%b
X2=x1^2%*%b2
Xstan=x2%*%standardparam
pwr=a+X+X2+Xstan
y=exp(pwr)/(exp(pwr)+1)
prob=y*(asymp-basement)+basement
infinite.pos=which(is.infinite(exp(pwr)))
# prob[infinite.pos]=basement+asymp
prob[infinite.pos]=NA
if(log) return(log(prob))
return(prob)
}
#
#
#
#
# logistic.ctr
#
#
# This is logistic function with intercept parameterization (see logistic above), but with centering on x allowed.
# If center==NA, then the x values are centered on their median.
# Or center can be a number. If NULL, no centering is done.
# Moved from calc.surviv.r on 25 July 2010 to provide the standard logistic.
#
#
#
#
#
#
#
#
logistic.ctr=function(x,param,log=FALSE,center=NA)
{
x=as.matrix(x)
nopredictor=dim(x)[2]
# inter=param[1]
# if(inter<=0 | inter>=1) return(rep(NA,dim(x)[1]))
slope=param[2:(nopredictor+1)]
asymp=IfElse(length(param)>nopredictor+1,param[nopredictor+2],1)
basement=IfElse(length(param)>nopredictor+2,param[nopredictor+3],0)
if(is.null(center)) x=x
else if(is.na(center)) x=sweep(x,2,colMedians(x),'-')
else x=sweep(x,2,center,'-')
X=x%*%slope
d=(1-inter)/inter
y=exp(X)/(d+exp(X))
result=y*(asymp-basement)+basement
infinite.pos=which(is.infinite(exp(X)))
result[infinite.pos]=asymp
if(log) return(log(result))
return(result)
}
#
#
#
#
#
#
#
# logistic.multiplicative
#
#
# Logistic with a pair of parameters for each x; y=product of all the logistics. First set of parameters are intercepts, then
# an equal number of slopes. If there are additional parameters, they are asymptote and basement.
#
#
#
#
#
#
#
#
logistic.multiplicative=function(x,param,log=FALSE)
{
x=as.matrix(x)
nopredictor=dim(x)[2]
noparam=length(param)
inter=param[1:nopredictor]
slope=param[(nopredictor+1):(2*nopredictor)]
# if(length(which(inter<=0 | inter>=1))>0) return(rep(NA,dim(x)[1]))
asymp=IfElse(length(param)>2*nopredictor,param[2*nopredictor+1],1)
basement=IfElse(length(param)>2*nopredictor+1,param[2*nopredictor+2],0)
result=rep(1,dim(x)[1])
for(i in 1:nopredictor)
{
paramset=param[c(i,i+nopredictor)]
result=result*logistic.standard(x[,i],paramset)
}
result=basement+asymp*result
if(log) return(log(result))
return(result)
}
#
#
#
#
#
#
#
# constant
#
#
# A function to return a constant at all predictors x. The predictors are a numeric vector, or a matrix of
# many predictors (each column a single predictor). This function is useful in modeling, where the name of a function
# is passed; this allows modeling where a response is a constant across all values of x.
#
#
#
#
#
#
#
#
constant=function(x,param)
{
if(is.null(dim(x))) return(rep(param,length(x)))
return(rep(param,dim(x)[1]))
}
#
#
#
#
#
#
#
# gen.logistic
#
#
# A general logistic function, similar to SSlogis. The various functions logistic above should subsume this.
#
#
#
#
#
#
#
#
gen.logistic=function(x,param)
{
a=param[1]
b=param[2]
c=param[3]
if(length(param)>3) d=param[4]
else d=0
expon=exp(c*x-b)
logis=expon/(1+expon)
return(a*logis+d)
}
#
#
#
#
#
#
# center.predictors
#
#
# Transform all data by subtracting a constant, either the mean, median value, or a submitted constant.
# The input may be a vector or a matrix. If a matrix, each column is centered on its mean (median), or by passing a
# vector of constants. Note that setting by=0 amounts to no change.
#
#
#
#
#
# center.predictors(x=c(1,4,7,0),by='median')
# center.predictors(x=c(1,4,7,0),by=2)
#
#
center.predictors=function(x,by='mean')
{
error='Type must be mean, median, or numbers to be subtracted\n'
if(is.null(dim(x)))
{
if(by[1]=='mean') return(x-mean(x,na.rm=TRUE))
if(by[1]=='median') return(x-median(x,na.rm=TRUE))
if(is.numeric(by)) return(x-by)
return(error)
}
if(by[1]=='mean') return(sweep(x,2,colMeans(x)))
if(by[1]=='median') return(sweep(x,2,colMedians(x)))
if(is.numeric(by))
{
k=by
for(i in 2:dim(x)[1]) k=rbind(k,by)
return(x-k)
}
return(error)
}
#
#
#
#
#
#
#
# fit.logistic
#
#
# A function to fit a set of data y, observed at the vector x, to a generalized
# logistic function, using least squares.
#
#
#
#
#
#
#
#
fit.logistic=function(x,y,start.param,tech="Nelder-Mead")
{
fit=optim(start.param,logistic.sum.squares,x=x,obs=y,method=tech,control=list(maxit=50000))
return(fit)
}
#
#
#
#
#
#
#
# logistic.sum.squares
#
#
# Sets a prediction based on a generalized logistic, then returns the sum
# of squared deviations
#
#
#
#
#
#
#
#
logistic.sum.squares=function(param,x,obs)
{
pred=gen.logistic(x,param)
if(length(which(pred<1e-5))>0) pred=rep(0,length(x))
return(sum((obs-pred)^2))
}
#
#
#
#
#
#
# asymp.ht
#
#
# The function from Sean Thomas which produces an asymptote for y as a function of x.
# Original version: y=ymax*(1-exp(-a*x^b))
# This is the centered version, with x normalized by dividing by parameter k, which is the x value at which
# y is half ymax. This eliminates correlation between the a and b parameters in the above version, but
# not the correlation between parameters 1 and 2.
#
#
#
#
#
#
#
#
asymp.ht=function(x,param,...)
{
ymax=param[1]
k=param[2]
b=param[3]
xcent=x/k
return(ymax*(1-2^(-xcent^b)))
}
#
#
#
#
#
#
# asymp.ht.fixmax
#
#
# Same formulation, but the asymptote is fixed, so only two parameters fitted.
#
#
#
#
#
#
#
#
asymp.ht.fixmax=function(x,param,asymp)
{
ymax=asymp
k=param[1]
b=param[2]
xcent=x/k
return(ymax*(1-2^(-xcent^b)))
}
#
#
#
#
#
#
#
# exp.2par
#
#
# An exponential distribution with an asymptote.
#
#
#
#
#
#
#
#
exp.2par=function(x,param,asymp)
{
ymax=asymp
a=param[1]
rate=param[2]
return(ymax*(1-a*exp(-rate*x)))
}
#
#
#
#
#
#
#
# linear.model
#
#
# A simple linear model, where the first parameter is intercept, remaining parameters are slopes
#
#
#
#
#
#
#
#
linear.model=function(x,param)
{
x=as.matrix(x)
nopredictor=dim(x)[2]
a=param[1]
b=param[2:(nopredictor+1)]
return(a+x%*%b)
}
#
#
#
#
#
#
# simple.model
#
#
# A trivial model to return a different value at every x. The values are passed as a vector of the same length as x.
#
#
#
#
#
#
#
#
simple.model=function(x,param)
{
N=length(x)
if(length(param)!=N) return(rep(NA,N))
return(param)
}
#
#
#
#
# linear.model.ctr
#
#
# A simple linear model, where the first parameter is intercept, second the slope, and x can be centered on their median.
#
#
#
#
#
#
#
#
linear.model.ctr=function(x,param,xcenter=NULL)
{
if(is.null(xcenter)) x=x-median(x)
return(param[2]*x+param[1])
}
#
#
#
#
#
#
# expon.model
#
#
# Exponential model, y = a exp(b1*x1 + b2*x2) for any number of predictors x. Compare to linear.model.
#
#
#
#
#
#
#
#
expon.model=function(x,param)
{
x=as.matrix(x)
nopredictor=dim(x)[2]
a=param[1]
b=param[2:(nopredictor+1)]
expon=a+x%*%b
return(exp(expon))
}
#
#
#
#
#
# log.model
#
#
# Logarithmic model, y = a + b1 log(x1) + b2 log(x2) for any number of predictors x. Compare to linear.model. All x should be positive.
#
#
#
#
#
#
#
#
log.model=function(x,param)
{
x=as.matrix(x)
nopredictor=dim(x)[2]
a=param[1]
b=param[2:(nopredictor+1)]
y=a+log(x)%*%b
return(y)
}
#
#
#
#
#
# constant.linear
#
#
# A model which is constant for xlim. The first parameter is the slope,
# second the x value of break point, third the lower limit.
#
#
#
#
#
#
#
#
constant.linear=function(x,param,xcenter=NULL)
{
line=drp(parallel.line(0,m=param[1],param[2],param[3])[1,])
y=linear.model(x,line)
y[x
#
#
#
#
#
#
# linearmodel.bin
#
#
# Multiple bin model predicting y as a function of x.
# A model with B bins has B-1 parameters for breaks points, B parameters as slopes, and one intercept (y at x=0).
# Within each bin, y is a linear function of x.
# Predictor is centered at medv.
# This function accepts one set of parameters, separates the bin, slope, and intercept, and submits to the
# general version of the function.
#
#
#
#
#
#
#
#
linearmodel.bin=function(x,param,...)
{
extra=list(...)
if(is.null(extra$LINEARBINMEDIAN)) medv=0
else medv=extra$LINEARBINMEDIAN
noparam=length(param)
bins=(noparam)/2
if(is.null(medv)) medv=median(x)
if(bins==1) return(linear.model(x,param,medv))
b=param[1:(bins-1)]-medv
v=x-medv
N=length(b)
m=param[bins:(noparam-1)]
inter=param[noparam]
pred=linearmodel.bin.set(v=v,binparam=b,param=c(m,inter))
return(pred)
}
#
#
#
#
#
#
#
# linearmodel.bin.set
#
#
# This does the work of calculating predicted values at each independent variable, given bin and line parameters separately,
# the latter being slope and intercept parameters in one vector.
# Completely revised June 2011 to use geometry.r functions for lines.
# Create a list of lines, one for each bin, and an intersection (intercept), one for each bin:
# Bin 1: use parallel.line to find line[[1]] through b[1],inter (since inter is defined as y(b[1]) with slope[1]
# Bin 2: IBID for line[[2]], but using slope[2]
# Bin >=3: If there are more bins, find the intersection of line[[2]] with b[2], then parallel.line to get line[[3]] with slope[3] through that intersection
#
#
#
#
#
#
#
#
linearmodel.bin.set=function(v,param,binparam)
{
m=param[-length(param)]
inter=param[length(param)]
failed=rep(NA,length(v))
if((length(binparam)+1)!=length(m)) return(failed)
sorted=sort(binparam)
if(length(which(sorted!=binparam))>0) return(failed)
K=IfElse(diff(range(v))>diff(range(binparam)),diff(range(v)),diff(range(binparam)))
lower=IfElse(min(v)max(binparam),max(v)+K,max(binparam)+K)
b=c(lower,binparam,upper)
bins=length(m)
N=length(b)
pts=data.frame(x=b,y=rep(NA,N))
if(upper<0) ## True if every binparam and every v < 0, so every b is < 0
{
pts$y[N]=inter+m[bins]*upper
# browser()
for(i in (N-1):1) pts$y[i]=pts$y[i+1]-m[i]*(b[i+1]-b[i])
}
else if(lower>0) ## True if every binparam and every v > 0, so very first b > 0
{
pts$y[1]=inter+m[1]*lower
# browser()
for(i in 2:N) pts$y[i]=pts$y[i-1]+m[i-1]*(b[i]-b[i-1])
}
else ## True when the b or the v span 0
{
z=which(b>0)[1]
pts$y[z]=inter+m[z-1]*(b[z])
# browser()
for(i in (z-1):1) pts$y[i]=pts$y[i+1]-m[i]*(b[i+1]-b[i])
if(z=start & v<=end
thisline=pts.to.interceptslope(pts[i,],pts[i+1,])
pred[insection]=linear.model(v[insection],param=thisline)
}
return(pred)
}
#
#
#
#
#
#
#
# addBinParam
#
#
# Given parameters for a model with N linear bins, creates parameters for N+1 bins which produce the same model.
#
#
#
#
#
#
#
#
addBinParam=function(x,best,bin)
{
if(bin==1)
return(c(median(x),best[2],best[2],best[1]))
internal=best[1:(bin-1)]
slope=best[bin:(length(best)-1)]
intercept=best[length(best)]
div=c(min(x),internal,max(x))
widest=which.max(diff(div))
newbreak=0.5*diff(div[c(widest:(widest+1))])+div[widest]
newinternal=sort(c(internal,newbreak))
if(widest
#
#
#
# constant.bin
#
#
# A model like piecewise regression (linearmodel.bin), but y is a constant within each bin.
# With B bins, 2B-1 parameters are needed. First B-1 parameters are bin breaks. The remaining B
# parameters are the constant value of y in each bin.
#
#
#
#
#
#
#
#
constant.bin=function(x,param)
{
noparam=length(param)
if(noparam<3 | !is.odd(noparam)) return(rep(NA,length(x)))
bins=(noparam+1)/2
b=param[1:(bins-1)]
m=param[bins:noparam]
fullbreaks=c(min(x)-1,b,max(x)+1)
xcat=cut(x,breaks=fullbreaks,right=FALSE,labels=1:bins)
y=rep(m[1],length(x))
for(i in 2:bins) y[xcat==i]=m[i]
return(y)
}
#
#
#
#
#
#
#
# dpois.max
#
#
# A probability distribution which is simply a curtailed poisson: all probability above a maximum integer,
# maxx, is given to that maximum. For all x
#
#
#
#
#
#
#
dpois.max=function(x,lambda,maxx,log=FALSE)
{
result=standard=dpois(x,lambda=lambda)
below=which(xmaxx)
exact=which(x==maxx)
if(length(below)>0) result[below]=standard[below]
if(length(above)>0) result[above]=0
if(length(exact)>0) result[exact]=1-sum(dpois(0:(maxx-1),lambda=lambda))
if(log) return(log(result))
else return(result)
}
#
#
# dpois.trunc
#
#
# A zero-truncated Poisson distribution.
#
#
#
#
#
#
#
#
dpois.trunc=function(x,lambda,log=FALSE)
{
result=dpois(x,lambda=lambda)/(1-exp(-lambda))
zero=x==0
result[zero]=0
if(log) return(log(result))
else return(result)
}
#
#
#
#
#
# dpois.maxtrunc
#
#
# A zero-truncated Poisson distribution with a ceiling (combining dpois.max and dpois.trunc).
#
#
#
#
#
#
#
#
dpois.maxtrunc=function(x,lambda,maxx,log=FALSE)
{
result=dpois.max(x,lambda=lambda,maxx=maxx)/(1-exp(-lambda))
zero=x==0
result[zero]=0
if(log) return(log(result))
else return(result)
}
#
#
#
#
#
#
# rpois.max
#
#
# Random draws on dpois.max
#
#
#
#
#
#
#
#
rpois.max=function(N,lambda,maxx)
{
result=rpois(N,lambda=lambda)
result[result>maxx]=maxx
return(result)
}
#
#
#
#
#
#
#
# rpois.trunc
#
#
# Random draws on dpois.trunc. This is taken unchanged from an answer Peter Dalgaard posted to a list serve in 2005. I checked
# by comparing to dpois.trunc and it was spot on.
#
#
#
#
#
#
#
#
rpois.trunc=function(N,lambda)
{
return(qpois(runif(N, dpois(0, lambda), 1), lambda))
}
#
#
#
#
#
#
#
# asymptote.exp
#
#
# A 3-parameter function which asymptotes as x->infinity. The 3rd param must be >=0 and x>=0. The asymptote is a, the intercept a-b.
#
#
#
#
#
#
#
#
asymptote.exp=function(x,param)
{
a=param[1]
b=param[2]
k=param[3]
if(k<0) return(rep(NA,length(x)))
y=a-b*exp(-k*x)
y[x<0]=NA
return(y)
}
#
#
#
#
#
#
#
# graph.mvnorm
#
#
# Graphs contours for an mvnorm, with parameters submitted as a
# vector, as described above, for a single 2D Gaussian.
# The probability has to be calculated on a grid so contours can be drawn.
# The argument exclude allows parts to be set to zero.
#
#
#
#
#
#
#
#
graph.mvnorm=function(param,x,y,div=10,add=FALSE,clr="gray",lw=.5,pw=2,exclude=NULL,returnit=FALSE)
{
arranged=composeParam.GaussianMap(drop(as.matrix(param)),N=1)
pts=full.xygrid(x,y)
probdensity=dmvnorm(pts,mean=arranged$center,sigma=arranged$sigma[[1]])
if(!is.null(exclude)) probdensity[exclude]=0
breaks=seq(0,max(probdensity),len=div)
contour.quaddata(probdensity,x=x,y=y,breaks=breaks,add=add,clr=clr,lwidth=lw,w=pw)
if(returnit) return(probdensity)
}
#
#
#
#
#
#
# pospower
#
#
# Raise to any power, but with negative numbers converted to positive first, then reverted afterward. It thus follows what is normal behavior when
# the exponent were a negative integer, but works also for even integers or any real exponent. For example, pospower(-4,0.5)=-2.
#
#
#
#
# pospower(-4,0.5)
#
#
pospower=function(x,expon)
{
result=sign(x)*(abs(x)^(expon))
return(result)
}
#
#
#
#