[R] tricks

November 17, 2009

Quantitative link strength for APE cophyloplot

Filed under: Plot — Tags: , — Timothée Poisot @ 10:42 pm

Just add a third column with link strength to the association matrix

plotCophylo2 <- function (x, y, assoc = assoc, use.edge.length = use.edge.length,
space = space, length.line = length.line, gap = gap, type = type,
return = return, col = col, show.tip.label = show.tip.label,
font = font)
{
if(ncol(assoc)==2)
{
assoc <- cbind(assoc,rep(1,nrow(assoc)))
}
res <- list()
left <- max(nchar(x$tip.label, type = "width")) + length.line
right <- max(nchar(y$tip.label, type = "width")) + length.line
space.min <- left + right + gap * 2
if ((space <= 0) || (space < space.min))
space <- space.min
N.tip.x <- Ntip(x)
N.tip.y <- Ntip(y)
res$N.tip.x <- N.tip.x
res$N.tip.y <- N.tip.y
a <- plotPhyloCoor(x, use.edge.length = use.edge.length,
type = type)
res$a <- a
b <- plotPhyloCoor(y, use.edge.length = use.edge.length,
direction = "leftwards", type = type)
a[, 2] <- a[, 2] - min(a[, 2])
b[, 2] <- b[, 2] - min(b[, 2])
res$b <- b
b2 <- b
b2[, 1] <- b[1:nrow(b), 1] * (max(a[, 1])/max(b[, 1])) +
space + max(a[, 1])
b2[, 2] <- b[1:nrow(b), 2] * (max(a[, 2])/max(b[, 2]))
res$b2 <- b2
c <- matrix(ncol = 2, nrow = nrow(a) + nrow(b))
c[1:nrow(a), ] <- a[1:nrow(a), ]
c[nrow(a) + 1:nrow(b), 1] <- b2[, 1]
c[nrow(a) + 1:nrow(b), 2] <- b2[, 2]
res$c <- c
plot(c, type = "n", xlim = NULL, ylim = NULL, log = "", main = NULL,
sub = NULL, xlab = NULL, ylab = NULL, ann = FALSE, axes = FALSE,
frame.plot = FALSE)
if (type == "cladogram") {
for (i in 1:(nrow(a) - 1)) segments(a[x$edge[i, 1], 1],
a[x$edge[i, 1], 2], a[x$edge[i, 2], 1], a[x$edge[i,
2], 2])
for (i in 1:(nrow(b) - 1)) segments(b2[y$edge[i, 1],
1], b2[y$edge[i, 1], 2], b2[y$edge[i, 2], 1], b2[y$edge[i,
2], 2])
}
if (type == "phylogram") {
for (i in (N.tip.x + 1):nrow(a)) {
l <- length(x$edge[x$edge[, 1] == i, ][, 1])
for (j in 1:l) {
segments(
a[x$edge[x$edge[, 1] == i, ][1, 1], 1],
a[x$edge[x$edge[, 1] == i, 2], 2][1],
a[x$edge[x$edge[, 1] == i, ][1, 1], 1],
a[x$edge[x$edge[, 1] == i, 2], 2][j]
)
segments(
a[x$edge[x$edge[, 1] == i, ][1, 1], 1],
a[x$edge[x$edge[, 1] == i, 2], 2][j],
a[x$edge[x$edge[, 1] == i, 2], 1][j],
a[x$edge[x$edge[, 1] == i, 2], 2][j]
)
}
}
for (i in (N.tip.y + 1):nrow(b)) {
l <- length(y$edge[y$edge[, 1] == i, ][, 1])
for (j in 1:l) {
segments(b2[y$edge[y$edge[, 1] == i, ][1, 1],
1], b2[y$edge[y$edge[, 1] == i, 2], 2][1],
b2[y$edge[y$edge[, 1] == i, ][1, 1], 1], b2[y$edge[y$edge[,
1] == i, 2], 2][j])
segments(b2[y$edge[y$edge[, 1] == i, ][1, 1],
1], b2[y$edge[y$edge[, 1] == i, 2], 2][j],
b2[y$edge[y$edge[, 1] == i, 2], 1][j], b2[y$edge[y$edge[,
1] == i, 2], 2][j])
}
}
}
if (show.tip.label) {
text(a[1:N.tip.x, ], cex = 0, font = font, pos = 4, labels = x$tip.label)
text(b2[1:N.tip.y, ], cex = 1, font = font, pos = 2,
labels = y$tip.label)
}
lsa <- 1:N.tip.x
lsb <- 1:N.tip.y
decx <- array(nrow(assoc))
decy <- array(nrow(assoc))
for (i in 1:nrow(assoc)) {
if (show.tip.label) {
decx[i] <- strwidth(x$tip.label[lsa[x$tip.label ==
assoc[i, 1]]])
decy[i] <- strwidth(y$tip.label[lsb[y$tip.label ==
assoc[i, 2]]])
}
else {
decx[i] <- decy[i] <- 0
}
segments(a[lsa[x$tip.label == assoc[i, 1]], 1] + decx[i] +
gap, a[lsa[x$tip.label == assoc[i, 1]], 2], a[lsa[x$tip.label ==
assoc[i, 1]], 1] + gap + left, a[lsa[x$tip.label ==
assoc[i, 1]], 2], col = col , lwd = as.numeric(assoc[i,3]))
segments(b2[lsb[y$tip.label == assoc[i, 2]], 1] - (decy[i] +
gap), b2[lsb[y$tip.label == assoc[i, 2]], 2], b2[lsb[y$tip.label ==
assoc[i, 2]], 1] - (gap + right), b2[lsb[y$tip.label ==
assoc[i, 2]], 2], col = col , lwd = as.numeric(assoc[i,3]))
segments(a[lsa[x$tip.label == assoc[i, 1]], 1] + gap +
left, a[lsa[x$tip.label == assoc[i, 1]], 2], b2[lsb[y$tip.label ==
assoc[i, 2]], 1] - (gap + right), b2[lsb[y$tip.label ==
assoc[i, 2]], 2], col = col , lwd = as.numeric(assoc[i,3]))
}
if (return == TRUE)
return(res)
}

November 1, 2009

Welsh test by permutations

Filed under: Algorithm, Test — Tags: — Timothée Poisot @ 3:26 pm
WelshPerm <- function(response,variable,nperm=999,...){

base <- oneway.test(response~variable,...)

base.p <- base$p.value
base.W 	<- base$statistic

count <- 1

# Permutation loop
for(i in 1:nperm){

	SAMPLE <- sample(response)

	welsh.perm<-oneway.test(SAMPLE~variable,...)
	welsh.perm.p<-welsh.perm$p.value
	welsh.perm.W<-welsh.perm$statistic 	if(abs(welsh.perm.W) >= abs(base.W)) {count <- count+1}
}
result=count/(nperm+1)
return(result)
}

October 31, 2009

Sorting a matrix/data.frame on a column

Filed under: Algorithm — Tags: , , — Timothée Poisot @ 1:44 am
mat.sort <- function(mat,n) 
{
	mat[rank(mat[,n]),] <- mat[c(1:nrow(mat)),]
	return(mat)
}

a <- matrix(rnorm(100),ncol=10)
mat.sort(a,1)

October 30, 2009

Decimal log scale on a plot

Filed under: Plot — Tags: , — Timothée Poisot @ 8:30 pm

R only does natural (neperian) log scales by default, and this is lame.

Here is a simple code to do decimal log scale, pretty much a requirement for scientists…

The force(x/y)lim options works for natural and log scales (for the later case, you need to specify the power of 10 that you want : c(-2,2) fixes the limit from 0.01 to 100)

drawlogaxis <- function(side,range)
{
	par(tck=0.02)
#	d <- log(range,10)
	d <- range
	mlog <- floor(min(d))
	Mlog <- ceiling(max(d))
	SeqLog <- c(mlog:Mlog)
	Nlog <- (Mlog-mlog)+1
	axis(side,at=SeqLog,labels=10^SeqLog)
	ats <- log(seq(from=2,to=9,by=1),10)
	mod <- NULL
	for(i in SeqLog)
	{
		mod <- c(mod,rep(i,length(ats)))
	}
	ats <- rep(ats,Nlog)
	ats <- ats+mod
	par(tck=0.02/3)
	axis(side,at=ats,labels=NA)
}

logplot <- function(x,y,log='xy',...,forceylim=c(0,0),forcexlim=c(0,0))
{
	par(tck=0.02)
	xlg <- FALSE
	ylg <- FALSE
	if('x'%in%strsplit(log,'')[[1]]){x <- log(x,10);xlg=TRUE}
	if('y'%in%strsplit(log,'')[[1]]){y <- log(y,10);ylg=TRUE}
	yl <- ifelse(forceylim==c(0,0),range(y),forceylim)
	xl <- ifelse(forcexlim==c(0,0),range(x),forcexlim)
	plot(x,y,...,axes=FALSE,ylim=yl,xlim=xl)
	if(xlg){drawlogaxis(1,xl)}else{axis(1,at=pretty(xl),labels=pretty(xl))}
	if(ylg){drawlogaxis(2,yl)}else{axis(2,at=pretty(yl),labels=pretty(yl))}
	box()
}

addlog <- function(x,y,log='xy',...)
{
	xlg <- FALSE
	ylg <- FALSE
	if('x'%in%strsplit(log,'')[[1]]){x <- log(x,10);xlg=TRUE}
	if('y'%in%strsplit(log,'')[[1]]){y <- log(y,10);ylg=TRUE}
	points(x,y,...)

}

October 26, 2009

MultBar : Advanced multiple barplot with SEM

Filed under: Plot — Tags: — Timothée Poisot @ 8:40 pm

Producing this kind of graphs (below) in R can be a pain in the a*s.

Rplot001

Here is a simple code that requires that data are presented in lists (see the example below).

multbar <- function(list.of.lists,...,condnames=0,pal=colorRampPalette(c('grey','cornsilk')),seriesnames=0,legendpos='topleft',legh=TRUE,do.pty='s')
{
	par(pty=do.pty,mgp=c(1.9,0.8,0),oma=c(0,0,0,0),mar=c(4,3,2,1),bg='transparent',bty='o',tck=0.02,yaxs='i')
	NofList <- length(list.of.lists)
	NofSubList <- length(list.of.lists[[1]])
	if(condnames==0){condnames=c(1:NofList)}
	if(seriesnames==0){seriesnames=c(1:NofSubList)}
	dim.mat.treat <- (NofList+1)*NofSubList
	pos <- c(1:dim.mat.treat)
	pos <- matrix(pos,nrow=(NofList+1))
	nbreaks <- dim.mat.treat+1
	par(xaxs='i',yaxs='i',bty='o')
	plot(0,0,pch=NA,xlim=c(0,nbreaks),ylim=c(min(pretty(range(list.of.lists))),max(pretty(range(list.of.lists)))),xaxt='n',yaxt='n',...)
	calc.ylab <- pretty(range(list.of.lists))
	lab.ylab <- round(calc.ylab,2)
	abline(h=pretty(range(list.of.lists)),lty=3,col='grey',lwd=2)
	abline(h=0,col='darkgrey',lwd=1.4)
	palette <- pal(NofList)
	for(condition in 1:NofList)
	{
		for(treatment in 1:NofSubList)
		{
			CD <- list.of.lists[[condition]][[treatment]]
			CDm <- mean(CD)
			CDv <- sd(CD)
			x.coord <- pos[condition,treatment]
			rect(x.coord,0,x.coord+1,CDm,col=palette[condition])
			arrows(x.coord+0.5,CDm+CDv,x.coord+0.5,CDm-CDv,angle=90,code=3,length=0.05)
		}
	}
	axis(side=2,at=calc.ylab,labels=lab.ylab,tick=TRUE)
	#abline(h=calc.ylab[1])
	abline(h=pretty(range(list.of.lists))[1])
	fills <- c(1:NofList)
	legend(legendpos,legend=condnames,fill=palette[fills],bty='n',horiz=legh,cex=1)
	cl.tab <- (c(1:NofSubList)-1)*NofSubList + NofList/2 +1
	lab.cond <- NULL
	int.lab.cond <- pos[c(1:(nrow(pos)-1)),]
	for(COL in 1:ncol(int.lab.cond))
	{
		lab.cond[COL] <- mean(int.lab.cond[,COL])+0.5
	}
	axis(side=1,at=lab.cond,labels=seriesnames,tick=FALSE)
	par(xaxs='r')
	if(min(pretty(range(list.of.lists)))<0){abline(h=0,lwd=1.1)}
	box()
}

list.1 <- list(rnorm(10,mean=3.4),rnorm(10,mean=6.8))
list.2 <- list(rnorm(10,mean=4),rnorm(10,mean=8.8))
list.3 <- list(rnorm(10,mean=3.1),rnorm(10,mean=7.4))

data <- list(list.1,list.2,list.3)

png()
par(family='serif')
multbar(data,main='Faked data',seriesnames=c('Control','Stress'),condnames=c('Ind. 1','Ind. 2','Ind. 3'),xlab='Treatment',ylab='Response')
dev.off()

October 18, 2009

GenEstim : A simple genetic algorithm for parameters estimation

Filed under: Algorithm, Estimation — Tags: , , — Timothée Poisot @ 2:43 pm

The GenEstim function presented here uses a very simple genetic algorithm to estimate parameters. The function returns the best estimated set of parameters ($estim), the AIC ($information) at each generation, and the cost of the best model ($bestcost) at each generation.

Results of running the program with a logistic function :

Rplot001.png
Logis = function(x,p)	p[[1]]/(1+p[[2]]*exp(-p[[3]]*x))
RSS = function(par)	sum((Logis(X,par)-Y)^2)
aic <- function(yvalues,rss,par)
{
	k <- length(par)
	n <- length(yvalues)
	aic <- 2*k+n*log(rss/n)
	return(aic)
}
P	<-	list(2,10,4)
X	<-	seq(from=-5,to=5,by=0.1)
Y	<-	Logis(X,P) + rnorm(length(X),sd=0.1)
plot(X,Y,pch=19,col='grey')
GenEstim	<- function(
		start.pars,
		cost = RSS,
		...,
		numiter = 1e3,
		npop = 1e2)
{
	bestcost <- NULL
	cur.AIC <- NULL
	for(it in 1:numiter)
	{
		pop <- matrix(0,ncol=length(start.pars),nrow=npop)
		for(p in 1:length(start.pars))
		{
			pop[,p] 	<- rnorm(npop,start.pars[[p]],sd=1)
			pop[1,p]	<- start.pars[[p]]
		}
		Costs <- NULL
		for(i in 1:nrow(pop))
		{
			li <- as.list(pop[i,])
			Costs[i] <- cost(li)
		}
		bestcost <- c(bestcost,min(Costs))
		best <-which.min(Costs)
		start.pars <- as.list(pop[best,])
		cur.AIC <- c(cur.AIC,aic(Y,RSS(start.pars),start.pars))
	}
	return(list(estim=start.pars,information=cur.AIC,convergence=bestcost))
}

simul <- GenEstim(list(0,0,0))
x.control <- seq(from=-6,to=6,by=0.1)
lines(x.control,Logis(x.control,simul$estim),col='orange',lwd=3)

October 15, 2009

Filled contour with log-log scale

Filed under: Plot — Tags: , , , — Timothée Poisot @ 8:23 pm

A quick workaround to have a filled.contour plot with natural log10-log10 scale (instead of the default natural log scale)

plotmat <- function(mat,main='',factor='M',MeasuredResponse='Coexistence')
{
X <- as.numeric(rownames(mat))
Y <- as.numeric(colnames(mat))
if(factor=='C')
{
Y <- Y/0.16
}
rownames(mat) <- as.numeric(X)
colnames(mat) <- as.numeric(Y)
colorFun <- colorRampPalette(c("black","darkblue","blue","green",
"orange",'yellow',"red","darkred",'white'))

lX <- log(X, 10)
lY <- log(Y, 10)

pretty.X.at <- pretty(range(lX),n=6)
pretty.X.lab <- round(10^pretty.X.at,0)
pretty.Y.at <- pretty(lY,n=4)
pretty.Y.lab <- round(10^pretty.Y.at,2) pretty.Y.lab[pretty.Y.lab>1] <- round(pretty.Y.lab[pretty.Y.lab>1],0)
pretty.Y.lab[(pretty.Y.lab>0.1)&(pretty.Y.lab<1)] <- round(pretty.Y.lab[(pretty.Y.lab>0.1)&(pretty.Y.lab<1)],1)

filled.contour(lX,lY,mat,
axes=FALSE,
frame.plot=TRUE,
color=colorFun,
ylab= MeasuredResponse,
xlab='Time between perturbation events',
main=main,
key.title=title(main=""),
key.axes=axis(4,at=pretty(vmat)),
plot.axes={ axis(1,at=pretty.X.at,labels=pretty.X.lab)
axis(2,at=pretty.Y.at,labels=pretty.Y.lab) })
}

May 3, 2009

Colouring a 3D plot according to z-values

Filed under: Plot — Tags: , , — Timothée Poisot @ 11:29 pm

persp3dHere is a script that colors a 3D plot according to the z value (height) of each point.

persp.withcol <- function(x,y,z,pal,nb.col,...,xlg=TRUE,ylg=TRUE)
{
	colnames(z) <- y
	rownames(z) <- x

	nrz <- nrow(z)
	ncz <- ncol(z) 

	color <- pal(nb.col)
	zfacet <- z[-1, -1] + z[-1, -ncz] + z[-nrz, -1] + z[-nrz, -ncz]
	facetcol <- cut(zfacet, nb.col)
	par(xlog=xlg,ylog=ylg)
	persp(
		as.numeric(rownames(z)),
		as.numeric(colnames(z)),
		as.matrix(z),
		col=color[facetcol],
		...
		)
}

dd

An algorithm to find local extrema in a vector

Filed under: Algorithm — Tags: , — Timothée Poisot @ 6:46 pm

I spend some time looking for an algorithm to find local extrema in a vector (time series). The solution I used is to “walk” through the vector by step larger than 1, in order to retain only one value even when the values are very noisy (see the picture at the end of the post).

It goes like this :

findpeaks <- function(vec,bw=1,x.coo=c(1:length(vec)))
{
	pos.x.max <- NULL
	pos.y.max <- NULL
	pos.x.min <- NULL
	pos.y.min <- NULL 	for(i in 1:(length(vec)-1)) 	{ 		if((i+1+bw)>length(vec)){
                sup.stop <- length(vec)}else{sup.stop <- i+1+bw
                }
		if((i-bw)<1){inf.stop <- 1}else{inf.stop <- i-bw}
		subset.sup <- vec[(i+1):sup.stop]
		subset.inf <- vec[inf.stop:(i-1)]

		is.max   <- sum(subset.inf > vec[i]) == 0
		is.nomin <- sum(subset.sup > vec[i]) == 0

		no.max   <- sum(subset.inf > vec[i]) == length(subset.inf)
		no.nomin <- sum(subset.sup > vec[i]) == length(subset.sup)

		if(is.max & is.nomin){
			pos.x.max <- c(pos.x.max,x.coo[i])
			pos.y.max <- c(pos.y.max,vec[i])
		}
		if(no.max & no.nomin){
			pos.x.min <- c(pos.x.min,x.coo[i])
			pos.y.min <- c(pos.y.min,vec[i])
		}
	}
	return(list(pos.x.max,pos.y.max,pos.x.min,pos.y.min))
}

findpeaks

The Shocking Blue Green Theme Blog at WordPress.com.

Follow

Get every new post delivered to your Inbox.