[R] tricks

November 1, 2009

Welsh test by permutations

Filed under: Algorithm, Test — Tags: — Timothée Poisot @ 3:26 pm
WelshPerm <- function(response,variable,nperm=999,...){

base <- oneway.test(response~variable,...)

base.p <- base$p.value
base.W 	<- base$statistic

count <- 1

# Permutation loop
for(i in 1:nperm){

	SAMPLE <- sample(response)

	welsh.perm<-oneway.test(SAMPLE~variable,...)
	welsh.perm.p<-welsh.perm$p.value
	welsh.perm.W<-welsh.perm$statistic 	if(abs(welsh.perm.W) >= abs(base.W)) {count <- count+1}
}
result=count/(nperm+1)
return(result)
}

October 31, 2009

Sorting a matrix/data.frame on a column

Filed under: Algorithm — Tags: , , — Timothée Poisot @ 1:44 am
mat.sort <- function(mat,n) 
{
	mat[rank(mat[,n]),] <- mat[c(1:nrow(mat)),]
	return(mat)
}

a <- matrix(rnorm(100),ncol=10)
mat.sort(a,1)

October 18, 2009

GenEstim : A simple genetic algorithm for parameters estimation

Filed under: Algorithm, Estimation — Tags: , , — Timothée Poisot @ 2:43 pm

The GenEstim function presented here uses a very simple genetic algorithm to estimate parameters. The function returns the best estimated set of parameters ($estim), the AIC ($information) at each generation, and the cost of the best model ($bestcost) at each generation.

Results of running the program with a logistic function :

Rplot001.png
Logis = function(x,p)	p[[1]]/(1+p[[2]]*exp(-p[[3]]*x))
RSS = function(par)	sum((Logis(X,par)-Y)^2)
aic <- function(yvalues,rss,par)
{
	k <- length(par)
	n <- length(yvalues)
	aic <- 2*k+n*log(rss/n)
	return(aic)
}
P	<-	list(2,10,4)
X	<-	seq(from=-5,to=5,by=0.1)
Y	<-	Logis(X,P) + rnorm(length(X),sd=0.1)
plot(X,Y,pch=19,col='grey')
GenEstim	<- function(
		start.pars,
		cost = RSS,
		...,
		numiter = 1e3,
		npop = 1e2)
{
	bestcost <- NULL
	cur.AIC <- NULL
	for(it in 1:numiter)
	{
		pop <- matrix(0,ncol=length(start.pars),nrow=npop)
		for(p in 1:length(start.pars))
		{
			pop[,p] 	<- rnorm(npop,start.pars[[p]],sd=1)
			pop[1,p]	<- start.pars[[p]]
		}
		Costs <- NULL
		for(i in 1:nrow(pop))
		{
			li <- as.list(pop[i,])
			Costs[i] <- cost(li)
		}
		bestcost <- c(bestcost,min(Costs))
		best <-which.min(Costs)
		start.pars <- as.list(pop[best,])
		cur.AIC <- c(cur.AIC,aic(Y,RSS(start.pars),start.pars))
	}
	return(list(estim=start.pars,information=cur.AIC,convergence=bestcost))
}

simul <- GenEstim(list(0,0,0))
x.control <- seq(from=-6,to=6,by=0.1)
lines(x.control,Logis(x.control,simul$estim),col='orange',lwd=3)

May 3, 2009

An algorithm to find local extrema in a vector

Filed under: Algorithm — Tags: , — Timothée Poisot @ 6:46 pm

I spend some time looking for an algorithm to find local extrema in a vector (time series). The solution I used is to “walk” through the vector by step larger than 1, in order to retain only one value even when the values are very noisy (see the picture at the end of the post).

It goes like this :

findpeaks <- function(vec,bw=1,x.coo=c(1:length(vec)))
{
	pos.x.max <- NULL
	pos.y.max <- NULL
	pos.x.min <- NULL
	pos.y.min <- NULL 	for(i in 1:(length(vec)-1)) 	{ 		if((i+1+bw)>length(vec)){
                sup.stop <- length(vec)}else{sup.stop <- i+1+bw
                }
		if((i-bw)<1){inf.stop <- 1}else{inf.stop <- i-bw}
		subset.sup <- vec[(i+1):sup.stop]
		subset.inf <- vec[inf.stop:(i-1)]

		is.max   <- sum(subset.inf > vec[i]) == 0
		is.nomin <- sum(subset.sup > vec[i]) == 0

		no.max   <- sum(subset.inf > vec[i]) == length(subset.inf)
		no.nomin <- sum(subset.sup > vec[i]) == length(subset.sup)

		if(is.max & is.nomin){
			pos.x.max <- c(pos.x.max,x.coo[i])
			pos.y.max <- c(pos.y.max,vec[i])
		}
		if(no.max & no.nomin){
			pos.x.min <- c(pos.x.min,x.coo[i])
			pos.y.min <- c(pos.y.min,vec[i])
		}
	}
	return(list(pos.x.max,pos.y.max,pos.x.min,pos.y.min))
}

findpeaks

Create a free website or blog at WordPress.com.