#@linear regression several explanatory variables file <- url("http://www.trutschnig.net/geo_reg_d2.RData") load(file) A<-geo_reg_d2 head(A) model<-lm(data=A,y~x1+x2) model summary(model) #(cor(model$fitted.values,y))^2 residuals<-A$y-model$fitted.values R2<-1-var(residuals)/var(A$y) #systematic n<-100 R<-1000 E<-data.frame(a1=rep(0,R),a2=rep(0,R),b=rep(0,R)) for(i in 1:R){ x1<-runif(n,-3,3) x2<-runif(n,-5,5) error<-rnorm(n,0,1) y<-2*x1+5*x2-1+error A<-data.frame(x1=x1,x2=x2,y=y) model<-lm(data=A,y~x1+x2) E[i,3]<-as.numeric(coefficients(model))[1] E[i,1:2]<-as.numeric(coefficients(model))[2:3] } summary(E) library(rgl) open3d() plot3d(E$a1, E$a2, E$b, col="blue",xlab = "a1", ylab = "a2", zlab = "b",size=3) plot3d(2,5,-1,add=TRUE,col="red",size=10) #------------------------------------------------------------------------------------ #@multicollinearity n<-100 x1<-runif(n=n,-10,10) x2<-2*x1+runif(n,-0.3,0.3) y<-3*x1+x2+runif(n,-1,1) A<-data.frame(x1=x1,x2=x2,y=y) model<-lm(data=A,y~x1+x2) model cor(A$x1,A$x2) #------------------------------------------------------------------------------------ #@overfitting n<-101 x1<-runif(n,0,2) x2<-runif(n,0,2) x3<-runif(n,0,2) x4<-runif(n,0,2) error<-rnorm(n,0,1) y<-2*x1+x2+error A<-data.frame(x1=x1,x2=x2,x3=x3,x4=x4,y=y) model<-lm(data=A,y~x1+x2+x3+x4) model #calculate regression models on all subsets of explanatory variables - use leaps package library(leaps) ov<-regsubsets(y~x1+x2+x3+x4,data=A,nbest=2) summary(ov) plot(ov) #----------------------------------------------------------------------------------- #@kernel regression: dir <- url("http://www.trutschnig.net/reg_data.RData") load(dir) A<-reg_data head(A) plot(A,col="gray") abline(lm(data=A,y~x),col="darkgreen") library(sm) nreg<-sm.regression(A$x,A$y,eval.points=c(0.6),display="none") #kernreggresion an stelle x=0.6 nreg$estimate points(0.6,nreg$estimate,col="blue",cex=1) nreg<-sm.regression(A$x,A$y,eval.points=seq(0,1,length=101),display="none") lines(nreg$eval.points,nreg$estimate,type="l",col="blue",lwd=2) #calculate R^2 nreg<-sm.regression(A$x,A$y,eval.points=A$x,display="none") res<-A$y-nreg$estimate R2<-1-var(res)/var(A$y) R2 #--------------------------------------------------------------------------- #systematische Evaluierung - Beispiel 1 n=1000 #n vergrößern x<-seq(0,1,length=n) error<-rnorm(n,0,0.5) y<-atan(6*x-3)+error A<-data.frame(x=x,y=y) plot(A,col="gray") lines(x,atan(6*x-3),type="l",col="red",lwd=2) nreg<-sm.regression(A$x,A$y,eval.points=seq(0,1,length=101),display="none") lines(nreg$eval.points,nreg$estimate,type="l",col="blue",lwd=2) #--------------------------------------------------------------------------- #systematische Evaluierung - Beispiel 2 a<-2;b<-3 n<-400 x<-seq(-3,3,length=n) y<-a*x+b+runif(n,-1,1) A<-data.frame(x=x,y=y) plot(A,col="gray") abline(3,2,col="red") nreg<-sm.regression(A$x,A$y,eval.points=A$x,display="none") lines(nreg$eval.points,nreg$estimate,type="l",col="blue",lwd=2) R2<-1-var(A$y-nreg$estimate)/var(A$y) R2 #zum vergleich model<-lm(data=A,y~x) summary(model) #----------------------------------------------------------------------------- #systematische Evaluierung - Beispiel 3 n<-500 a<-3; b<-2 x<-runif(n,0,10) error<-rnorm(n,0,1) f<-function(x){y<-a/(1+b*exp(-x))} y<-f(x) + error A<-data.frame(x=x,y=y) plot(A,col="gray") xg<-seq(0,10,length=101) lines(xg,f(xg),col="red") nreg<-sm.regression(A$x,A$y,eval.points=xg,display="none") lines(nreg$eval.points,nreg$estimate,type="l",col="blue",lwd=2)