#--------------------------------------------------------------------------------- #------------------------------ Correlation -------------------------------------- #--------------------------------------------------------------------------------- file <- url("http://www.trutschnig.net/geo_reg1.RData") load(file) A<-geo_reg1 head(geo_reg1) #Pearson correlation cor(A$x,A$y) cor(2*A$x,3*A$y) cor(-2*A$x,-3*A$y) cor(-2*A$x,3*A$y) cor(A$x^3,A$y^3) #Spearman rank correlation x1 <- c(3, 1, 4, 15, 13) r1 <- rank(x1) x1 r1 x1 <- c(3, 1, 3, 15, 13) r1 <- rank(x1) x1 r1 x1 <- c(3, 1, 3, 15, 3,3) r1 <- rank(x1) x1 r1 plot(A) plot(rank(A$x),rank(A$y)) cor(A$x,A$y,method = "spearman") cor(2*A$x,3*A$y,method = "spearman") cor(-2*A$x,-3*A$y,method = "spearman") cor(A$x^3,A$y^3,method = "spearman") #comparison Dazu<-data.frame(x=c(10,10.3),y=c(2,2.4)) A1<-rbind(A,Dazu) plot(A1) cor(A1$x,A1$y) cor(A1$x,A1$y,method = "spearman") #---------- #Exercise 01 #load Reg_ex01.RData, plot the data and the corresponding ranks and calculate # the Pearson and the Spearman rank correlation file<-url("http://www.trutschnig.net/R/Reg_ex02.RData") load(file) head(B) #---------- #Exercise 02 #repeat Exercise 01 for the dataset Reg_ex02.RData #Exercise 03 #Can you find a three points (x_1,y_1),(x_2,y_2),(x_3,y_3) for which the Pearson correlation and the Spearman correlation have different sign? #Exercise 04: #Write an R-function that has vectors x and y (of the same length) as input and that does the following: #(1) It produces a scatterplot of x,y as well as a scatterplot of the ranks; use par(mfrow=c(1,2)) to generate a graphic with two subplots #(2) It returns both Pearson correlation and Spearman correlation #--------------------------------------------------------------------------------- #------------------------------ Linear regression -------------------------------- #--------------------------------------------------------------------------------- #part 01: #--------------------------------------------------------------------------------- file <- url("http://www.trutschnig.net/geo_reg1.RData") load(file) head(geo_reg1) A<-geo_reg1 model<-lm(data=A,y~x) #use whatever name you want instead of model summary(model) plot(A) abline(model,col="magenta") model$coefficients a<-as.numeric(model$coefficients[2]) a res<-model$residuals hist(res) summary(model)$r.squared rho<-cor(A$x,A$y) rho rho^2 New<-data.frame(x=c(1.5)) p<-predict(model,new=New) p Neu<-data.frame(x=seq(-2,4,length=101)) p<-predict(model,new=Neu) p #----------- #Exercise 05: file <- url("http://www.trutschnig.net/geo_reg1.RData") load(file) B <- geo_reg1 head(B) #Exercise 06: A <- read.table("http://www.trutschnig.net/brainhead.txt",head=TRUE) #------------------------------------------------------------------------------------ #part02 #------------------------------------------------------------------------------------ #one run a<-2;b<-1 n<-100 x<-runif(n,-3,4) #generate random x values error<-rnorm(n,0,1) #error from normal distribution N(0,1) y<-a*x+b+error A<-data.frame(x=x,y=y) plot(A) model<-lm(data=A,y~x) abline(model) summary(model) sum(model$residuals^2)/(n-2) #------------------------------------------------------------------------------------ #part03 #------------------------------------------------------------------------------------ #several runs R<-1000 E<-data.frame(a=rep(0,R),b=rep(0,R)) a<-2;b<-1 n<-100 for(i in 1:R){ x<-runif(n,-3,4) #generate random x values error<-rnorm(n,0,1) y<-a*x+b+error A<-data.frame(x=x,y=y) model<-lm(data=A,y~x) E[i,]<-as.numeric(coefficients(model))[2:1] } head(E); summary(E) #nice rug plot library(ggplot2) p <- ggplot(E, aes(x=a, y=b)) p <- p + geom_point() p <- p + geom_rug(size=0.25) p <- p + labs(title = paste("sample size n= ",n,"\n","",sep="")) p <- p + theme_bw() p #------------ #Exercises 07-09: #your exercises 07-09 come here #-------------------------------------------------------------------------------------- #-------------------------------------------------------------------------------------- #nonparametric regression #-------------------------------------------------------------------------------------- #-------------------------------------------------------------------------------------- library(sm) dir <- url("http://www.trutschnig.net/reg_data.RData") load(dir) A<-reg_data head(A) plot(A,col="gray") abline(lm(data=A,y~x),col="darkgreen") library(sm) nreg<-sm.regression(A$x,A$y,eval.points=c(0.6),display="none") #kernel regression at x=0.6 nreg$estimate points(0.6,nreg$estimate,col="blue",cex=1) nreg<-sm.regression(A$x,A$y,eval.points=seq(0,1,length=101),display="none") lines(nreg$eval.points,nreg$estimate,type="l",col="blue",lwd=2) #calculate R^2 nreg<-sm.regression(A$x,A$y,eval.points=A$x,display="none") res<-A$y-nreg$estimate R2<-1-var(res)/var(A$y) R2 #--------------------------------------------------------------------------- #systematic eval - example 1 n <- 1000 #change n x <- seq(0,1,length=n) error <- rnorm(n,0,1) y <- atan(6*x-3)+error A <- data.frame(x=x,y=y) plot(A,col="gray") lines(x,atan(6*x-3),type="l",col="red",lwd=2) nreg <- sm.regression(A$x,A$y,eval.points=seq(0,1,length=101),display="none") lines(nreg$eval.points,nreg$estimate,type="l",col="blue",lwd=2) #--------------------------------------------------------------------------- #systematic eval - example 2 a <- 2; b <- 3 n <- 400 #change n x <- seq(-3,3,length=n) y <- a*x+b+rnorm(n,0,3) A <- data.frame(x=x,y=y) plot(A,col="gray") abline(3,2,col="red") nreg <- sm.regression(A$x,A$y,eval.points=A$x,display="none") lines(nreg$eval.points,nreg$estimate,type="l",col="blue",lwd=2) R2 <- 1-var(A$y-nreg$estimate)/var(A$y) R2 #compare with the linear model model<-lm(data=A,y~x) summary(model) #Exercise 10 comes here #Exercise 11 #Systolic blood pressure dataset dir <- url("http://www.trutschnig.net/SBP.RData") load(dir) A<-SBP head(A) summary(A) A3<-subset(A,A$age>=30 & A$age<=40) A5<-subset(A,A$age>=50 & A$age<=60) A7<-subset(A,A$age>=70 & A$age<=80)