library(doBy)
library(ggplot2)
library(gridExtra)
#------------------------------------------
# Set the LC_TIME locale category for this session.
# NOTE(review): "English" looks like a Windows-style locale name; other
# platforms may need something like "en_US.UTF-8" - confirm on the target OS.
Sys.setlocale("LC_TIME", "English")

#-----------------------------------------------------------------------------------
# Background info
# we analyze (anonymized and cleaned) mobile data from https://www.netztest.at/de/
#-----------------------------------------------------------------------------------

# Step 0: load data and geo_data of Austria
# Each url() connection is closed right after load(); previously the first
# connection was overwritten by the second one and never closed.
# dir <- url("http://www.trutschnig.net/RTR_data.RData")
dir <- url("http://www.trutschnig.net/RTR.RData")
load(dir)
close(dir)
dir <- url("http://www.trutschnig.net/AT.RData")
load(dir)
close(dir)

# Show which objects are now in the workspace (the code below uses RTR and AT).
ls()
names(AT) <- c("long", "lat")
A <- RTR  # working copy; all later steps operate on A
summary(RTR)

# Step 1: plot the measurement coords
# geom_point() inherits the long/lat aesthetics, so A has to provide columns
# with those names as well.
p <- ggplot(data = AT, aes(x = long, y = lat)) +
  geom_path() +
  geom_point(data = A) +
  theme_bw()
print(p)

# Step 1b: Produce the same plot for each op_name
#......................your answer comes here

# Step 2: produce a 2-dim histogram of the data
xbin <- ybin <- 0.02                              # hexagon bin width in x and y
farbe <- rainbow(100, start = .40, end = .17)     # 100-colour fill palette
p <- ggplot(data = AT, aes(x = long, y = lat)) +
  geom_path() +
  stat_binhex(data = A, binwidth = c(xbin, ybin)) +
  theme_bw() +
  labs(title = paste0("Histogram: Data with gps coords, time window: ",
                      min(A$mymd), " till ", max(A$mymd), "\n")) +
  # log10-transformed fill scale for the bin counts
  scale_fill_gradientn(colours = farbe, name = "count", trans = "log10")
print(p)

# Step 2b: Same plot for each of the three op_names;
#......................your code comes here

#-----------------------------------------------------------------------------------------------------------
#--------------------------- (I) analyse performance of the three operators ---------------------------------
#-----------------------------------------------------------------------------------------------------------

# Step 3a: which was the measurement with highest download speed (variable rtr_speed_dl) ?
#                when was this measurement done ?
#                in which network was the measurement done ?
#                which device was used for this measurement ?
#                in which city was the measurement done ?
#......................your answer comes here

# Step 3b: Which was the iso_adm2 (= district) in which most measurements were done ?
#......................your answer comes here

# Step 3c: Which was the day with the most measurements ?
#......................your answer comes here

# Step 3d: calculate number of samples per op_name and month (mym) using doBy
#          and produce a (dodged) barplot
# length() of id within each op_name/mym group is the sample count; it ends up
# in the third column, which is renamed accordingly.
AA <- summaryBy(data = A, id ~ op_name + mym, FUN = c(length))
names(AA)[3] <- "count"
#................................your plot comes here

# Step 4: Calculate the sample size per operator and mymdh and produce a heatmap
#         (geom_tile) with mymd as x-coordinate, hour as y-coordinate and colour
#         according to sample size per hour. Distinguish the 3 operators using
#         facet_wrap
# First 13 characters of mtime serve as the date+hour key.
# NOTE(review): assumes mtime is formatted like "YYYY-MM-DD HH:..." - confirm.
A$mymdh <- substr(A$mtime, 1, 13)
AA <- summaryBy(data = A, id ~ mymdh + op_name, FUN = c(length))
names(AA)[3] <- "count"
AA$hour <- substr(AA$mymdh, 12, 13)            # characters 12-13 of the key
AA$mymd <- as.Date(substr(AA$mymdh, 1, 10))    # characters 1-10 parsed as Date
farbe <- rainbow(100, start = .40, end = .17)
#................................your plot comes here

# Step 5: produce boxplots rtr_speed_dl per month, colour according to op_name -
#         how is the speed development over the last months ?
# One facet per month (mym), one box per operator inside each facet.
p <- ggplot(data = A, aes(x = op_name, y = rtr_speed_dl, fill = op_name)) +
  geom_boxplot() +
  scale_fill_manual(values = c("green", "magenta", "gray40")) +
  theme_bw() +
  facet_wrap(~mym, nrow = 1) +
  labs(title = paste0("boxplot downlink speed (2G+3G+4G)", "\n"))
# Explicit print() so the plot is also drawn when the script is run via
# source(), consistent with the other plots in this file.
print(p)

# Step 5b: produce boxplots rtr_speed_dl per month, colour according to op_name,
#          use logarithmic y-scale
#................................your plot comes here

# Step 6: ecdf per month, choose colour according to op_name
#................................your plot comes here

# Step 7: repeat Step 5 and 6 for uplink
#................................your plot comes here

# Step 8a: Which is the iso_adm2 with the highest median downlink speed ?
#................................your code comes here

# Step 8b: Which is the iso_adm2 with the highest median downlink speed in January 2015?
#................................your code comes here

# Step 9b: Calculate the median download speed for each operator in the month
#          2014-01 and in the month 2015-01
#          How many percent did the speed of each operator increase ?
#................................your code comes here

#-----------------------------------------------------------------------------------------------------------
#--------------------------- (II) analyse device performance since 2014-09-01 -------------------------------
#-----------------------------------------------------------------------------------------------------------
# Keep only 3G/4G measurements taken on or after 2014-09-01.
B <- subset(
  A,
  nw_cat %in% c("3G", "4G") & mymd >= as.Date("2014-09-01")
)

# Rest of Code only uses B

# Step 9a: Which device was used for the measurement with the highest downlink speed
#................................your code comes here

# Step 9b: Which device has the highest number of measurements with nw_cat 4G ?
#................................your code comes here

# Step 9c: Which platform (Android or iOS) had more measurements in 4G ?
#................................your code comes here

# Step 10: Calculate number of trials per device and filter to 40 most used devices
# length() of id per device/device_platform group is the number of trials; it
# lands in the third column, which is renamed to "count".
GG <- summaryBy(data = B, id ~ device + device_platform, FUN = c(length))
names(GG)[3] <- "count"
GG <- GG[order(-GG$count), ]          # most frequently used device first
GG$nr <- seq_len(nrow(GG))            # rank; safe even if GG has no rows
# head() instead of GG[1:40, ]: with fewer than 40 devices the latter would
# append all-NA rows.
G1 <- head(GG, 40)
freqdev <- as.character(G1$device)
# Rank table sized by the devices actually present (at most 40).
FD <- data.frame(device = freqdev, nr = seq_along(freqdev))

# Bar chart of sample counts for the most used devices, coloured by platform.
p <- ggplot(data = G1, aes(x = nr, y = count, fill = device_platform)) +
  geom_bar(stat = "identity") +
  labs(title = paste0("Samples per top 40 devices", "\n")) +
  theme_bw() +
  scale_fill_manual(values = c("green", "magenta")) +
  # nr is numeric, so a continuous scale is used to place one labelled tick
  # per device; the axis title is passed by name.
  scale_x_continuous(name = "device", breaks = G1$nr, labels = freqdev) +
  theme(axis.text.x = element_text(size = 8, angle = 90),
        axis.text.y = element_text(size = 8))
print(p)

# Step 11: Calculate maximum and 95%-quantile of downlink speed per each of the
#          40 devices; distinguish nw_cat 2G/3G/4G
#          and prepare a plot with the results
# NOTE(review): B was restricted to nw_cat 3G/4G when it was created, so 2G
# rows are presumably absent here - confirm the intended task wording.
B1 <- subset(B, device %in% freqdev)
#................................your plot comes here