# setwd("~/Desktop/laura/")

####Read in data ####
df <- read.csv("all_root_data_2D.csv") #single root data
summary(df)
gt<-read.csv("ground_truth_data_2D.csv") # ground truth per image
meas<-read.table("all_analyses_fixed.txt",header = T, #the image analyses data
                 sep="\t", 
                 strip.white = TRUE
) 
# 200 dpi
# inch = 25.4 mm
# 7.874016 pixel/mm
# 0.127 mm/pixel

df$dia_length<-df$diameter*df$length #needed for wm diameter
meas$dpi<-as.numeric(substr(meas$RHIZO.2013e,1,4)) #
meas$image<-substr(meas$RHIZO.2013e,6,35)
summary(meas)
head(meas)

Suma<-aggregate(length ~ image, df, sum) #summarize the data by image
names(Suma)[2]<-"Sum_length"
Suma$diam<-aggregate(dia_length~image,df,sum)[,2]/Suma$Sum_length#length weighted mean diameter
Suma$N<-aggregate(surface ~ image, df, length)[,2] #Number of root segments for sd
df<-merge(df,Suma,by="image") #add the total length and length weighted mean diameter per image to each root segment
Suma$diam_sd<-sqrt(aggregate((diameter-diam)^2*length~image,df,sum)[,2] 
                   /(((Suma$N-1)*Suma$Sum_length)/Suma$N))#length weighted sd of diameter

Suma$vol<-aggregate(volume ~ image, df, sum)[,2]
Suma$sufa<-aggregate(surface ~ image, df, sum)[,2]

gt_o<-gt[,c(1,20)] #overlap

all_D1<-merge(meas,Suma,by="image")
all_D<-merge(all_D1,gt_o,by="image") # add overlap
names(all_D)

# correct for different units, SufArea not corrected because not used
# diamter mm, length+vol cm
all_D$IJ_Eq_Vol<-all_D$IJ_Eq_Vol/1000 #mm3 to cm3
all_D$vol<-all_D$vol/1000
all_D$IJ_Kimura_length<-all_D$IJ_Kimura_length/10 #mm to cm
all_D$Sum_length<-all_D$Sum_length/10
all_D$CV_diam<-all_D$diam_sd/all_D$diam #coefficient of variation
all_D$density<-all_D$Sum_length/all_D$AnalysedRegionArea.cm2. #root length per image size

# Create groups

all_D$group <- "fibrous"
all_D$group[grepl("dicot", all_D$image)] <- "taproot"
all_D<-all_D[order(all_D$overlap) , ]

#### Create Subset resolution ####
all_D<-subset(all_D,dpi==1200)

#### Compute rel errors for Tab 1 ####
# relative differences #
rel_diff_start<-function(Var,Ref,name) { #funktion to create summary of errors txt
  err<-(Var-Ref)/Ref
  # summary(abs(err)*100)
  # summary((err)*100)#error % 
  # 
  model <-name #the variable
  min <- as.numeric(summary((err)*100)[1]) #standarderror
  mean <- as.numeric(summary((err)*100)[4]) #standarderror
  max<- as.numeric(summary((err)*100)[6]) #adjR2
  abs<-  as.numeric(summary(abs(err)*100)[4]) #adjR2
  
  sink(file = "1200errors_fixed.txt", append = T)
  d<-data.frame(model=model,min=min, mean=mean, max=max, abs=abs)
  print(d)
  sink()
  
  
}

rel_diff<-function(Var,Ref,name) { #funktion to create summary of errors txt
  err<-(Var-Ref)/Ref
  # summary(abs(err)*100)
  # summary((err)*100)#error % 
  # 
  model <-name #the variable
  min <- as.numeric(summary((err)*100)[1]) #standarderror
  mean <- as.numeric(summary((err)*100)[4]) #standarderror
  max<- as.numeric(summary((err)*100)[6]) #adjR2
  abs<-  as.numeric(summary(abs(err)*100)[4]) #adjR2
  
  sink(file = "1200errors_fixed.txt", append = T)
  d<-data.frame(model=model,min=min, mean=mean, max=max, abs=abs)
  names(d)<-NULL
  print(d)
  sink()
  
  
}
# Diamter #
#WinRhizo average
all_D$d_d_WR<-(all_D$WR_AvgDiam-all_D$diam)/all_D$diam
summary(abs(all_D$d_d_WR)*100)
summary((all_D$d_d_WR)*100)#error %
rel_diff_start(all_D$WR_AvgDiam,all_D$diam,"WR_AvgDiam") #write in txt

#WinRhizo weighted mean
all_D$d_d_WR_wm<-(all_D$WR_Weighted_mean_diam-all_D$diam)/all_D$diam
summary(abs(all_D$d_d_WR_wm)*100)
summary((all_D$d_d_WR_wm)*100)
rel_diff(all_D$WR_Weighted_mean_diam,all_D$diam,"WR_Weighted_mean_diam") #write in txt

#IJ
all_D$d_d_IJ<-(all_D$IJ_Mean_Dia-all_D$diam)/all_D$diam
summary(abs(all_D$d_d_IJ)*100)
summary((all_D$d_d_IJ)*100)
rel_diff(all_D$IJ_Mean_Dia,all_D$diam,"IJ_Mean_Dia") #write in txt

#IJC
all_D$d_d_IJC<-(all_D$IJ_Mean_Dia_C-all_D$diam)/all_D$diam
summary(abs(all_D$d_d_IJC)*100)
summary((all_D$d_d_IJC)*100)
rel_diff(all_D$IJ_Mean_Dia_C,all_D$diam,"IJ_Mean_Dia_C") #write in txt

# length #
# #WinRhizo
all_D$d_l_WR<-(all_D$WR_Length_Sum-all_D$Sum_length)/all_D$Sum_length
summary(abs(all_D$d_l_WR)*100)
summary((all_D$d_l_WR)*100)
rel_diff(all_D$WR_Length_Sum,all_D$Sum_length,"WR_Length_Sum") #write in txt
#IJ
all_D$d_l_IJ<-(all_D$IJ_Kimura_length-all_D$Sum_length)/all_D$Sum_length
summary(abs(all_D$d_l_IJ)*100)
summary((all_D$d_l_IJ)*100)
rel_diff(all_D$IJ_Kimura_length,all_D$Sum_length,"IJ_Kimura_length") #write in txt

#volume #
#WinRhizo correct
all_D$d_Vc_WR<-(all_D$WR_V_sum-all_D$vol)/all_D$vol
summary(abs(all_D$d_Vc_WR)*100)
summary((all_D$d_Vc_WR)*100)
rel_diff(all_D$WR_V_sum,all_D$vol,"WR_V_sum") #write in txt
#WinRhizo wrong
all_D$d_V_WR<-(all_D$WR_RootVolume-all_D$vol)/all_D$vol
summary(abs(all_D$d_V_WR)*100)
summary((all_D$d_V_WR)*100)
rel_diff(all_D$WR_RootVolume,all_D$vol,"WR_RootVolume") #write in txt
#IJ
all_D$d_V_IJ<-(all_D$IJ_Eq_Vol-all_D$vol)/all_D$vol
summary(abs(all_D$d_V_IJ)*100)
summary((all_D$d_V_IJ)*100)
rel_diff(all_D$IJ_Eq_Vol,all_D$vol,"IJ_Eq_Vol") #write in txt


#### Analyse Relationships to ground truth ####
# plots relationships to ground truth#

outp_models_start<-function(Vari,x) { #with columnames
  model <-Vari #model
  int <- as.numeric(summary(x)$coefficients[1]) #intercept
  stderr_int <- as.numeric(summary(x)$coefficients[1,2]) #standarderror
  p_int<- as.numeric(summary(x)$coefficients[1,4]) #p_int
  est <- as.numeric(summary(x)$coefficients[2]) #estimates
  stderr_est <- as.numeric(summary(x)$coefficients[2,2]) #standarderror
  p_est <- as.numeric(summary(x)$coefficients[2,4]) #standarderror
  r2<- as.numeric(summary(x)$adj.r.squared) #adjR2
  
  sink(file = "1200models_fixed.txt", append = T)
  d<-data.frame(model=model,int =int, stderr_int=stderr_int, p_int=p_int, est=est, stderr_est=stderr_est, p_est=p_est, r2=r2)
  print(d)
  sink()
}
outp_models<-function(Vari,x) { #without columnames
  model <-Vari #model
  int <- as.numeric(summary(x)$coefficients[1]) #intercept
  stderr_int <- as.numeric(summary(x)$coefficients[1,2]) #standarderror
  p_int<- as.numeric(summary(x)$coefficients[1,4]) #p_int
  est <- as.numeric(summary(x)$coefficients[2]) #estimates
  stderr_est <- as.numeric(summary(x)$coefficients[2,2]) #standarderror
  p_est <- as.numeric(summary(x)$coefficients[2,4]) #standarderror
  r2<- as.numeric(summary(x)$adj.r.squared) #adjR2
  
  sink(file = "1200models_fixed.txt", append = T)
  d<-data.frame(model=model,int =int, stderr_int=stderr_int, p_int=p_int, est=est, stderr_est=stderr_est, p_est=p_est, r2=r2)
  names(d)<-NULL
  print(d)
  sink()
}

# diameter WinRhizo average
plot(all_D$diam,all_D$WR_AvgDiam)
abline(0,1, col="grey",lty="dashed")
r1<-lm(all_D$WR_AvgDiam~all_D$diam)
summary(r1)

outp_models_start("WR_AvgDiam",r1)

# diameter WinRhizo weighted mean
plot(all_D$diam,all_D$WR_Weighted_mean_diam)
abline(0,1, col="grey",lty="dashed")
r1<-lm(all_D$WR_Weighted_mean_diam~all_D$diam)
summary(r1)

outp_models("WR_Weighted_mean_diam",r1)

# diameter IJ
plot(all_D$diam,all_D$IJ_Mean_Dia)
abline(0,1, col="grey",lty="dashed")
r1<-lm(all_D$IJ_Mean_Dia~all_D$diam)
summary(r1)
outp_models("IJ_Mean_Dia",r1)

# diameter IJC
plot(all_D$diam,all_D$IJ_Mean_Dia_C)
abline(0,1, col="grey",lty="dashed")
r1<-lm(all_D$IJ_Mean_Dia_C~all_D$diam)
summary(r1)
outp_models("IJ_Mean_Dia_C",r1)

# length WinRhizo
plot(all_D$Sum_length,all_D$WR_Length_Sum)
abline(0,1, col="grey",lty="dashed")
r1<-lm(all_D$WR_Length_Sum~all_D$Sum_length)
summary(r1)
outp_models("WR_Length_Sum",r1)

# length IJ
plot(all_D$Sum_length,all_D$IJ_Kimura_length)
abline(0,1, col="grey",lty="dashed")
r1<-lm(all_D$IJ_Kimura_length~all_D$Sum_length)
summary(r1)
outp_models("IJ_Kimura_length",r1)

# volume WinRhizo wrong
plot(all_D$vol,all_D$WR_RootVolume)
abline(0,1, col="grey",lty="dashed")
r1<-lm(all_D$WR_RootVolume~all_D$vol) 
abline(lm(all_D$WR_RootVolume~all_D$vol))
summary(r1)
outp_models("WR_RootVolume",r1)

# volume WinRhizo correct
plot(all_D$vol,all_D$WR_V_sum)
abline(0,1, col="grey",lty="dashed")
r1<-lm(all_D$WR_V_sum~all_D$vol)
summary(r1)
outp_models("WR_V_sum",r1)

# volume IJ
plot(all_D$vol,all_D$IJ_Eq_Vol)
abline(0,1, col="grey",lty="dashed")
r1<-lm(all_D$IJ_Eq_Vol~all_D$vol) 
summary(r1)
outp_models("IJ_Eq_Vol",r1)

# volume IJ vs. volume WinRhizo wrong
plot(all_D$IJ_Eq_Vol,all_D$WR_RootVolume)
abline(0,1, col="grey",lty="dashed")
r1<-lm(all_D$WR_RootVolume~all_D$IJ_Eq_Vol)
summary(r1)

#### Structural parameters ####
plot(all_D$d_d_IJC~all_D$d_l_IJ)
plot(all_D$d_d_IJC~all_D$overlap)
plot(all_D$d_d_IJC~all_D$density)
plot(all_D$d_d_IJC~all_D$diam)
plot(all_D$d_d_IJC~all_D$Sum_length)
plot(all_D$d_d_IJC~all_D$Crossings)

# difference D WR vs. CV diameter
plot(all_D$CV_diam,abs(all_D$d_d_WR))
summary(lm(abs(all_D$d_d_WR)~all_D$CV_diam))# sig #R2 < 0.1

# difference D WR vs.overlap
plot(all_D$overlap,abs(all_D$d_d_WR))
summary(lm(abs(all_D$d_d_WR)~all_D$overlap))#sig
abline(lm(abs(all_D$d_d_WR)~all_D$overlap))

# difference D WR vs.diameter
plot(all_D$diam,abs(all_D$d_d_WR))
summary(lm(abs(all_D$d_d_WR)~all_D$diam)) #not sig

# difference D IJ vs. CV diameter
plot(all_D$CV_diam,abs(all_D$d_d_IJC))
summary(lm(abs(all_D$d_d_IJC)~all_D$CV_diam))# sig #R2 < 0.1

# difference D IJ vs.overlap
plot(all_D$overlap,abs(all_D$d_d_IJC))
summary(lm(abs(all_D$d_d_IJC)~all_D$overlap))# not sig
abline(lm(abs(all_D$d_d_IJC)~all_D$overlap))

# difference D IJ  vs. diameter
plot(all_D$diam,abs(all_D$d_d_IJC))
summary(lm(abs(all_D$d_d_IJC)~all_D$diam)) # sig
abline(lm(abs(all_D$d_d_IJC)~all_D$diam))

cor.test(all_D$overlap,(all_D$diam))
cor.test(all_D$overlap,(all_D$CV_diam))
cor.test(all_D$CV_diam,(all_D$diam))

# difference length WR vs.overlap
plot(all_D$overlap,abs(all_D$d_l_WR))
summary(lm(abs(all_D$d_l_WR)~all_D$overlap)) #sig
abline(lm(abs(all_D$d_l_WR)~all_D$overlap))

# difference length WR  vs. length
plot((all_D$Sum_length),abs(all_D$d_l_WR)) 
summary(lm(abs(all_D$d_l_WR)~(all_D$Sum_length))) # sig
abline(lm(abs(all_D$d_l_WR)~(all_D$Sum_length)))

# difference length WR  vs. density
plot(all_D$density,abs(all_D$d_l_WR))
summary(lm(abs(all_D$d_l_WR)~all_D$density)) #sig relationship
summary(lm(abs(all_D$d_l_WR)~log(all_D$density))) #not better
abline(lm(abs(all_D$d_l_WR)~(all_D$density)))

### check for relationship in subsets with lower densities
all_D1<-subset(all_D,all_D$density<1)

summary(lm(abs(all_D$d_l_WR)~(all_D$density)))
summary(lm(abs(all_D1$d_l_WR)~(all_D1$density))) # not sig


summary(lm(abs(d_l_WR)~(Sum_length)+(density)+overlap,all_D))
anova(lm(abs(d_l_WR)~(Sum_length)+(density)+overlap,all_D))
summary(lm(abs(d_l_WR)~(Sum_length)+overlap+(density),all_D))
anova(lm(abs(d_l_WR)~(Sum_length)+overlap+(density),all_D))
summary(lm(abs(d_l_WR)~overlap+(density)+(Sum_length),all_D))
anova(lm(abs(d_l_WR)~overlap+(density)+(Sum_length),all_D))
summary(lm(abs(d_l_WR)~(density)+overlap+(Sum_length),all_D))
anova(lm(abs(d_l_WR)~(density)+overlap+(Sum_length),all_D))
### Sum length effect function of overlap and density

### doule check following Zuur ea 2010
viflength<-1/(1-summary(lm(Sum_length~density+overlap,all_D))$r.sq)
vifRLD<-1/(1-summary(lm(density~Sum_length+overlap,all_D))$r.sq)
vifover<-1/(1-summary(lm(overlap~Sum_length+density,all_D))$r.sq)
# exclude length
vifRLD1<-1/(1-summary(lm(density~overlap,all_D))$r.sq)
vifover1<-1/(1-summary(lm(overlap~density,all_D))$r.sq)
#new multiple regression
summary(lm(abs(d_l_WR)~(density)+overlap,all_D))
anova(lm(abs(d_l_WR)~(density)+overlap,all_D))
summary(lm(abs(d_l_WR)~overlap+(density),all_D))
anova(lm(abs(d_l_WR)~overlap+(density),all_D))

#change of order changes anova outp due to collinearity:
cor.test(all_D$overlap,(all_D$Sum_length))
cor.test(all_D$overlap,all_D$density)
cor.test((all_D$Sum_length),all_D$density)

# difference length IJ vs.overlap
plot(all_D$overlap,abs(all_D$d_l_IJ))
summary(lm(abs(all_D$d_l_IJ)~all_D$overlap)) #not sig
abline(lm(abs(all_D$d_l_IJ)~all_D$overlap))

# difference length IJ  vs. length
plot(all_D$Sum_length,abs(all_D$d_l_IJ))
summary(lm(abs(all_D$d_l_IJ)~(all_D$Sum_length))) #not sig


# difference length IJ  vs. density
plot(all_D$density,abs(all_D$d_l_IJ))
summary(lm(abs(all_D$d_l_IJ)~all_D$density)) # not sig
abline(lm(abs(all_D$d_l_IJ)~all_D$density))
summary(lm(d_l_IJ~density,all_D))
summary(lm(d_l_IJ~density,all_D1))# sig but nonsense, R2< 0.1

# difference volume WR wrong vs. CV diameter
plot(all_D$CV_diam,abs(all_D$d_V_WR))
summary(lm(abs(all_D$d_V_WR)~all_D$CV_diam)) #sig

# difference volume WR wrong diameter
plot((all_D$diam),abs(all_D$d_V_WR))
summary(lm(abs(all_D$d_V_WR)~log(all_D$diam))) # sig R2< 0.1 slightly better, no real differnce
summary(lm(abs(all_D$d_V_WR)~all_D$diam)) # sig R2< 0.1
abline(lm(abs(all_D$d_V_WR)~log(all_D$diam)))

plot(all_D$diam,(all_D$CV_diam))
cor.test(all_D$diam,(all_D$CV_diam)) #negative not sig
cor.test(all_D$vol,(all_D$CV_diam)) #negative not sig
cor.test(all_D$vol,(all_D$diam)) #negative

# difference volume WR wrong vs. Volume
plot(all_D$vol,abs(all_D$d_V_WR))
summary(lm(abs(all_D$d_V_WR)~all_D$vol)) # not sig
summary(lm(abs(all_D$d_V_WR)~log(all_D$vol))) # not sig

# difference volume WR wrong vs. overlap
plot(all_D$overlap,abs(all_D$d_V_WR))
summary(lm(abs(all_D$d_V_WR)~all_D$overlap)) #not sig

# difference volume IJ  vs. CV diameter
plot(all_D$CV_diam,abs(all_D$d_V_IJ))
summary(lm(abs(all_D$d_V_IJ)~all_D$CV_diam)) #sig
abline(lm(abs(all_D$d_V_IJ)~all_D$CV_diam))
#plot(lm(all_D$d_V_IJ~all_D$CV_diam))

# difference volume IJ diameter
plot(log(all_D$diam),abs(all_D$d_V_IJ))
summary(lm(abs(all_D$d_V_IJ)~log(all_D$diam))) # sig better R2< 0.1
summary(lm(abs(all_D$d_V_IJ)~all_D$diam)) # sig
abline(lm(all_D$d_V_IJ~log(all_D$diam)))

# difference volume IJ vs. Volume
plot(all_D$vol,abs(all_D$d_V_IJ))
summary(lm(abs(all_D$d_V_IJ)~all_D$vol)) #not sig
summary(lm(abs(all_D$d_V_IJ)~log(all_D$vol))) #not sig

# difference volume IJ vs. overlap
plot(all_D$overlap,abs(all_D$d_V_IJ))
summary(lm(abs(all_D$d_V_IJ)~all_D$overlap)) #not sig

# difference volume WR correct vs. CV diameter
plot(all_D$CV_diam,abs(all_D$d_Vc_WR))
summary(lm(abs(all_D$d_Vc_WR)~all_D$CV_diam)) # sig
cor.test(all_D$vol,(all_D$CV_diam)) #negative

# difference volume WR correct vs. diameter
plot((all_D$diam),abs(all_D$d_Vc_WR))
summary(lm(abs(all_D$d_Vc_WR)~log(all_D$diam))) # not sig
summary(lm(abs(all_D$d_Vc_WR)~all_D$diam)) # not sig
abline(lm(all_D$d_Vc_WR~log(all_D$diam)))

summary(lm(all_D$diam~all_D$vol))
summary(lm(abs(all_D$d_Vc_WR)~all_D$diam+all_D$vol))

# difference volume WR correct vs. Volume
plot(log(all_D$vol),abs(all_D$d_Vc_WR))
plot((all_D$vol),abs(all_D$d_Vc_WR))
summary(lm(abs(all_D$d_Vc_WR)~all_D$vol)) #sig
summary(lm(abs(all_D$d_Vc_WR)~log(all_D$vol))) # sig better
abline(lm(abs(all_D$d_Vc_WR)~all_D$vol))

# difference volume WR correct vs. overlap
plot(all_D$overlap,abs(all_D$d_Vc_WR))
summary(lm(abs(all_D$d_Vc_WR)~all_D$overlap)) # sig R2< 0.1


# Checked!
summary(lm(all_D$diam~all_D$vol)) #not significant
plot(all_D$diam~all_D$vol)
summary(lm(all_D$CV_diam~all_D$vol)) #not significant
plot(all_D$CV_diam~all_D$vol)


#### checking whether the error depends on the group
# true values
wilcox.test(Sum_length~group,all_D) #highly sign
wilcox.test(diam~group,all_D) #highly sign
wilcox.test(vol~group,all_D) #highly sign
wilcox.test(CV_diam~group,all_D) #
wilcox.test(overlap~group,all_D) #

#diameter
wilcox.test(abs(d_d_WR)~group,all_D) # sig
wilcox.test(d_d_WR~group,all_D) #highly sign
summary(lm(abs(d_d_WR)~group,all_D))
summary(lm(d_d_WR~group,all_D))
boxplot(abs(d_d_WR)~group,all_D)
boxplot(d_d_WR~group,all_D)

wilcox.test(abs(d_d_IJC)~group,all_D) #not significant
wilcox.test(d_d_IJC~group,all_D) #
summary(lm(abs(d_d_IJC)~group,all_D))
summary(lm(d_d_IJC~group,all_D))
boxplot(abs(d_d_IJC)~group,all_D)
boxplot(d_d_IJC~group,all_D)

#length
wilcox.test(abs(d_l_WR)~group,all_D) #significant
wilcox.test(d_l_WR~group,all_D) #
summary(lm(abs(d_l_WR)~group,all_D))
summary(lm(d_l_WR~group,all_D))

wilcox.test(abs(d_l_IJ)~group,all_D) #not significant
wilcox.test(d_l_IJ~group,all_D) #
summary(lm(abs(d_l_IJ)~group,all_D)) 
summary(lm(d_l_IJ~group,all_D)) #  
boxplot(d_l_IJ~group,all_D) 

#Volume
wilcox.test(abs(d_V_WR)~group,all_D) #less underestimation for fibrous
wilcox.test(d_V_WR~group,all_D) #
summary(lm(abs(d_V_WR)~group,all_D)) 
summary(lm(d_V_WR~group,all_D)) #less underestimation for fibrous
boxplot(d_V_WR~group,all_D)
#the reason for this
boxplot(CV_diam~group,all_D)
wilcox.test(CV_diam~group,all_D) #

wilcox.test(overlap~group,all_D) #

#relationship length RLD
summary(lm(density~Sum_length,all_D))
plot(density~Sum_length,all_D)
lines(predict(lm(density~Sum_length,all_D))~all_D$Sum_length)


