#load the required libraries
library(ggplot2)
library(equatiomatic)

#read in data file (this contains average mitogenome-wide NUMT counts and COI
#barcode NUMT counts); the field separator is spelled out explicitly (a comma,
#which is also the read.csv default)
mitogenome_results <- read.csv(
  file = "results_mitogenome_coding_only.csv",
  header = TRUE,
  sep = ","
)

#replace values of '0' with NA; the comparison runs over every cell of the
#table, so a zero in any column (not only the two NUMT count columns) is recoded
#NOTE(review): presumably zeros are removed because log2(0) is -Inf - confirm
mitogenome_results[mitogenome_results == 0] <- NA

#remove rows with NAs; a row is dropped if any of its columns is NA, which
#includes NAs that were already present in the input file
mitogenome_results_nonzero <-
  mitogenome_results[complete.cases(mitogenome_results), ]

#save the non-zero file, it will be used to select one representative species
#per genus; row names are written as the first column (write.csv default)
write.csv(
  mitogenome_results_nonzero,
  file = "results_mitogenome_coding_only.nonzero.csv"
)

#linear model using log2-transformed data: average mitogenome-wide NUMTs is the
#response and COI barcode NUMTs the predictor; the columns are resolved through
#`data =` so that coefficient labels and the printed equation carry the bare
#column names instead of the data-frame name
lm_NUMTs <- lm(
  log2(Average_NUMTs_mitogenome) ~ log2(COI_NUMTs),
  data = mitogenome_results_nonzero
)
summary(lm_NUMTs)
#show the fitted equation with the estimated coefficients filled in
extract_eq(lm_NUMTs, use_coefs = TRUE)

#plot results (panel A in Figure 4): scatter of the log2-transformed counts
#with a linear fit and a 1:1 reference line
ggplot(
  mitogenome_results_nonzero,
  aes(x = log2(COI_NUMTs), y = log2(Average_NUMTs_mitogenome))
) +
  #one semi-transparent grey point per row of the table
  geom_point(colour = "lightgrey", alpha = 0.6, size = 2) +
  #linear fit; the line and its ribbon share the same blue
  geom_smooth(method = lm, fill = "#409dcdff", colour = "#409dcdff") +
  #dashed red line y = x for reference
  geom_abline(
    slope = 1,
    intercept = 0,
    linetype = "dashed",
    color = "red",
    size = 0.7
  ) +
  theme_bw() +
  #no grid lines, no legend, 12 pt axis text and titles
  theme(
    legend.position = "none",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 12)
  ) +
  #the newlines in the labels add spacing between axis titles and tick labels
  labs(
    x = "\nCOI barcode NUMTs (log2-transformed)",
    y = "Average mitogenome-wide NUMTs (log2-transformed)\n"
  )


#repeat the analyses above using the pruned dataset (one species per genus);
#the field separator is spelled out explicitly (a comma, the read.csv default)
#NOTE(review): no zero filtering is applied to this table; presumably it was
#derived from the non-zero file - confirm, since log2(0) would be -Inf
results_unique_genera <- read.csv(
  file = "results_mitogenome_coding_only.one_sp_per_genus.csv",
  header = TRUE,
  sep = ","
)

#linear model on log2-transformed counts; the columns are resolved through
#`data =` so that coefficient labels and the printed equation carry the bare
#column names instead of the data-frame name
lm_NUMTs_unique_genera <- lm(
  log2(Average_NUMTs_mitogenome) ~ log2(COI_NUMTs),
  data = results_unique_genera
)
summary(lm_NUMTs_unique_genera)
#show the fitted equation with the estimated coefficients filled in
extract_eq(lm_NUMTs_unique_genera, use_coefs = TRUE)

#Bonferroni correction across the two regressions (full dataset and one species
#per genus); the slope p-values are read from the fitted models rather than
#typed in from the printed summaries, where "< 2.2e-16" is only the display
#threshold; row 2 of the coefficient table is the slope term, and in a
#one-predictor model its t-test p-value equals the overall F-test p-value
pvals <- vapply(
  list(all_species = lm_NUMTs, one_sp_per_genus = lm_NUMTs_unique_genera),
  function(model) coef(summary(model))[2, "Pr(>|t|)"],
  numeric(1)
)
p.adjust(pvals, method = "bonferroni")

#plot results for the pruned dataset: scatter of the log2-transformed counts
#with a linear fit and a 1:1 reference line
ggplot(
  results_unique_genera,
  aes(x = log2(COI_NUMTs), y = log2(Average_NUMTs_mitogenome))
) +
  #one semi-transparent grey point per row of the table
  geom_point(colour = "lightgrey", alpha = 0.6, size = 2) +
  #linear fit; the line and its ribbon share the same blue
  geom_smooth(method = lm, fill = "#409dcdff", colour = "#409dcdff") +
  #dashed red line y = x for reference
  geom_abline(
    slope = 1,
    intercept = 0,
    linetype = "dashed",
    color = "red",
    size = 0.7
  ) +
  theme_bw() +
  #no grid lines, no legend, 12 pt axis text and titles
  theme(
    legend.position = "none",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 12)
  ) +
  #the newlines in the labels add spacing between axis titles and tick labels
  labs(
    x = "\nCOI barcode NUMTs (log2-transformed)",
    y = "Average mitogenome-wide NUMTs (log2-transformed)\n"
  )
