#load the required libraries
library(ggplot2)
library(scales)

# Read the NUMT counts (one species pair per genus). Only the columns
# NUMTs_sp1 and NUMTs_sp2 are used below. The separator is stated explicitly
# instead of being left as an empty argument.
data_NUMTs <- read.csv(
  file = "one_species_pair_per_genus_NUMTs.csv",
  header = TRUE,
  sep = ","
)

# Fail early with a clear message if the expected count columns are absent.
numt_cols <- c("NUMTs_sp1", "NUMTs_sp2")
stopifnot(all(numt_cols %in% names(data_NUMTs)))

# Replace counts of 0 with NA, then keep only the species pairs where both
# counts are present. Zeros must go because log2(0) is -Inf, which can be
# neither drawn on a log2 axis nor used in the correlation. The replacement
# and the completeness check are restricted to the two count columns, so a 0
# or NA in any other column of the file does not drop an otherwise valid pair.
numt_counts <- data_NUMTs[numt_cols]
numt_counts[numt_counts == 0] <- NA
data_NUMTs[numt_cols] <- numt_counts
data_NUMTs_nonzero <- data_NUMTs[complete.cases(numt_counts), ]

# Scatter plot of NUMT counts (species 1 vs species 2) on log2 axes.
# Two point layers are stacked: a translucent filled point and an open circle
# outline (shape 21), so overlapping pairs stay visible. The dashed red line
# is an ordinary least-squares fit computed on the log2-transformed values.
ggplot(
  data = data_NUMTs_nonzero,
  aes(x = NUMTs_sp1, y = NUMTs_sp2)
) +
  geom_point(size = 4, alpha = 0.2) +
  geom_point(size = 4, shape = 21) +
  # `size` is kept for compatibility with older ggplot2 releases; ggplot2
  # >= 3.4.0 prefers `linewidth` for lines and warns about `size` here.
  stat_smooth(
    method = "lm",
    formula = y ~ x,
    colour = "firebrick",
    se = FALSE,
    size = 1,
    linetype = "dashed"
  ) +
  theme_bw() +
  theme(
    axis.title = element_text(size = 14, colour = "black"),
    axis.text = element_text(size = 14, colour = "black"),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    legend.position = "none"
  ) +
  scale_y_continuous(trans = log2_trans()) +
  scale_x_continuous(trans = log2_trans()) +
  xlab("\n NUMT Count in Species 1") +
  ylab("NUMT Count in Species 2\n")

# Pearson correlation of the log2 NUMT counts (pairs with both counts non-zero)
cor.test(
  log2(data_NUMTs_nonzero$NUMTs_sp1),
  log2(data_NUMTs_nonzero$NUMTs_sp2),
  method = "pearson"
)


# Read the genome sizes (one species pair per genus). Only the columns
# Assembly_length_sp1 and Assembly_length_sp2 are used below. The separator is
# stated explicitly instead of being left as an empty argument.
data_genome_size <- read.csv(
  file = "one_species_pair_per_genus_genome_size.csv",
  header = TRUE,
  sep = ","
)

# Scatter plot of assembly length (species 1 vs species 2) on log2 axes.
# Two point layers are stacked: a translucent filled point and an open circle
# outline (shape 21), so overlapping pairs stay visible. The dashed red line
# is an ordinary least-squares fit computed on the log2-transformed values.
# NOTE(review): the axis labels state Mb; confirm that the Assembly_length_*
# columns are actually stored in Mb.
ggplot(
  data = data_genome_size,
  aes(x = Assembly_length_sp1, y = Assembly_length_sp2)
) +
  geom_point(size = 4, alpha = 0.2) +
  geom_point(size = 4, shape = 21) +
  # `size` is kept for compatibility with older ggplot2 releases; ggplot2
  # >= 3.4.0 prefers `linewidth` for lines and warns about `size` here.
  stat_smooth(
    method = "lm",
    formula = y ~ x,
    colour = "firebrick",
    se = FALSE,
    size = 1,
    linetype = "dashed"
  ) +
  theme_bw() +
  theme(
    axis.title = element_text(size = 14, colour = "black"),
    axis.text = element_text(size = 14, colour = "black"),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_blank(),
    legend.position = "none"
  ) +
  scale_y_continuous(trans = log2_trans()) +
  scale_x_continuous(trans = log2_trans()) +
  xlab("\n Genome size of Species 1 (Mb)") +
  ylab("Genome Size of Species 2 (Mb)\n")

# Pearson correlation of the log2 genome sizes
cor.test(
  log2(data_genome_size$Assembly_length_sp1),
  log2(data_genome_size$Assembly_length_sp2),
  method = "pearson"
)

