#load the required libraries
library(GenABEL)
library(ggplot2)

# Read the NUMT table from CSV. The field separator is given explicitly as a
# comma (the read.csv default) rather than being left as an empty argument.
data <- read.csv(file = "NUMTs_major_orders.csv", header = TRUE, sep = ",")

# Split the table into the complete- and incomplete-metamorphosis groups
data_complete <- data[data$Metamorphosis == "complete_metamorphosis", ]
data_incomplete <- data[data$Metamorphosis == "incomplete_metamorphosis", ]

# Two-sample Wilcoxon rank-sum test of total NUMT count between the two
# metamorphosis groups
wilcox.test(
  x = data_complete$NUMT_Count_Total,
  y = data_incomplete$NUMT_Count_Total
)

# Keep only the rows whose Major_order is not labelled "other"
data_main_orders <- data[data$Major_order != "other", ]

# Kruskal-Wallis test for differences in total NUMT count among the five
# major orders
kruskal.test(NUMT_Count_Total ~ Order, data = data_main_orders)

# Plot NUMT count (rank-transformed) based on metamorphosis categories and
# order membership (Figure 7)

# Rank-transform the total NUMT counts with GenABEL's rntransform()
data$NUMT_Count_Total_RT <- rntransform(data$NUMT_Count_Total)

# One colour per x-axis category, defined once and shared by the box fills and
# the jittered points so the two scales cannot drift apart.
order_palette <- c(
  "#6ab644ff", "lightgrey", "#b9242aff", "#1c8ebaff",
  "#eea32dff", "#3a2f2dff", "lightgrey"
)

ggplot(data, aes(x = Order_for_plotting, y = NUMT_Count_Total_RT, fill = Order_for_plotting)) +
  # Boxplot outliers are hidden because every observation is drawn by the
  # jitter layer below.
  geom_boxplot(alpha = 0.5, size = 0.6, outlier.color = NA) +
  geom_jitter(aes(color = Order_for_plotting), width = 0.15, size = 2.5, alpha = 0.6) +
  theme_bw() +
  theme(axis.text = element_text(size = 10),
        axis.title.x = element_blank(),
        axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        legend.position = "none") +
  scale_fill_manual(values = order_palette) +
  scale_colour_manual(values = order_palette) +
  ylab("NUMT Count (rank-transformed)\n") +
  # NOTE(review): ylim() removes observations outside the limits before the
  # boxplot statistics are computed; confirm no data fall outside (-1.4, 4).
  ylim(-1.4, 4) +
  # Bracket lines above each metamorphosis group. annotate() draws each segment
  # exactly once; geom_segment() with constant aes() values would redraw the
  # same line for every row of `data`.
  annotate("segment", x = 0.8, xend = 2.2, y = 3.5, yend = 3.5) +
  annotate("segment", x = 2.8, xend = 7.2, y = 2.7, yend = 2.7) +
  annotate("text", x = 1.5, y = 3.7, label = "incomplete metamorphosis", size = 3.5) +
  annotate("text", x = 4.9, y = 2.9, label = "complete metamorphosis", size = 3.5) +
  # NOTE(review): labels (and the N values in them) are hard-coded and applied
  # positionally; presumably they follow the level order of
  # Order_for_plotting -- verify against the data if the input file changes.
  scale_x_discrete(labels = c("Hemiptera\n (N = 49) ", "other   \n(N = 18)", "Hymenoptera\n (N = 131) ", "Coleoptera\n (N = 54) ", "Lepidoptera\n (N = 190) ", "Diptera  \n (N = 213)", "other   \n(N = 13)"))
