##########################
# Install dplyr only when it is missing, so that re-running the script does
# not reinstall the package (and need network access) on every execution.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)
library(tidyverse)
### Fisher Exact tests

### Fisher’s exact tests were used to determine statistical significance of SNP variation between S. nuchalis and S. varius, between each species and hybrids, and between zones of allopatry and sympatry within each species. 

### BETWEEN SPECIES

# Load the SNP assignment table and echo it for a visual check.
SNPAssignment <- read.csv(file = "SNPS_JHChanges2021.csv")
SNPAssignment

### Reran 24 Feb 2021 as per JH sample classification changes from original "SNPs.csv"

# Pairwise comparison sets: each one removes the rows of one Species code.
# filter() keeps only rows where the condition is TRUE, so rows with a
# missing Species are removed as well.
YBxRN <- SNPAssignment %>% filter(Species != "HYSA")
YBxHY <- SNPAssignment %>% filter(Species != "RNSA")
RNxHY <- SNPAssignment %>% filter(Species != "YBSA")

### Enol Fisher test -- YBxRN
# Species x Enol contingency table for the YBxRN subset, followed by the
# Fisher exact test on it and the total number of counted observations.
SNP_table_Enol_YBxRN <- table(YBxRN[["Species"]], YBxRN[["Enol"]])
SNP_table_Enol_YBxRN
fisher.test(x = SNP_table_Enol_YBxRN)
sum(SNP_table_Enol_YBxRN)

# Recorded output:
#   data:  SNP_table_Enol_YBxRN
#   p-value < 2.2e-16
#   alternative hypothesis: true odds ratio is not equal to 1
#   95 percent confidence interval:
#     0.03022759 0.11083797
#   sample estimates:
#     odds ratio
#     0.05913572
#
#   sum = 288

### GAPD Fisher test -- YBxRN
# Species x Gapd contingency table for the YBxRN subset, followed by the
# Fisher exact test on it and the total number of counted observations.
SNP_table_Gapd_YBxRN <- table(YBxRN[["Species"]], YBxRN[["Gapd"]])
SNP_table_Gapd_YBxRN
fisher.test(x = SNP_table_Gapd_YBxRN)
sum(SNP_table_Gapd_YBxRN)

# Recorded output:
#   data:  SNP_table_Gapd_YBxRN
#   p-value = 7.361e-08
#   alternative hypothesis: true odds ratio is not equal to 1
#   95 percent confidence interval:
#     0.08048245 0.35906672
#   sample estimates:
#     odds ratio
#     0.1761154
#
#   > sum(SNP_table_Gapd_YBxRN)
#   [1] 258

### ANM Fisher test -- YBxRN
# Species x ANM contingency table for the YBxRN subset, followed by the
# Fisher exact test on it and the total number of counted observations.
SNP_table_ANM_YBxRN <- table(YBxRN[["Species"]], YBxRN[["ANM"]])
SNP_table_ANM_YBxRN
fisher.test(x = SNP_table_ANM_YBxRN)
sum(SNP_table_ANM_YBxRN)

# Recorded output:
#   Fisher's Exact Test for Count Data
#
#   data:  SNP_table_ANM_YBxRN
#   p-value = 4.614e-06
#   alternative hypothesis: true odds ratio is not equal to 1
#   95 percent confidence interval:
#     2.040170 6.828653
#   sample estimates:
#     odds ratio
#     3.696429
#
#   > sum(SNP_table_ANM_YBxRN)
#   [1] 322

####################

### AB comparisons
# Keep only the rows whose Pop is "AB" and echo them.
AB <- SNPAssignment %>% filter(Pop == "AB")
AB

### Pairwise comparison groups within AB: each drops one Species code.
AB_YBxRN <- AB %>% filter(Species != "HYSA")
AB_YBxHY <- AB %>% filter(Species != "RNSA")
AB_RNxHY <- AB %>% filter(Species != "YBSA")

# YBxRN (AB rows with Species "HYSA" removed)

### Enol
# Species x Enol table, Fisher exact test, and total count.
AB_YBxRN_Enol <- table(AB_YBxRN[["Species"]], AB_YBxRN[["Enol"]])
AB_YBxRN_Enol
fisher.test(x = AB_YBxRN_Enol)
sum(AB_YBxRN_Enol)

# Recorded output:
#   data:  AB_YBxRN_Enol
#   p-value = 1.199e-06
#   alternative hypothesis: true odds ratio is not equal to 1
#   95 percent confidence interval:
#     0.08683407 0.39122129
#   sample estimates:
#     odds ratio
#     0.1878545
#
#   > sum(AB_YBxRN_Enol)
#   [1] 160

### Gapd
# Species x Gapd table, Fisher exact test, and total count.
AB_Gapd_YBxRN <- table(AB_YBxRN[["Species"]], AB_YBxRN[["Gapd"]])
AB_Gapd_YBxRN
fisher.test(x = AB_Gapd_YBxRN)
sum(AB_Gapd_YBxRN)

# Recorded output:
#   data:  AB_Gapd_YBxRN
#   p-value = 5.132e-07
#   alternative hypothesis: true odds ratio is not equal to 1
#   95 percent confidence interval:
#     0.02948218 0.29674613
#   sample estimates:
#     odds ratio
#     0.1042781
#
#   > sum(AB_Gapd_YBxRN)
#   [1] 146

### ANM
# Species x ANM table, Fisher exact test, and total count.
AB_ANM_YBxRN <- table(AB_YBxRN[["Species"]], AB_YBxRN[["ANM"]])
AB_ANM_YBxRN
fisher.test(x = AB_ANM_YBxRN)
sum(AB_ANM_YBxRN)

# Recorded output:
#   data:  AB_ANM_YBxRN
#   p-value = 0.00216
#   alternative hypothesis: true odds ratio is not equal to 1
#   95 percent confidence interval:
#     1.410414 5.998247
#   sample estimates:
#     odds ratio
#     2.882499
#
#   > sum(AB_ANM_YBxRN)
#   [1] 190

# YBxHY (AB rows with Species "RNSA" removed)

### Enol
# Species x Enol table, Fisher exact test, and total count.
AB_YBxHY_Enol <- table(AB_YBxHY[["Species"]], AB_YBxHY[["Enol"]])
AB_YBxHY_Enol
fisher.test(x = AB_YBxHY_Enol)
sum(AB_YBxHY_Enol)

# Recorded output:
#   data:  AB_YBxHY_Enol
#   p-value = 0.8761
#   alternative hypothesis: true odds ratio is not equal to 1
#   95 percent confidence interval:
#     0.4701928 1.7497321
#   sample estimates:
#     odds ratio
#     0.9077921
#
#   > sum(AB_YBxHY_Enol)
#   [1] 172

### Gapd
# Species x Gapd table, Fisher exact test, and total count.
AB_Gapd_YBxHY <- table(AB_YBxHY[["Species"]], AB_YBxHY[["Gapd"]])
AB_Gapd_YBxHY
fisher.test(x = AB_Gapd_YBxHY)
sum(AB_Gapd_YBxHY)

# Recorded output:
#   data:  AB_Gapd_YBxHY
#   p-value = 0.4271
#   alternative hypothesis: true odds ratio is not equal to 1
#   95 percent confidence interval:
#     0.3780474 1.4462962
#   sample estimates:
#     odds ratio
#     0.7408602
#
#   > sum(AB_Gapd_YBxHY)
#   [1] 166

### ANM
# Species x ANM table, Fisher exact test, and total count.
AB_ANM_YBxHY <- table(AB_YBxHY[["Species"]], AB_YBxHY[["ANM"]])
AB_ANM_YBxHY
fisher.test(x = AB_ANM_YBxHY)
sum(AB_ANM_YBxHY)

# Recorded output:
#   data:  AB_ANM_YBxHY
#   p-value = 0.3307
#   alternative hypothesis: true odds ratio is not equal to 1
#   95 percent confidence interval:
#     0.2666525 1.5379607
#   sample estimates:
#     odds ratio
#     0.655767
#
#   > sum(AB_ANM_YBxHY)
#   [1] 204

### RNxHY (AB rows with Species "YBSA" removed)

### Enol
# Species x Enol table, Fisher exact test, and total count.
AB_RNxHY_Enol <- table(AB_RNxHY[["Species"]], AB_RNxHY[["Enol"]])
AB_RNxHY_Enol
fisher.test(x = AB_RNxHY_Enol)
sum(AB_RNxHY_Enol)

# Recorded output:
#   data:  AB_RNxHY_Enol
#   p-value = 5.536e-06
#   alternative hypothesis: true odds ratio is not equal to 1
#   95 percent confidence interval:
#     2.326709 10.418499
#   sample estimates:
#     odds ratio
#     4.832977
#
#   > sum(AB_RNxHY_Enol)
#   [1] 160

### Gapd
# Species x Gapd table, Fisher exact test, and total count.
AB_Gapd_RNxHY <- table(AB_RNxHY[["Species"]], AB_RNxHY[["Gapd"]])
AB_Gapd_RNxHY
fisher.test(x = AB_Gapd_RNxHY)
sum(AB_Gapd_RNxHY)

# Recorded output:
#   data:  AB_Gapd_RNxHY
#   p-value = 1.906e-05
#   alternative hypothesis: true odds ratio is not equal to 1
#   95 percent confidence interval:
#     2.516485 25.016677
#   sample estimates:
#     odds ratio
#     7.117135
#
#   > sum(AB_Gapd_RNxHY)
#   [1] 156

### ANM
# Species x ANM table, Fisher exact test, and total count.
AB_ANM_RNxHY <- table(AB_RNxHY[["Species"]], AB_RNxHY[["ANM"]])
AB_ANM_RNxHY
fisher.test(x = AB_ANM_RNxHY)
sum(AB_ANM_RNxHY)

# Recorded output:
#   data:  AB_ANM_RNxHY
#   p-value = 0.0001167
#   alternative hypothesis: true odds ratio is not equal to 1
#   95 percent confidence interval:
#     0.09345801 0.52159558
#   sample estimates:
#     odds ratio
#     0.2277958
#
#   > sum(AB_ANM_RNxHY)
#   [1] 166

################################################
####### Allopatry vs sympatry RNSA

# Rows whose Pop is "AB", echoed for a visual check.
ABonly <- SNPAssignment %>% filter(Pop == "AB")
ABonly

### Enol Fisher test
# SubPop x Enol counts across the AB rows, printed for reference.
SNP_table_RNaxSymp_Enol <- table(ABonly[["SubPop"]], ABonly[["Enol"]])
SNP_table_RNaxSymp_Enol
# 2x2 Enol counts (columns C and T) for the "Allo_RNSA" and "Symp" rows,
# entered by hand. NOTE(review): presumably transcribed from the SubPop x Enol
# table printed just above -- re-check the numbers whenever the CSV changes.
SNP_table_RNaxSymp_Enol_df <- data.frame(
  C = c(9, 78),
  T = c(37, 66),
  row.names = c("Allo_RNSA", "Symp")
)
SNP_table_RNaxSymp_Enol_df
# Test the data frame built above. The earlier call passed SNP_table_Enol_df,
# a name that is not defined in this section, so it either failed with
# "object not found" or silently tested a stale object left in the workspace.
fisher.test(SNP_table_RNaxSymp_Enol_df)

# Recorded output:
#   data:  SNP_table_RNaxSymp_Enol_df
#   p-value = 3.64e-05
#   alternative hypothesis: true odds ratio is not equal to 1
#   95 percent confidence interval:
#     0.08186919 0.47780405
#   sample estimates:
#     odds ratio
#     0.2074845



### GAPD Fisher test
# SubPop x Gapd counts across the AB rows, printed for reference.
SNP_table_RNaxSymp_GAPD <- table(ABonly[["SubPop"]], ABonly[["Gapd"]])
SNP_table_RNaxSymp_GAPD
# 2x2 Gapd counts (columns C and T) for the "Allo_RNSA" and "Symp" rows,
# entered by hand. NOTE(review): presumably transcribed from the SubPop x Gapd
# table printed just above -- re-check the numbers whenever the CSV changes.
SNP_table_RNaxSymp_GAPD_df <- data.frame(
  C = c(2, 51),
  T = c(44, 89),
  row.names = c("Allo_RNSA", "Symp")
)
SNP_table_RNaxSymp_GAPD_df
# Test the data frame built above. The earlier call passed SNP_table_GAPD_df,
# a name that is not defined in this section, so it either failed with
# "object not found" or silently tested a stale object left in the workspace.
fisher.test(SNP_table_RNaxSymp_GAPD_df)

# Recorded output:
#   data:  SNP_table_RNaxSymp_GAPD_df
#   p-value = 7.976e-06
#   alternative hypothesis: true odds ratio is not equal to 1
#   95 percent confidence interval:
#     0.009024693 0.329530032
#   sample estimates:
#     odds ratio
#     0.08009108

### ANM Fisher test
# SubPop x ANM counts across the AB rows, printed for reference.
SNP_table_RNaxSymp_ANM <- table(ABonly[["SubPop"]], ABonly[["ANM"]])
SNP_table_RNaxSymp_ANM

# Hand-entered 2x2 ANM counts (columns C and T) for "Allo_RNSA" vs "Symp",
# then the Fisher exact test on that data frame.
SNP_table_RNaxSymp_ANM_df <- data.frame(
  C = c(17, 29),
  T = c(31, 125),
  row.names = c("Allo_RNSA", "Symp")
)
SNP_table_RNaxSymp_ANM_df
fisher.test(x = SNP_table_RNaxSymp_ANM_df)

# Recorded output:
#   data:  SNP_table_RNaxSymp_ANM_df
#   p-value = 0.02874
#   alternative hypothesis: true odds ratio is not equal to 1
#   95 percent confidence interval:
#     1.071078 5.097431
#   sample estimates:
#     odds ratio
#     2.352403

################################################
####### Allopatry vs sympatry YBSA


### Enol Fisher test
# SubPop x Enol counts across the AB rows, printed for reference.
SNP_table_YBaxSymp_Enol <- table(ABonly[["SubPop"]], ABonly[["Enol"]])
SNP_table_YBaxSymp_Enol

# Hand-entered 2x2 Enol counts (columns C and T) for "Allo_YBSA" vs "Symp",
# then the Fisher exact test on that data frame.
SNP_table_YBaxSymp_Enol_df <- data.frame(
  C = c(34, 78),
  T = c(22, 66),
  row.names = c("Allo_YBSA", "Symp")
)
SNP_table_YBaxSymp_Enol_df
fisher.test(x = SNP_table_YBaxSymp_Enol_df)

# Recorded output:
#   data:  SNP_table_YBaxSymp_Enol_df
#   p-value = 0.4312
#   alternative hypothesis: true odds ratio is not equal to 1
#   95 percent confidence interval:
#     0.6678501 2.5899882
#   sample estimates:
#     odds ratio
#     1.305943


### GAPD Fisher test
# SubPop x Gapd counts across the AB rows, printed for reference.
SNP_table_YBaxSymp_GAPD <- table(ABonly[["SubPop"]], ABonly[["Gapd"]])
SNP_table_YBaxSymp_GAPD

# Hand-entered 2x2 Gapd counts (columns C and T) for "Allo_YBSA" vs "Symp",
# then the Fisher exact test on that data frame.
SNP_table_YBaxSymp_GAPD_df <- data.frame(
  C = c(18, 51),
  T = c(30, 89),
  row.names = c("Allo_YBSA", "Symp")
)
SNP_table_YBaxSymp_GAPD_df
fisher.test(x = SNP_table_YBaxSymp_GAPD_df)

# Recorded output:
#   data:  SNP_table_YBaxSymp_GAPD_df
#   p-value = 1
#   alternative hypothesis: true odds ratio is not equal to 1
#   95 percent confidence interval:
#     0.4968842 2.1639618
#   sample estimates:
#     odds ratio
#     1.046782

### ANM Fisher test
# SubPop x ANM counts across the AB rows, printed for reference.
SNP_table_YBaxSymp_ANM <- table(ABonly[["SubPop"]], ABonly[["ANM"]])
SNP_table_YBaxSymp_ANM

# Hand-entered 2x2 ANM counts (columns C and T) for "Allo_YBSA" vs "Symp",
# then the Fisher exact test on that data frame.
SNP_table_YBaxSymp_ANM_df <- data.frame(
  C = c(14, 29),
  T = c(64, 125),
  row.names = c("Allo_YBSA", "Symp")
)
SNP_table_YBaxSymp_ANM_df
fisher.test(x = SNP_table_YBaxSymp_ANM_df)

# Recorded output:
#   data:  SNP_table_YBaxSymp_ANM_df
#   p-value = 1
#   alternative hypothesis: true odds ratio is not equal to 1
#   95 percent confidence interval:
#     0.4287777 1.9969322
#   sample estimates:
#     odds ratio
#     0.9431173

################################################
####### Allopatry RNSA vs Allopatry YBSA


### Enol Fisher test
# SubPop x Enol counts across the AB rows, printed for reference. Stored under
# an RNaxYBa name: the earlier copy-pasted assignment overwrote the
# SNP_table_YBaxSymp_Enol object of the YBSA-vs-sympatry comparison (with the
# same table) and mislabelled this comparison.
SNP_table_RNaxYBa_Enol <- table(ABonly$SubPop, ABonly$Enol)
SNP_table_RNaxYBa_Enol

# Hand-entered 2x2 Enol counts (columns C and T) for "Allo_RNSA" vs
# "Allo_YBSA", then the Fisher exact test on that data frame.
SNP_table_RNaxYBa_Enol_df <- data.frame(
  C = c(9, 34),
  T = c(37, 22),
  row.names = c("Allo_RNSA", "Allo_YBSA")
)
SNP_table_RNaxYBa_Enol_df
fisher.test(SNP_table_RNaxYBa_Enol_df)

# Recorded output:
#   data:  SNP_table_RNaxYBa_Enol_df
#   p-value = 4.32e-05
#   alternative hypothesis: true odds ratio is not equal to 1
#   95 percent confidence interval:
#     0.05648023 0.41976043
#   sample estimates:
#     odds ratio
#     0.1605657


### GAPD Fisher test
# SubPop x Gapd counts across the AB rows, printed for reference (stored under
# an RNaxYBa name rather than reusing SNP_table_YBaxSymp_GAPD).
SNP_table_RNaxYBa_GAPD <- table(ABonly$SubPop, ABonly$Gapd)
SNP_table_RNaxYBa_GAPD

# Hand-entered 2x2 Gapd counts (columns C and T) for "Allo_RNSA" vs
# "Allo_YBSA", then the Fisher exact test on that data frame.
SNP_table_RNaxYBa_GAPD_df <- data.frame(
  C = c(2, 18),
  T = c(44, 30),
  row.names = c("Allo_RNSA", "Allo_YBSA")
)
SNP_table_RNaxYBa_GAPD_df
fisher.test(SNP_table_RNaxYBa_GAPD_df)

# Recorded output:
#   data:  SNP_table_RNaxYBa_GAPD_df
#   p-value = 8.51e-05
#   alternative hypothesis: true odds ratio is not equal to 1
#   95 percent confidence interval:
#     0.008149242 0.362156674
#   sample estimates:
#     odds ratio
#     0.07765656

### ANM Fisher test
# SubPop x ANM counts across the AB rows, printed for reference (stored under
# an RNaxYBa name rather than reusing SNP_table_YBaxSymp_ANM).
SNP_table_RNaxYBa_ANM <- table(ABonly$SubPop, ABonly$ANM)
SNP_table_RNaxYBa_ANM

# Hand-entered 2x2 ANM counts (columns C and T) for "Allo_RNSA" vs
# "Allo_YBSA", then the Fisher exact test on that data frame.
SNP_table_RNaxYBa_ANM_df <- data.frame(
  C = c(17, 14),
  T = c(31, 64),
  row.names = c("Allo_RNSA", "Allo_YBSA")
)
SNP_table_RNaxYBa_ANM_df
fisher.test(SNP_table_RNaxYBa_ANM_df)

# Recorded output:
#   data:  SNP_table_RNaxYBa_ANM_df
#   p-value = 0.0339
#   alternative hypothesis: true odds ratio is not equal to 1
#   95 percent confidence interval:
#     1.009417 6.242000
#   sample estimates:
#     odds ratio
#     2.487497

