# Andrew & Regis
# Nov 2017
# Doing an adonis analysis of the positions of the animals on the first princiapal component axes


# load data ---------------------------------------------------------------


library(fwdata)
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(tidyverse)
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Warning: package 'tidyr' was built under R version 3.4.2
## Warning: package 'purrr' was built under R version 3.4.2
## Warning: package 'dplyr' was built under R version 3.4.2
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, plotly, stats
## lag():    dplyr, stats
library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.4-4
# define functions --------------------------------------------------------

calculate_adonis_taxo_level <- function(taxon, .taxa_below_taxon, .first_four_axes){
  
  # join the taxa with enough information for an adonis with the first four PCAs
  below_taxon_complete <- .taxa_below_taxon %>% 
    select(species_id, taxon_name, taxon_level, family, ord) %>% 
    left_join(.first_four_axes) %>% 
    drop_na(Axis.1)
  
  # axis scores
  axis_values <- as.matrix(below_taxon_complete[,c("Axis.1", "Axis.2", "Axis.3", "Axis.4")])
  
  ff <- sprintf("axis_values ~ %s", taxon)
  res <- adonis(as.formula(ff), data = below_taxon_complete, method = "euclidian")

  return(res)
  
}

# load the full dataset This is the very last version of the PCA  -- the species
# scores on axes 1 and 4 of the PCA according to the most recent data version.
axes.raw <- read.table("Current Results - 0.7.7/RES_pca_individuals_0.7.7.txt",
                       header = TRUE, row.names = 1)

fwd <- fw_data("0.7.7")
str(fwd, max.level = 1)
## List of 7
##  $ datasets        :'data.frame':    44 obs. of  20 variables:
##  $ visits          :'data.frame':    77 obs. of  15 variables:
##  $ traits          :Classes 'tbl_df', 'tbl' and 'data.frame':    892 obs. of  89 variables:
##  $ bromeliads      :'data.frame':    1757 obs. of  91 variables:
##  $ abundance       :Classes 'tbl_df', 'tbl' and 'data.frame':    12701 obs. of  5 variables:
##  $ synonymous_names:Classes 'tbl_df', 'tbl' and 'data.frame':    9 obs. of  2 variables:
##  $ abundance_matrix:Classes 'tbl_df', 'tbl' and 'data.frame':    1803 obs. of  1238 variables:
first_four_axes <- axes.raw %>% 
  rownames_to_column("species_id") %>% 
  select(species_id, Axis.1:Axis.4)

# put in the taxonomic information

# first filter for all taxa identified to the level of below family: everything
# which was either genus or species

glimpse(fwd$traits)
## Observations: 892
## Variables: 89
## $ species_id       <chr> "3361", "3366", "3371", "3376", "3381", "3386...
## $ names            <chr> "Bryocamptus sp", "Daphnidae sp", "Pupa_Dipte...
## $ bwg_name         <chr> "Copepoda.1", "Branchiopoda.2", "Diptera.222"...
## $ domain           <chr> "Eukaryota", "Eukaryota", "Eukaryota", "Eukar...
## $ kingdom          <chr> "Animalia", "Animalia", "Animalia", "Animalia...
## $ phylum           <chr> "Arthropoda", "Arthropoda", "Arthropoda", "Ar...
## $ subphylum        <chr> "Crustacea", "Crustacea", NA, NA, NA, NA, NA,...
## $ class            <chr> "Maxillopoda", "Branchiopoda", "Insecta", "In...
## $ subclass         <chr> "Copepoda", "Phyllopoda", "Neoptera", "Neopte...
## $ ord              <chr> "Harpacticoida", "Diplostraca", "Diptera", "D...
## $ subord           <chr> NA, "Cladocera", NA, NA, NA, NA, NA, "Brachyc...
## $ family           <chr> "Canthocamptidae", "Daphnidae", NA, NA, NA, N...
## $ subfamily        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ tribe            <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ genus            <chr> "Bryocamptus", NA, NA, NA, NA, NA, NA, NA, NA...
## $ species          <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ subspecies       <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## $ functional_group <chr> "filter.feeder", "filter.feeder", NA, NA, NA,...
## $ predation        <chr> "prey", "prey", NA, NA, NA, NA, NA, NA, NA, N...
## $ realm            <chr> "aquatic", "aquatic", "aquatic", "aquatic", "...
## $ micro_macro      <chr> "micro", "micro", "macro", "macro", "macro", ...
## $ barcode          <chr> "EEE", "123", NA, NA, NA, NA, NA, NA, NA, NA,...
## $ taxon_level      <chr> "genus", "family", "ord", "ord", "ord", "ord"...
## $ taxon_name       <chr> "Bryocamptus", "Daphnidae", "Diptera", "Dipte...
## $ taxon_number     <dbl> 12, 9, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 13, 7, 9...
## $ AS1              <int> 0, 0, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 0, 2, 1, ...
## $ AS2              <int> 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 1, ...
## $ AS3              <int> 0, 0, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 0, 3, 0, ...
## $ AS4              <int> 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, ...
## $ BS1              <int> 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 3, ...
## $ BS2              <int> 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, ...
## $ BS3              <int> 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, ...
## $ BS4              <int> 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, ...
## $ BS5              <int> 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, ...
## $ DM1              <int> 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 0, ...
## $ DM2              <int> 0, 0, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 3, ...
## $ FD1              <int> 0, 3, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 3, 2, 0, ...
## $ FD2              <int> 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, ...
## $ FD3              <int> 2, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 0, 1, 0, ...
## $ FD4              <int> 0, 3, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 3, 2, 3, ...
## $ FD5              <int> 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, ...
## $ FD6              <int> 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, ...
## $ FD7              <int> 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, ...
## $ FD8              <int> 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, ...
## $ FG1              <int> 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 0, 1, 3, ...
## $ FG2              <int> 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, ...
## $ FG3              <int> 3, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, ...
## $ FG4              <int> 3, 3, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 3, 2, 0, ...
## $ FG5              <int> 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, ...
## $ FG6              <int> 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, ...
## $ RE1              <int> 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, ...
## $ RE2              <int> 3, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, ...
## $ RE3              <int> 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, ...
## $ RE4              <int> 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 0, 1, 1, ...
## $ RE5              <int> 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, ...
## $ RE6              <int> 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, ...
## $ RE7              <int> 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, ...
## $ RE8              <int> 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, ...
## $ RF1              <int> 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, ...
## $ RF2              <int> 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, ...
## $ RF3              <int> 3, 3, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 3, 2, 1, ...
## $ RF4              <int> 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 0, 1, 1, ...
## $ LO1              <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ LO2              <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, ...
## $ LO3              <int> 0, 3, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 3, 1, 0, ...
## $ LO4              <int> 2, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 0, 1, 2, ...
## $ LO5              <int> 3, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 2, ...
## $ LO6              <int> 3, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, ...
## $ LO7              <int> 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, ...
## $ RM1              <int> 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 0, ...
## $ RM2              <int> 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, ...
## $ RM3              <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ RM4              <int> 0, 0, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 0, 2, 3, ...
## $ RM5              <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ MD1              <int> 0, 3, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 1, 3, ...
## $ MD2              <int> 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, ...
## $ MD3              <int> 3, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, ...
## $ MD4              <int> 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, ...
## $ MD5              <int> 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, ...
## $ MD6              <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ MD7              <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ MD8              <int> 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, ...
## $ CP1              <int> 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 3, ...
## $ CP2              <int> 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 0, 1, 0, ...
## $ CP3              <int> 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, ...
## $ BF1              <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ BF2              <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ BF3              <int> 0, 0, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 0, 3, 0, ...
## $ BF4              <int> 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 3, ...
# each taxonomic level has a little number to make it easy to filter by "rank of
# taxonomic group"


# hey so what is the distribution of lowest taxonomic levels anyway?

fwd$traits %>%
  group_by(taxon_number, taxon_level) %>% 
  tally %>% 
  arrange(taxon_number)
## # A tibble: 11 x 3
## # Groups:   taxon_number [11]
##    taxon_number  taxon_level     n
##           <dbl>        <chr> <int>
##  1            3       phylum    16
##  2            5        class     9
##  3            6     subclass    23
##  4            7          ord    71
##  5            8       subord    39
##  6            9       family   299
##  7           10    subfamily    78
##  8           11        tribe     3
##  9           12        genus   255
## 10           13 species_name    96
## 11           NA         <NA>     3
# Give me every morphospecies which was identified below family 
taxa_below_family <- fwd$traits %>% 
  select(species_id:taxon_number) %>% 
  filter(!(taxon_number <= 9))



# to conduct an adonis we need to put this info together with the axis scores:


taxa_below_family %>% 
  select(species_id, taxon_name, taxon_level, family) %>% 
  left_join(first_four_axes) %>% 
  ggplot(aes(x  = Axis.1, y = Axis.2, colour = family))+ geom_point() + guides(colour = FALSE)
## Joining, by = "species_id"
## Warning: Removed 12 rows containing missing values (geom_point).

# # Why is there missing data? 
# at_least_to_genera %>% 
#   select(species_id, taxon_name, taxon_level, genus) %>% 
#   left_join(first_four_axes) %>% 
#   visdat::vis_miss(.)
# 
# # Who are these sad animals? 
# at_least_to_genera %>% 
#   select(species_id, taxon_name, taxon_level, genus) %>% 
#   left_join(first_four_axes) %>% 
#   filter(is.na(Axis.1))

genus_adonis <- calculate_adonis_taxo_level("family", taxa_below_family, first_four_axes)
## Joining, by = "species_id"
summary(genus_adonis)
##              Length Class  Mode   
## aov.tab          6  anova  list   
## call             4  -none- call   
## coefficients   120  -none- numeric
## coef.sites   12600  -none- numeric
## f.perms        999  -none- numeric
## model.matrix 12600  -none- numeric
## terms            3  terms  call
genus_adonis
## 
## Call:
## adonis(formula = as.formula(ff), data = below_taxon_complete,      method = "euclidian") 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##            Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## family     29   10.4525 0.36043  69.696 0.83825  0.001 ***
## Residuals 390    2.0169 0.00517         0.16175           
## Total     419   12.4694                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# order level -------------------------------------------------------------


# Give me every morphospecies which was identified below order
taxa_below_order <- fwd$traits %>% 
  select(species_id:taxon_number) %>% 
  filter(!(taxon_number <= 8 ))

order_adonis <- calculate_adonis_taxo_level("ord", taxa_below_order, first_four_axes)
## Joining, by = "species_id"
summary(order_adonis)
##              Length Class  Mode   
## aov.tab         6   anova  list   
## call            4   -none- call   
## coefficients   44   -none- numeric
## coef.sites   7700   -none- numeric
## f.perms       999   -none- numeric
## model.matrix 7700   -none- numeric
## terms           3   terms  call
order_adonis
## 
## Call:
## adonis(formula = as.formula(ff), data = below_taxon_complete,      method = "euclidian") 
## 
## Permutation: free
## Number of permutations: 999
## 
## Terms added sequentially (first to last)
## 
##            Df SumsOfSqs MeanSqs F.Model      R2 Pr(>F)    
## ord        10    5.9202 0.59202  27.117 0.28242  0.001 ***
## Residuals 689   15.0422 0.02183         0.71758           
## Total     699   20.9624                 1.00000           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 3D plots of the dots ----------------------------------------------------

family_axis <- taxa_below_family %>% 
  select(species_id, taxon_name, taxon_level, family) %>% 
  left_join(first_four_axes)
## Joining, by = "species_id"
family_axis %>% glimpse %>% 
  plot_ly(x = ~ Axis.1, y = ~Axis.2, z = ~Axis.3) %>% 
  add_markers(color = ~family)
## Observations: 432
## Variables: 8
## $ species_id  <chr> "3361", "3421", "3436", "3511", "3516", "3521", "3...
## $ taxon_name  <chr> "Bryocamptus", "Ceriodaphnia_laticaudata", "Olbiog...
## $ taxon_level <chr> "genus", "species_name", "genus", "genus", "genus"...
## $ family      <chr> "Canthocamptidae", "Daphniidae", "Anisopodidae", "...
## $ Axis.1      <dbl> -0.004783728, -0.032288128, 0.033901272, -0.097143...
## $ Axis.2      <dbl> 0.08212398, 0.05231581, -0.01495065, 0.04403564, 0...
## $ Axis.3      <dbl> 0.19377813, 0.22204900, -0.03344394, -0.02525012, ...
## $ Axis.4      <dbl> 0.13664584, 0.01086506, -0.05017423, -0.03276062, ...
## Warning: Ignoring 12 observations
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
family_axis %>% glimpse %>% 
  plot_ly(x = ~ Axis.2, y = ~Axis.3, z = ~Axis.4) %>% 
  add_markers(color = ~family)
## Observations: 432
## Variables: 8
## $ species_id  <chr> "3361", "3421", "3436", "3511", "3516", "3521", "3...
## $ taxon_name  <chr> "Bryocamptus", "Ceriodaphnia_laticaudata", "Olbiog...
## $ taxon_level <chr> "genus", "species_name", "genus", "genus", "genus"...
## $ family      <chr> "Canthocamptidae", "Daphniidae", "Anisopodidae", "...
## $ Axis.1      <dbl> -0.004783728, -0.032288128, 0.033901272, -0.097143...
## $ Axis.2      <dbl> 0.08212398, 0.05231581, -0.01495065, 0.04403564, 0...
## $ Axis.3      <dbl> 0.19377813, 0.22204900, -0.03344394, -0.02525012, ...
## $ Axis.4      <dbl> 0.13664584, 0.01086506, -0.05017423, -0.03276062, ...
## Warning: Ignoring 12 observations

## Warning: n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors